## Traveling Salesperson Problem

Solve the Traveling Salesperson Problem (TSP) over the cluster medoids.

Load labels computed in clustering.

In [1]:
from io_ import load_json
from model.clustering import ClusteringModel
from settings import DATASETS

# Pick the "tiny" dataset configuration from the project settings.
dataset = DATASETS["tiny"]

# Resolve the path of the labeling file saved by the clustering step for this
# dataset name, then read the labels back from JSON.
labels_fp = ClusteringModel.get_labeling_fp(name=dataset.name)
labels = load_json(path_=labels_fp)

I/O: Loading /home/sebaq/Documents/GitHub/IR_project/dataset/tiny/labeling.json 


Load dataset sample.

In [2]:
from model.dataset import RCV1Loader

loader = RCV1Loader()

# Load a sample whose document/term sizes come from the selected dataset
# configuration, with both document and term sorting enabled.
# NOTE(review): the exact sort keys are decided inside RCV1Loader.load — confirm there.
data = loader.load(
    docs=dataset.docs, terms=dataset.terms,
    sort_docs=True, sort_terms=True
)

INFO: Loading matrix. 
I/O: Loading /home/sebaq/Documents/GitHub/IR_project/dataset/data.npz. 
INFO: Removing non informative terms. 
INFO: Sorting documents by terms count. 
INFO: Sorting terms by their frequency. 


Perform dimensionality reduction.

In [4]:
# Reduced-dimensionality representation of the loaded sample.
# NOTE(review): eps=0.35 is an unexplained magic number — presumably the error
# tolerance of the reduction; confirm against data.embed and consider naming it.
embed = data.embed(eps=0.35)

Divide by cluster.

In [6]:
from model.clustering import Clusters

# Split the embedded data by cluster using the labels loaded from the clustering step.
clusters = Clusters(mat=embed, labels=labels)
# Bare expression: shows the split summary as the cell output.
clusters

ClusterDataSplit [Data: 10000, Clusters: 100, Mean-per-Cluster: 100.000]

Compute medoids.

In [8]:
# Medoids taken from the cluster split (presumably one row per cluster — TODO confirm).
medoids = clusters.medoids
# Bare expression: shows the medoid array as the cell output.
medoids

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

Create TSP problem.

In [9]:
from model.tsp import TravellingSalesmanProblem

# Build the TSP instance using the medoid matrix as its items.
tsp = TravellingSalesmanProblem(mat=medoids)

In [10]:
# Bare expression: shows the problem summary (number of items) as the cell output.
tsp

TravellingSalesmanProblem[Items: 100]

Evaluate solutions.

In [11]:
# NOTE(review): the captured output logs "Evaluating solution" when this attribute
# is read, so it looks lazily computed on first access — confirm in model.tsp.
# The displayed result appears to be a visiting order over item indices.
tsp.solutions

INFO: Evaluating solution


array([ 2, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84,
       83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67,
       66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50,
       49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
       32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
       15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  1,  0])

In [12]:
# Presumably the item (medoid) rows rearranged to follow the solution order —
# verify against TravellingSalesmanProblem.sorted_items.
tsp.sorted_items

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])