In [1]:
from hdimvis.data_fetchers.DataFetcher import DataFetcher
from hdimvis.metrics.distance_measures.euclidian_and_manhattan import manhattan,euclidean
from hdimvis.algorithms.spring_force_algos.chalmers96_algo.Chalmers96 import Chalmers96
from hdimvis.create_low_d_layout.LayoutCreation import LayoutCreation
from hdimvis.visualise_layouts_and_metrics.plot import show_layout, show_generation_metrics
from sklearn.decomposition import PCA
from time import perf_counter
import numpy as np
from pathlib import Path
from definitions import PROJECT_ROOT
import pickle


In [2]:
# Fetch the five benchmark datasets. The fetch order is kept as-is because
# each call prints its own progress banner.
rna = DataFetcher.fetch_data('rna N3k')
coli20 = DataFetcher.fetch_data('coil20')
globe = DataFetcher.fetch_data('globe', size=7000)
fmnist = DataFetcher.fetch_data('fashion mnist', size=7000)
mnist = DataFetcher.fetch_data('mnist', size=7000)

# Parallel sequences: dataset_names[k] is the result key for datasets[k].
# NOTE(review): the key 'coli20' differs from the fetch identifier 'coil20';
# it is kept verbatim because it becomes a key in the result dictionaries.
dataset_names = ['rna N3k', 'coli20', 'globe', 'fashion mnist', 'mnist']
datasets = [rna, coli20, globe, fmnist, mnist]

# Per-dataset result containers: one independent list per variant
# ("knnd" / "no knnd"), filled with one entry per repeat.
layouts = {
    name: {variant: [] for variant in ("knnd", "no knnd")}
    for name in dataset_names
}
times = {
    name: {variant: [] for variant in ("knnd", "no knnd")}
    for name in dataset_names
}

# Number of layouts generated per dataset and per variant.
num_repeats = 25


####################
Fetching the "rna N3k" dataset
####################
Dataset loaded
Dataset shape: (3000, 50)
####################
####################
Fetching the "coil20" dataset
####################
Dataset loaded
Dataset shape: (1440, 1024)
####################
####################
Fetching the "globe" dataset
####################
Dataset loaded
Dataset shape: (7003, 3)
####################
####################
Fetching the "fashion mnist" dataset
####################
Dataset loaded
Dataset shape: (7000, 784)
####################
####################
Fetching the "mnist" dataset


  warn(


####################
Dataset loaded
Dataset shape: (7000, 784)
####################


In [3]:
# Benchmark the Chalmers' 1996 layout algorithm with and without the k-NN
# graph option (`use_knnd`). For every dataset, `num_repeats` layouts are
# created per variant; the resulting layouts and the wall-clock time of each
# run (algorithm construction + 200 iterations) are stored in `layouts` and
# `times` and finally pickled to the output directory.

# Resolve and create the output directory up front, so a missing folder is
# dealt with before the long benchmark starts instead of raising
# FileNotFoundError at the very end and discarding all results.
output_dir = (Path(PROJECT_ROOT) / "experiments/knnd/out/").resolve()
output_dir.mkdir(parents=True, exist_ok=True)

# (result key, use_knnd flag). The "no knnd" variant runs first in every
# repeat, matching the order in which results are appended.
variants = (("no knnd", False), ("knnd", True))

for name, dataset in zip(dataset_names, datasets):
    # Initial 2D layout from PCA, rescaled so its overall standard deviation is 10.
    Xld = PCA(n_components=2, whiten=False, copy=True).fit_transform(dataset.data).astype(np.float64)
    Xld *= 10 / np.std(Xld)

    # NOTE(review): the same Xld array object is handed to every run. If
    # Chalmers96 modifies `initial_layout` in place, later runs would not
    # start from the PCA layout - confirm against the algorithm's code.
    for _ in range(num_repeats):
        for key, use_knnd in variants:
            start = perf_counter()
            algo96 = Chalmers96(dataset=dataset, distance_fn=euclidean,
                                spring_constant=0.6, initial_layout=Xld,
                                use_knnd=use_knnd)
            layout = LayoutCreation.create_layout(algo96, no_iters=200)

            # Record the elapsed time before storing the layout so the
            # bookkeeping itself is not part of the measurement.
            times[name][key].append(perf_counter() - start)
            layouts[name][key].append(layout)

# Persist all layouts and timings for later analysis.
path_to_pickle_lay = (output_dir / "layouts.pickle").resolve()
with open(path_to_pickle_lay, 'wb') as pickle_out:
    pickle.dump(layouts, pickle_out)

path_to_pickle_time = (output_dir / "times.pickle").resolve()
with open(path_to_pickle_time, 'wb') as pickle_out:
    pickle.dump(times, pickle_out)



####################
A 2D layout of the "rna N3k" dataset will be created 
using the "Chalmers' 1996" algorithm
####################
The HD distance measure used is: euclidean
####################
####################
No metrics will be collected during layout creation. 
To change this use the 'metric collection' parameter of the layout 
####################
All stress calculations will be performed using the euclidian norm
####################
Spring constant is set to  0.6 
Damping constant is set to 0 
Spring constant scaling factor is set to 0.03333333333333333 
####################
The algorithm will use a k-nearest neighbours graph generated by the k-NN Descent algorithm
####################
A 2D layout of the "rna N3k" dataset will be created 
using the "Chalmers' 1996" algorithm
####################
The HD distance measure used is: euclidean
####################
####################
No metrics will be collected during layout creation. 
To change this use the 'metric collection'

  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._set_arrayXarray(i, j, x)
  self._se