In [1]:
from hdimvis.data_fetchers.DataFetcher import DataFetcher
from hdimvis.metrics.distance_measures.euclidian_and_manhattan import manhattan,euclidean
from hdimvis.algorithms.stochastic_ntet_algo.SNeD import SNeD
from hdimvis.algorithms.spring_force_algos.chalmers96_algo.Chalmers96 import Chalmers96
from hdimvis.create_low_d_layout.LayoutCreation import LayoutCreation
from hdimvis.algorithms.spring_force_algos.hybrid_algo.Hybrid import Hybrid
from hdimvis.visualise_layouts_and_metrics.plot import show_layout, show_generation_metrics
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap
from time import perf_counter
import numpy as np
from pathlib import Path
from definitions import PROJECT_ROOT
import pickle

In [8]:

# Ten dataset sizes evenly spaced between 100 and 10000 (inclusive).
# Sizes are point counts, so round and store them as integers rather than
# floats; this keeps log output and any downstream indexing free of "100.0".
sizes = np.rint(np.linspace(100, 10000, 10)).astype(int)

In [3]:
# Number of timed repetitions per dataset size.
num_repeats = 3

# Timing table indexed as results[row, size_index, repeat].
# Rows: 0 = dataset size, 1 = Chalmers96 time, 2 = SNeD time, 3 = Hybrid time.
# The second axis has 10 entries, one per dataset size being benchmarked.
results = np.zeros(shape=(4, 10, num_repeats), dtype=np.float64)

In [10]:



def _timed_layout(algorithm, **create_layout_kwargs):
    """Run ``LayoutCreation.create_layout`` on ``algorithm`` and time the call.

    Only the ``create_layout`` call sits inside the timed region; constructing
    the algorithm object is done by the caller and is not measured.

    Args:
        algorithm: algorithm instance to pass to ``LayoutCreation.create_layout``.
        **create_layout_kwargs: forwarded unchanged to ``create_layout``.

    Returns:
        tuple: ``(layout, elapsed)`` where ``elapsed`` is the difference of two
        ``perf_counter()`` readings taken around the call (seconds).
    """
    start = perf_counter()
    layout = LayoutCreation.create_layout(algorithm, **create_layout_kwargs)
    return layout, perf_counter() - start


# Four rows of 20 '#' characters, printed before and after each size header.
banner = "\n".join(["#" * 20] * 4)

for j, size in enumerate(sizes):
    dataset = DataFetcher.fetch_data('metro', size=int(size))
    results[0, j] = size  # row 0: the dataset size, broadcast over all repeats

    # PCA embedding used as the Hybrid algorithm's initial layout, rescaled so
    # that the standard deviation over all coordinates equals 10.
    embedding_PCA = PCA(n_components=2, whiten=False, copy=True).fit_transform(dataset.data).astype(np.float64)
    embedding_PCA *= 10/np.std(embedding_PCA)

    print(banner)
    print(f"size: {size}")
    print(f" number : {j}")
    print(banner)

    for i in range(num_repeats):
        print(f"repeat {i}")

        # Row 1: Chalmers' 1996 algorithm, random initial layout, 100 iterations.
        algo96 = Chalmers96(dataset=dataset, initial_layout=None, distance_fn=euclidean,
                            sample_set_size=10, neighbour_set_size=5)
        layout_96, elapsed_96 = _timed_layout(algo96, no_iters=100)
        results[1, j, i] = elapsed_96

        # Row 2: SNeD, 800 iterations, no metric collection.
        sned = SNeD(dataset=dataset)
        layout_squad, elapsed_sned = _timed_layout(sned, no_iters=800, optional_metric_collection=None)
        results[2, j, i] = elapsed_sned

        # Row 3: Hybrid algorithm started from the PCA embedding.
        # A copy is passed so that every repeat starts from the same untouched
        # PCA layout. NOTE(review): this is defensive - whether Hybrid modifies
        # its initial layout in place is not visible from this notebook.
        hybrid = Hybrid(dataset=dataset, initial_layout=embedding_PCA.copy(), alpha=0.6, distance_fn=euclidean,
                        use_knnd=False, sample_set_size=5, neighbour_set_size=10, use_random_sample=False,
                        use_correct_interpolation_error=True)
        layout_hybrid, elapsed_hybrid = _timed_layout(hybrid, optional_metric_collection=None)
        results[3, j, i] = elapsed_hybrid

        # NOTE: t-SNE (perplexity=500) and UMAP (n_neighbors=500) baselines were
        # timed here in an earlier version of this cell. That code stored its
        # timings in results rows 3 and 4, which would overwrite the Hybrid row
        # and index past a 4-row results array; re-enabling it needs two extra
        # rows in `results`.


# Persist the timing table for the "metro" runs.
# The directory is created if missing so that a long benchmark run is not lost
# to a FileNotFoundError at the very last step.
output_dir = (Path(PROJECT_ROOT) / "experiments" / "confirming_time_complexity" / "out").resolve()
output_dir.mkdir(parents=True, exist_ok=True)

path_to_pickle = output_dir / "results_metro.pickle"
with open(path_to_pickle, 'wb') as pickle_out:
    pickle.dump(results, pickle_out)

####################
Fetching the "metro" dataset
[[ 8.784e+00  8.772e+00 -1.800e-02 ...  0.000e+00  1.000e+00  1.000e+00]
 [ 8.754e+00  8.742e+00 -2.200e-02 ...  0.000e+00  1.000e+00  1.000e+00]
 [ 8.292e+00 -2.200e-02 -1.600e-02 ...  0.000e+00  1.000e+00  1.000e+00]
 ...
 [ 8.078e+00  8.068e+00 -1.600e-02 ...  0.000e+00  1.000e+00  1.000e+00]
 [ 7.570e+00 -6.000e-03  2.482e+00 ...  0.000e+00  1.000e+00  1.000e+00]
 [ 8.450e+00  8.440e+00 -2.200e-02 ...  0.000e+00  1.000e+00  1.000e+00]]
####################
Dataset loaded
Dataset shape: (100, 13)
####################
####################
####################
####################
####################
size: 100.0
 number : 0
####################
####################
####################
####################
repeat 0
####################
The algorithm will use a random initialization for the low D embedding/layout
####################
A 2D layout of the "metro" dataset will be created 
using the "Chalmers' 1996" algorithm
##############

In [7]:
print(results)

# Persist the current contents of `results` as "results.pickle" in the
# experiment's output directory. The directory is created if missing so the
# write cannot fail with FileNotFoundError on a fresh checkout.
output_dir = (Path(PROJECT_ROOT) / "experiments" / "confirming_time_complexity" / "out").resolve()
output_dir.mkdir(parents=True, exist_ok=True)

path_to_pickle = output_dir / "results.pickle"
with open(path_to_pickle, 'wb') as pickle_out:
    pickle.dump(results, pickle_out)

[[[1.00000000e+02 1.00000000e+02 1.00000000e+02]
  [4.53300000e+03 4.53300000e+03 4.53300000e+03]
  [8.96700000e+03 8.96700000e+03 8.96700000e+03]
  [1.34000000e+04 1.34000000e+04 1.34000000e+04]
  [1.78330000e+04 1.78330000e+04 1.78330000e+04]
  [2.22670000e+04 2.22670000e+04 2.22670000e+04]
  [2.67000000e+04 2.67000000e+04 2.67000000e+04]
  [3.11330000e+04 3.11330000e+04 3.11330000e+04]
  [3.55670000e+04 3.55670000e+04 3.55670000e+04]
  [4.00000000e+04 4.00000000e+04 4.00000000e+04]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]
  [0.00000000e+00 0.00000000e+00 0.00000000e+00]]

 [[1.21540530e+00 1.12608820e+00 1.10689230e+00]
  [6.23623403e+01 6.29433342e+01 6.22266693e+01]
  [1.24227842e+02 1.27449235e+02 1.28995787e+02]
  [1.94907287e+02 1.94527702e+02 1.95797185e+02]
  [2.66892025e+02 2.65010931e+02 2.65869836e+02]
  [3.34003877e+02 