In [2]:
from experiments.cube.Cube import Cube
from hdimvis.metrics.stress.stress import vectorised_stress
from hdimvis.metrics.distance_measures.euclidian_and_manhattan import euclidean
from sklearn.decomposition import PCA
from hdimvis.algorithms.stochastic_ntet_algo.SNeD import SNeD
from hdimvis.create_low_d_layout.LowDLayoutCreation import LowDLayoutCreation
from hdimvis.data_fetchers.DataFetcher import DataFetcher
from hdimvis.visualise_layouts_and_metrics.plot import show_layouts, show_generation_metrics
from hdimvis.metrics.distance_measures.euclidian_and_manhattan import manhattan
import numpy as np
import matplotlib.pyplot as plt
import os
import definitions
from time import perf_counter
import pathlib
from pathlib import Path

In [12]:
# Names of the datasets used across the experiments (presumably the names
# accepted by DataFetcher.fetch_data -- TODO confirm against the fetcher).
# One name per element: a missing comma between two adjacent string literals
# silently concatenates them into a single bogus entry.
all_datasets_list = [
    'poker',
    'mnist',
    'bonds',
    'coil20',
    'rna N3k',
    'airfoil',
    'wine quality',
    'fashion mnist',
    'shuttle',
    'flow cytometry',
]


In [3]:
# Synthetic dataset: build the cube, then draw a 3000-point sample from it.
cube = Cube(num_points=100, side=30, angle=0.4)
cube_dataset = cube.get_sample_dataset(3000)

# Fetched datasets, requested in the same order as before:
# 'rna N3k', then 'coil20', then 'airfoil'.
rna_dataset, coil20_dataset, airfoil_dataset = (
    DataFetcher.fetch_data(dataset_name)
    for dataset_name in ('rna N3k', 'coil20', 'airfoil')
)


####################
Fetching the "rna N3k" dataset
####################
Dataset loaded
Dataset shape: (3000, 50)
####################
Fetching the "coil20" dataset
####################
Dataset loaded
Dataset shape: (1440, 1024)
####################
Fetching the "airfoil" dataset
####################
Dataset loaded
Dataset shape: (1502, 5)


In [4]:
# N-tet sizes to benchmark and the display name used for each size in plot
# titles. The two lists are index-aligned (names_list[j] labels ntet_sizes[j]).
# There is no entry for size 11, so no 'undecet' name is listed: keeping it
# shifted every later label by one position (12 was shown as "undectet",
# 500 as "duocentet", ...).
ntet_sizes = [3, 4, 5, 6, 7, 8, 9, 10, 12, 20, 50, 100, 200, 500]
names_list = ['trio', 'quartet', 'quintet', 'sextet', 'septet', 'octet', 'nonet', 'decet',
              'duodecet', 'vigintet', 'quinquagintet', 'centet', 'duocentet', 'quingentet']

# Guard against the two lists drifting out of sync again.
assert len(names_list) == len(ntet_sizes), "names_list and ntet_sizes must be index-aligned"

In [5]:


# How many times each (dataset, n-tet size) run is repeated before averaging.
num_repeats = 2

# Datasets to process, in this order.
datasets = [
    cube_dataset,
    rna_dataset,
    coil20_dataset,
    airfoil_dataset,
]

# One row per n-tet size; the 4 columns hold time, stress,
# average quartet stress and n-tet size.
results = np.zeros(shape=(len(ntet_sizes), 4))

In [None]:
# Metrics to collect during layout creation, each measured every 200 iterations.
metric_collection = {'Average quartet stress': 200, 'Stress': 200}

for dataset in datasets:

    output_dir = (Path(definitions.PROJECT_ROOT).joinpath(
        Path(f"experiments/stochastic_ntet_descent/out/{dataset.name}/"))).resolve()
    # Create missing parent directories and tolerate an existing directory so
    # the cell can be re-run without raising FileExistsError.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Common starting layout for every run on this dataset: 2D PCA projection
    # rescaled so that its overall standard deviation is 10.
    Xld = PCA(n_components=2, whiten=False, copy=True).fit_transform(dataset.data).astype(np.float64)
    Xld *= 10/np.std(Xld)

    for j, size in enumerate(ntet_sizes):
        # Totals over the repeats; divided by num_repeats below.
        elapsed = 0.0
        stress = 0.0
        average_quartet_stress = 0.0

        for _ in range(num_repeats):
            # Hand each run its own copy of the initial layout so that every
            # repeat and every n-tet size starts from the same PCA layout even
            # if the algorithm updates the array it is given in place
            # (NOTE(review): SNeD's handling of initial_layout is not visible here).
            squad = SNeD(dataset=dataset, initial_layout=Xld.copy(), ntet_size=size)
            start = perf_counter()
            layout = LowDLayoutCreation().create_layout(squad, optional_metric_collection=metric_collection, no_iters=1000)

            # Elapsed wall-clock time is end minus start (the reverse yields
            # a negative duration).
            elapsed += perf_counter() - start
            # [1][-1]: the last value collected for the metric.
            stress += layout.collected_metrics['Stress'][1][-1]
            average_quartet_stress += layout.collected_metrics['Average quartet stress'][1][-1]

        # Row layout: time, stress, average quartet stress, n-tet size.
        results[j][0] = elapsed/num_repeats
        results[j][1] = stress/num_repeats
        results[j][2] = average_quartet_stress/num_repeats
        results[j][3] = size

        # Metrics of the last repeat only.
        print(layout.collected_metrics)

        title = f"Stochastic {names_list[j]} ({size}) descent"

        # Plots show the layout of the last repeat. The cube dataset is
        # plotted through the cube object itself; identity is the intended
        # test here, so use `is` rather than `==`.
        if dataset is cube_dataset:
            cube.plot_2d(layout, title=title + " - layout", save_to=output_dir)
        else:
            show_layouts(layout, use_labels=True, color_map='rainbow', title=title + " - layout", save_to=output_dir)

        show_generation_metrics(layout, quartet_stress=True, title= title + " - generation metrics", save_to=output_dir)

    # NOTE(review): the results table for ALL n-tet sizes is saved under the
    # title of the LAST size processed; the file name is kept unchanged so
    # that existing loaders of this file keep working.
    np.save((Path(output_dir).joinpath(Path(f"{title}"))).resolve(), results)

    # Reset the shared results table before processing the next dataset.
    results.fill(0)



####################
A 2D layout of the "3d cube of 30^3 points - 3000 points sampled" dataset will be created 
using the "Stochastic N-tet Descent MDS" algorithm
####################
The HD distance measure used is: euclidean
####################
"Average quartet stress" will be measured every 200 iterations
####################
"Stress" will be measured every 200 iterations
####################
All stress calculations will be performed using the euclidian norm
####################
 "N-tet" size: 3

 Vectorised euclidean stress 


 Vectorised euclidean stress 


 Vectorised euclidean stress 


 Vectorised euclidean stress 


 Vectorised euclidean stress 


 Vectorised euclidean stress 

####################
A 2D layout of the "3d cube of 30^3 points - 3000 points sampled" dataset will be created 
using the "Stochastic N-tet Descent MDS" algorithm
####################
The HD distance measure used is: euclidean
####################
"Average quartet stress" will be measured every 200 ite