In [1]:
import numpy as np

from experiments.utils.SimpleComparison import SimpleComparison
from hdimvis.algorithms.spring_force_algos.chalmers96_algo.Chalmers96 import Chalmers96
from hdimvis.visualise_layouts_and_metrics.plot import show_layout,show_generation_metrics
from hdimvis.algorithms.stochastic_ntet_algo.SNeD import SNeD
from hdimvis.create_low_d_layout.LayoutCreation import LayoutCreation
from hdimvis.data_fetchers.DataFetcher import DataFetcher
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from experiments.utils.get_avg_classwise_f1 import get_avg_classwise_f1
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import f1_score
from pathlib import Path
from definitions import PROJECT_ROOT
import pickle
from time import perf_counter
from  sklearn.manifold import SpectralEmbedding, MDS
from hdimvis.metrics.distance_measures.euclidian_and_manhattan import euclidean

In [2]:
# Load the datasets used by the layout-comparison experiment.
rna = DataFetcher.fetch_data('rna N3k')
# The remaining datasets are currently disabled. To re-enable one, uncomment its
# fetch line and add it to BOTH `datasets` and `dataset_names`, at the same index.
# coli20 =  DataFetcher.fetch_data('coil20')
# globe = DataFetcher.fetch_data('globe', size=7000)
# fmnist = DataFetcher.fetch_data('fashion mnist', size=7000)
# mnist = DataFetcher.fetch_data('mnist', size=7000)

datasets = [rna]
#              coli20, globe, fmnist, mnist]

# `dataset_names[j]` labels `datasets[j]` and becomes part of the pickle file name
# written by the experiment loop, so it must match the fetched dataset's name.
# (Was 'ran N3k', a transposition of the 'rna N3k' key fetched above.)
dataset_names = ['rna N3k']
# , 'coli20', 'globe', 'fashion mnist', 'mnist']

# The two lists are indexed in parallel; fail early if they drift apart.
assert len(datasets) == len(dataset_names), \
    "datasets and dataset_names must have the same length and order"

####################
Fetching the "rna N3k" dataset
####################
Dataset loaded
Dataset shape: (3000, 50)
####################


In [3]:
# Experiment: for every dataset, create `num_repeats` 2-D layouts with SNeD
# ("squad") and with Chalmers96 ("96"), each starting from the same scaled PCA
# projection, and pickle the collected layouts to one file per dataset.
#
# Iteration-count notes kept from earlier runs (presumably
# "<dataset> <Chalmers96 iterations>, squad <SNeD iterations>" — TODO confirm):
# rna 200, squad 2000
# coil 400, squad 1000
#96 globe - 200, squad 1000
# mnist  squad 2000
# fmnist

num_repeats = 15
# iters_squad = [ 1000,1000, 2000, 2000]
# iters_chalmers = [400,200, 200, 200]
iters_squad = [2000]      # SNeD iterations, one entry per dataset (indexed like `datasets`)
iters_chalmers = [200]    # Chalmers96 iterations, one entry per dataset

# Every per-dataset list is indexed with the same `j`; check up front so that a
# missing entry does not surface as an IndexError only after earlier datasets
# have already been processed.
assert len(iters_squad) >= len(datasets), "iters_squad needs one entry per dataset"
assert len(iters_chalmers) >= len(datasets), "iters_chalmers needs one entry per dataset"
assert len(dataset_names) >= len(datasets), "dataset_names needs one entry per dataset"

# The output directory does not depend on the dataset, so resolve it once.
# Create it if missing: otherwise open(..., 'wb') below raises FileNotFoundError
# on a fresh checkout, after all the layout runs have already completed.
output_dir = Path(PROJECT_ROOT).joinpath("experiments/sned_vs_96/out").resolve()
output_dir.mkdir(parents=True, exist_ok=True)

for j, dataset in enumerate(datasets):
    # Shared starting layout: 2-component PCA projection, rescaled in place so that
    # the standard deviation over all coordinates equals 10.
    Xld = PCA(n_components=2, whiten=False, copy=True).fit_transform(dataset.data).astype(np.float64)
    Xld *= 10/np.std(Xld)
    layouts = {"squad": [], "96": []}

    for i in range(num_repeats):
        # Each run gets its own copy of the initial layout so that an algorithm
        # updating its layout in place cannot change the starting point of later
        # runs. NOTE(review): whether SNeD / Chalmers96 mutate the array they are
        # given is not visible in this notebook; the copy is defensive.
        squad_start = perf_counter()
        squad = SNeD(dataset=dataset, initial_layout=Xld.copy(),
                     use_nesterovs_momentum=False, ntet_size=4)
        layout_squad = LayoutCreation.create_layout(squad, no_iters=iters_squad[j])
        print(f"Squad time: {perf_counter() - squad_start}")
        layouts['squad'].append(layout_squad)

        start_96 = perf_counter()
        algo96 = Chalmers96(dataset=dataset, initial_layout=Xld.copy(), damping_constant=0,
                            spring_constant=0.6, use_knnd=False, sample_set_size=10,
                            neighbour_set_size=5)
        layout_96 = LayoutCreation.create_layout(algo96, no_iters=iters_chalmers[j])
        print(f"96 time: {perf_counter() - start_96}")
        layouts['96'].append(layout_96)

    # One pickle per dataset holding {"squad": [...], "96": [...]}, with
    # `num_repeats` layouts under each key.
    path_to_pickle = output_dir.joinpath(f"layouts_{dataset_names[j]}.pickle")
    with open(path_to_pickle, 'wb') as pickle_out:
        pickle.dump(layouts, pickle_out)



####################
A 2D layout of the "rna N3k" dataset will be created 
using the "Stochastic N-tet Descent MDS" algorithm
####################
The HD distance measure used is: euclidean
####################
 "N-tet" size: 4
####################
####################
No metrics will be collected during layout creation. 
To change this use the 'metric collection' parameter of the layout 
####################
All stress calculations will be performed using the euclidian norm
####################
Squad time: 109.73417930000869
####################
A 2D layout of the "rna N3k" dataset will be created 
using the "Chalmers' 1996" algorithm
####################
The HD distance measure used is: euclidean
####################
####################
No metrics will be collected during layout creation. 
To change this use the 'metric collection' parameter of the layout 
####################
All stress calculations will be performed using the euclidian norm
####################
Spring constant is 

In [9]:
# import matplotlib.pyplot as plt
#
# fig, ax = plt.subplots()
# avg_f1 = [f1 for id, f1 in averages]
#
# ax.plot(np.arange(3,200), avg_f1)
# plt.show()

In [10]:
# from experiments.utils.get_f1_for_best_k_with_knn import get_f1_for_best_k_with_knn
# k, f1 = get_f1_for_best_k_with_knn(2,20,10,positions,labels)
#
# print(k)
# print(f1)