In [None]:
import os

# Experiment configuration: everything this notebook reads or writes lives
# under the subject directory; results go to `{subj}/{exp}`.
N_STAGES = 9
subj = "Subj1"
exp = "exp_feature_selection"

# Create the results directory up front (no error if it already exists).
out_dir = f"{subj}/{exp}"
os.makedirs(out_dir, exist_ok = True)

In [None]:
%load_ext autoreload
%autoreload 2

import time
import warnings
import itertools
# Silence every Python warning for the rest of the kernel session; anything
# that must be visible has to be printed explicitly.
warnings.filterwarnings('ignore')

import SDA
import SDA.analytics
import SDA.clustquality

import umap
import tqdm
import numpy
import pandas
import sklearn.preprocessing
import sklearn.decomposition
import tqdm.contrib.itertools
import sklearn.feature_selection

In [None]:
# Reference edges used as ground truth when scoring SDA results
# (passed to SDA.clustquality.cluster_metrics_ground in try_params).
edges_true_path = f"{subj}/reproduction/internal/best_edges.txt"
edges_true = numpy.loadtxt(edges_true_path).astype(numpy.int32)

In [None]:
params = [ ]

# Candidate values for `len_st_thr`: every attempt is a one-element list.
# Multi-threshold combinations of 0/20/40/60 are not part of this sweep.
len_st_thr_attempts = [ [ thr ] for thr in (0, 20, 40, 60) ]

# Coarse grid: cluster counts run over 2..20 in steps of 3 and neighbour counts
# over 20..50 in steps of 5. Each *_max range starts at the matching *_min, so
# only (min <= max) pairs are generated.
for n_clusters_min in range(2, 21, 3):
    for k_neighbours_min in range(20, 51, 5):
        for len_st_thr in len_st_thr_attempts:
            for n_clusters_max in range(n_clusters_min, 21, 3):
                for k_neighbours_max in range(k_neighbours_min, 51, 5):
                    # The *_max_thr lists are pinned to the current maxima.
                    n_cl_max_thr = [ n_clusters_max ]
                    k_neighb_max_thr = [ k_neighbours_max ]

                    # Key order is significant: it becomes the column order
                    # of the results table built in try_params.
                    params.append(dict(
                        scale = False,
                        n_clusters_min = n_clusters_min,
                        n_clusters_max = n_clusters_max,
                        k_neighbours_min = k_neighbours_min,
                        k_neighbours_max = k_neighbours_max,
                        len_st_thr = len_st_thr,
                        n_cl_max_thr = n_cl_max_thr,
                        k_neighb_max_thr = k_neighb_max_thr,
                        # Both bounds are fixed to N_STAGES - 1.
                        n_edge_clusters_min = N_STAGES - 1,
                        n_edge_clusters_max = N_STAGES - 1,
                    ))

print(len(params))

In [None]:
# Value passed as `n_jobs` to every SDA.SDA(...) call in this notebook.
N_JOBS = 15

def try_default(features: numpy.ndarray):
    """
    Baseline run: apply SDA to `features` without any tuned hyper-parameters.

    Only `n_jobs`, `scale = False` and `verbose = True` are passed to SDA.SDA.
    Prints the elapsed wall-clock time and displays the table returned by
    `SDA.analytics.best_results` for the 'Avg-Silh' key.

    Args:
        features: feature matrix handed to `SDA.SDA(...).apply`.
    """
    started_at = time.time()

    sda = SDA.SDA(n_jobs = N_JOBS, scale = False, verbose = True)
    # `apply` returns a pair; only the first element is used here.
    target_result, _unused = sda.apply(features)

    elapsed = time.time() - started_at
    print('Target time:', elapsed)

    best = SDA.analytics.best_results(target_result, key = 'Avg-Silh')
    display(best)

def try_params(features: numpy.ndarray, result_name: str):
    """
    Run SDA once per configuration in the notebook-level `params` list and save the outcomes.

    For every configuration that completes, the best result by 'Avg-Silh' for
    `N_STAGES` stages is selected, scored against `edges_true` with
    `SDA.clustquality.cluster_metrics_ground`, and stored together with the
    configuration and the wall-clock time of the `apply` call. All rows are
    written to `{subj}/{exp}/{result_name}.csv` and the first rows are displayed.

    Configurations for which SDA raises are still skipped (best effort), but
    the first few failures are reported in detail and the total is summarised,
    so a sweep that lost part of its grid can be told apart from a complete one.

    Args:
        features: feature matrix handed to `SDA.SDA(...).apply`.
        result_name: file stem of the CSV written under `{subj}/{exp}/`.
    """
    max_reported_failures = 5  # keep the cell output short on large grids

    results = [ ]
    n_failed = 0
    for param in tqdm.tqdm(params):
        start = time.time()
        try:
            result, _ = SDA.SDA(**param, n_jobs = N_JOBS, verbose = False).apply(features)
        except Exception as e:
            n_failed += 1
            if n_failed <= max_reported_failures:
                # Warnings are globally silenced in this notebook, so report via
                # tqdm.write, which also leaves the progress bar intact.
                tqdm.tqdm.write(f"SDA failed for {param}: {type(e).__name__}: {e}")
            continue
        end = time.time()

        result = SDA.analytics.best_result(result, key = 'Avg-Silh', n_stages = N_STAGES)
        metrics = SDA.clustquality.cluster_metrics_ground(edges_true, result['St_edges'])
        # Only the SDA `apply` call is timed, not the scoring above.
        result['time'] = (end - start)
        results.append(dict(**param, **result, **metrics))

    if n_failed:
        print(f"Skipped {n_failed} of {len(params)} configurations because SDA raised an exception")

    results = pandas.DataFrame(results)
    results.to_csv(f"{subj}/{exp}/{result_name}.csv")
    display(results.head())

### TDA

In [None]:
# Load the precomputed TDA feature table.
features_tda_raw = pandas.read_feather(f'{subj}/exp_final_filtered/all_features.feather')
print(features_tda_raw.shape)

# Standardise every column before dimensionality reduction.
tda_scaler = sklearn.preprocessing.StandardScaler()
features_tda = tda_scaler.fit_transform(features_tda_raw)
print(features_tda.shape)

#### UMAP

In [None]:
# 15-component UMAP embedding of the scaled TDA features, with a fixed seed.
tda_umap_reducer = umap.UMAP(n_components = 15, random_state = 42)
features_tda_umap = tda_umap_reducer.fit_transform(features_tda)
print(features_tda_umap.shape)

In [None]:
# Baseline: SDA without tuned hyper-parameters on the UMAP-embedded TDA features.
try_default(features_tda_umap)

In [None]:
# Sweep every configuration in `params`; results are saved as {subj}/{exp}/tda_umap.csv.
try_params(features_tda_umap, 'tda_umap')

#### PCA

In [None]:
# 15-component PCA projection of the scaled TDA features.
# NOTE(review): with svd_solver = "full" the random_state presumably has no
# effect — confirm against the scikit-learn PCA documentation.
tda_pca = sklearn.decomposition.PCA(n_components = 15, svd_solver = "full", random_state = 42)
features_tda_pca = tda_pca.fit_transform(features_tda)
print(features_tda_pca.shape)

In [None]:
# Baseline: SDA without tuned hyper-parameters on the PCA-projected TDA features.
try_default(features_tda_pca)

In [None]:
# Sweep every configuration in `params`; results are saved as {subj}/{exp}/tda_pca.csv.
try_params(features_tda_pca, 'tda_pca')

### Neurofeatures

In [None]:
# Load the four neuro-feature tables.
# NOTE(review): judging by the file names these are presumably PSD, coherence
# and PLV features — confirm against the code that produced them.
df_ft_psd_loc_db = pandas.read_feather(f'{subj}/src/df_ft_psd_loc_db.feather')
df_ft_psd_ind_loc_log = pandas.read_feather(f'{subj}/src/df_ft_psd_ind_loc_log.feather')
df_ft_coh_ind_loc = pandas.read_feather(f'{subj}/src/df_ft_coh_ind_loc.feather')
df_ft_plv_ind_loc = pandas.read_feather(f'{subj}/src/df_ft_plv_ind_loc.feather')

# Put the tables side by side (column-wise), in the order listed.
neuro_frames = [
    df_ft_psd_loc_db,
    df_ft_psd_ind_loc_log,
    df_ft_coh_ind_loc,
    df_ft_plv_ind_loc,
]
features_neuro_raw = pandas.concat(neuro_frames, axis = 1)
print(features_neuro_raw.shape)

# Standardise every column before dimensionality reduction.
neuro_scaler = sklearn.preprocessing.StandardScaler()
features_neuro = neuro_scaler.fit_transform(features_neuro_raw)
print(features_neuro.shape)

#### UMAP

In [None]:
# 15-component UMAP embedding of the scaled neuro features, with a fixed seed.
neuro_umap_reducer = umap.UMAP(n_components = 15, random_state = 42)
features_neuro_umap = neuro_umap_reducer.fit_transform(features_neuro)
print(features_neuro_umap.shape)

In [None]:
# Baseline: SDA without tuned hyper-parameters on the UMAP-embedded neuro features.
try_default(features_neuro_umap)

In [None]:
# Sweep every configuration in `params`; results are saved as {subj}/{exp}/neuro_umap.csv.
try_params(features_neuro_umap, 'neuro_umap')

#### PCA

In [None]:
# 15-component PCA projection of the scaled neuro features.
# NOTE(review): with svd_solver = 'full' the random_state presumably has no
# effect — confirm against the scikit-learn PCA documentation.
neuro_pca = sklearn.decomposition.PCA(n_components = 15, svd_solver = 'full', random_state = 42)
features_neuro_pca = neuro_pca.fit_transform(features_neuro)
print(features_neuro_pca.shape)

In [None]:
# Baseline: SDA without tuned hyper-parameters on the PCA-projected neuro features.
try_default(features_neuro_pca)

In [None]:
# Sweep every configuration in `params`; results are saved as {subj}/{exp}/neuro_pca.csv.
try_params(features_neuro_pca, 'neuro_pca')