In [None]:
import os

# Run configuration used by the cells below.
subj = "Subj1"       # subject folder; paths in this notebook are built relative to it
exp = "exp_pca"      # experiment name; output files go to f"{subj}/{exp}"
print_obj = 7        # index of the sample shown in the example plots

# Create the output folder up front (no error if it already exists).
output_dir = f"{subj}/{exp}"
os.makedirs(output_dir, exist_ok=True)

### Imports

In [None]:
%load_ext autoreload
%autoreload 2

import mne
import numpy
import pandas
import scipy.stats
import gtda.curves
import gtda.plotting
import gtda.homology
import gtda.diagrams
import gtda.time_series
import sklearn.decomposition

### Epochs

In [None]:
# Load the previously saved epochs for this subject, then store the joint plot
# of their average next to the other outputs of this experiment.
epochs_path = f"{subj}/src/epochs_filt_rr-epo.fif"
epochs = mne.read_epochs(epochs_path)

evoked_figure = epochs.average().plot_joint()
evoked_figure.savefig(f"{subj}/{exp}/eeg.svg")

### TakensEmbedding

In [None]:
# Takens embedding of the epoch data; only the embedding dimension is set
# explicitly, every other parameter keeps the library default.
embedder = gtda.time_series.TakensEmbedding(dimension=10)
epoch_signals = epochs.get_data(copy=True)
points = embedder.fit_transform(epoch_signals)
print(points.shape)

In [None]:
# Show the embedded point cloud of the example sample and export it as SVG.
sample_cloud = points[print_obj]
plot = gtda.plotting.plot_point_cloud(sample_cloud)
display(plot)
plot.write_image(
    file=f"{subj}/{exp}/point_cloud.svg",
    format="svg",
)

### VietorisRipsPersistence

In [None]:
# Vietoris-Rips persistence of every point cloud for homology dimensions 0, 1
# and 2; n_jobs=-1 is passed through to the transformer unchanged.
persistence = gtda.homology.VietorisRipsPersistence(
    homology_dimensions=[0, 1, 2],
    n_jobs=-1,
)
raw_diagram = persistence.fit_transform(points)
print(raw_diagram.shape)

In [None]:
# Show the unprocessed persistence diagram of the example sample and export it.
# The sample index comes from `print_obj` (set in the configuration cell) so
# this figure shows the same sample as the point-cloud and processed-diagram
# plots; the index was previously hard-coded to 8.
plot = gtda.plotting.plot_diagram(raw_diagram[print_obj])
display(plot)
plot.write_image(file = f"{subj}/{exp}/raw_diagram.svg", format = "svg")

### Scaler & Filtering

In [None]:
# Preview of the diagrams after rescaling only (no filtering).
# The former `filtering.fit_transform(...)` call in this cell was dropped: its
# result was overwritten on the very next line, so it only cost compute time.
# NOTE(review): this cell leaves `diagram` holding the *unfiltered* scaled
# diagrams; the following cell reassigns `diagram` with filtering applied, so
# running the cells out of order changes what later cells consume.
scaler = gtda.diagrams.Scaler()
diagram = scaler.fit_transform(raw_diagram)
print(diagram.shape)
gtda.plotting.plot_diagram(diagram[print_obj])

In [None]:
# Rescale the raw diagrams, then run them through Filtering (both transformers
# use their default parameters). `diagram` is the input of every feature cell.
scaler = gtda.diagrams.Scaler()
filtering = gtda.diagrams.Filtering()

scaled_diagram = scaler.fit_transform(raw_diagram)
diagram = filtering.fit_transform(scaled_diagram)
print(diagram.shape)

In [None]:
# Show the processed diagram of the example sample and export it as SVG.
sample_diagram = diagram[print_obj]
plot = gtda.plotting.plot_diagram(sample_diagram)
display(plot)
plot.write_image(
    file=f"{subj}/{exp}/diagram.svg",
    format="svg",
)

### Features

In [90]:
# Persistence landscapes of the processed diagrams, reduced to scalar features
# with the 'argmax' standard feature.
landscape_curves = gtda.diagrams.PersistenceLandscape().fit_transform(diagram)
landscape_reducer = gtda.curves.StandardFeatures('argmax')
landscape_features = landscape_reducer.fit_transform(landscape_curves)
print(landscape_features.shape)

(1046, 3)


In [91]:
# Betti curves -> their derivative -> 'max' standard feature, applied in
# exactly that order.
betti_curves = gtda.diagrams.BettiCurve().fit_transform(diagram)
betti_derivatives = gtda.curves.Derivative().fit_transform(betti_curves)
betti_features = gtda.curves.StandardFeatures('max').fit_transform(betti_derivatives)
print(betti_features.shape)

(1046, 3)


In [92]:
# Persistence entropy of the processed diagrams (default parameters).
entropy_transformer = gtda.diagrams.PersistenceEntropy()
entropy_features = entropy_transformer.fit_transform(diagram)
print(entropy_features.shape)

(1046, 3)


In [93]:
# NumberOfPoints features of the processed diagrams (default parameters).
point_counter = gtda.diagrams.NumberOfPoints()
number_of_points_features = point_counter.fit_transform(diagram)
print(number_of_points_features.shape)

(1046, 3)


In [94]:
# Diagram amplitudes with the library's default metric and parameters.
amplitude_transformer = gtda.diagrams.Amplitude()
amplitude_features = amplitude_transformer.fit_transform(diagram)
print(amplitude_features.shape)

(1046, 3)


In [None]:
# Analyzing amplitudes (averaged by region, full frequency range)
#
# NOTE(review): `n_regions`, `pd_ft_tda_loc`, `regions` and `tda_metrics` are
# not defined in any cell visible in this notebook, so this cell cannot run on
# a fresh kernel as-is — presumably it was carried over from another analysis.
# Define them upstream (or remove this cell) before relying on Run All.
# From the way they are used here:
#   - pd_ft_tda_loc[r] is indexed as [:, :, 0] and [:, :, 1], i.e. a 3-D array
#     whose last axis holds the two values combined below (birth and death,
#     going by the original naming — confirm);
#   - regions[r][1] is a string used in column names;
#   - tda_metrics[k] is (metric passed to Amplitude, column-name suffix).
#
# Library names follow the notebook's import cell (`numpy`, `pandas`,
# `scipy.stats`, `gtda.diagrams`); the former `np` / `pd` / `sp` / bare
# `Amplitude` spellings are never imported here.


def _row_statistics(values, prefix):
    """Summarise each row of ``values`` with a fixed set of NaN-aware statistics.

    Parameters
    ----------
    values : 2-D float array
        Entries that must be ignored are expected to be NaN already.
        All statistics are taken along axis 1.
    prefix : str
        Column-name prefix; each statistic is stored under ``prefix + '_<stat>'``.

    Returns
    -------
    dict
        Column name -> 1-D array, in the order
        max, mean, std, p75, kurt, skew, sum, norm.
        (The 25th percentile and the median were disabled in the original
        cell and are therefore not produced.)
    """
    return {
        prefix + '_max': numpy.nan_to_num(numpy.nanmax(values, axis=1)),
        prefix + '_mean': numpy.nan_to_num(numpy.nanmean(values, axis=1)),
        prefix + '_std': numpy.nan_to_num(numpy.nanstd(values, axis=1)),
        prefix + '_p75': numpy.nan_to_num(numpy.nanpercentile(values, 75, axis=1)),
        prefix + '_kurt': numpy.nan_to_num(scipy.stats.kurtosis(values, axis=1, nan_policy='omit')),
        prefix + '_skew': numpy.nan_to_num(scipy.stats.skew(values, axis=1, nan_policy='omit')),
        prefix + '_sum': numpy.nan_to_num(numpy.nansum(values, axis=1)),
        # L2 norm per row with NaNs replaced by 0 (the original author noted
        # this as identical to the Wasserstein distance — not verified here).
        prefix + '_norm': numpy.linalg.norm(numpy.nan_to_num(values), ord=2, axis=1),
    }


# Collect every column first and build the DataFrame once at the end, instead
# of inserting columns one at a time into an initially empty frame.
tda_columns = {}
for _reg in range(n_regions):
    pd_ft = pd_ft_tda_loc[_reg]
    region_prefix = 'F_' + regions[_reg][1]

    # Difference of the two last-axis values; entries equal to 0 (both values
    # identical, i.e. diagonal points) are masked out as NaN.
    pd_ft_life = pd_ft[:, :, 1] - pd_ft[:, :, 0]
    pd_ft_life[pd_ft_life == 0] = numpy.nan

    # Midpoint of the two last-axis values.
    # NOTE(review): the original comment says this mask removes diagonal
    # points, but it only drops midpoints that are exactly 0; a diagonal point
    # away from the origin is kept. Confirm that this is intended.
    pd_ft_bd2 = (pd_ft[:, :, 0] + pd_ft[:, :, 1]) / 2.0
    pd_ft_bd2[pd_ft_bd2 == 0] = numpy.nan

    tda_columns.update(_row_statistics(pd_ft_life, region_prefix + '_life'))
    tda_columns.update(_row_statistics(pd_ft_bd2, region_prefix + '_bd2'))

    # One amplitude column per configured metric; only column 0 of the
    # transformer output is kept.
    for metric_spec in tda_metrics:
        ft_ampl = gtda.diagrams.Amplitude(metric=metric_spec[0], n_jobs=-1).fit_transform(pd_ft)
        tda_columns[region_prefix + '_' + metric_spec[1]] = ft_ampl[:, 0]

df_ft_tda_loc = pandas.DataFrame(tda_columns)

# NaN values to zeros
df_ft_tda_loc = pandas.DataFrame(
    numpy.nan_to_num(df_ft_tda_loc.to_numpy()),
    columns=df_ft_tda_loc.columns,
)

print(df_ft_tda_loc.to_numpy().shape)

In [96]:
# Stack all per-sample feature groups side by side (column-wise); the order
# here fixes the column order of `features`.
feature_groups = (
    landscape_features,
    betti_features,
    entropy_features,
    number_of_points_features,
    amplitude_features,
)
features = numpy.concatenate(feature_groups, axis=1)
print(features.shape)

(1046, 15)


### PCA & Save

In [97]:
# PCA on the concatenated features. N_COMP is handed to `n_components` as-is;
# a float between 0 and 1 is a target explained-variance fraction according to
# the scikit-learn documentation.
# NOTE(review): `features` is passed in without any scaling step in this
# notebook — confirm that this is intended.
# NOTE(review): `pca_features` is not used by any later cell visible here.
N_COMP = 0.7
pca = sklearn.decomposition.PCA(n_components=N_COMP, random_state=0)
pca_features = pca.fit_transform(features)

variance_ratios = pca.explained_variance_ratio_
print(pca_features.shape)
print('Explained variance', round(variance_ratios.sum(), 2))
print(list(map(lambda ratio: round(ratio, 3), variance_ratios)))

(1046, 1)
Explained variance 0.85
[0.848]


In [98]:
# Persist the feature matrix as a Feather file in the experiment folder and
# show it as the cell output.
# NOTE(review): this stores the raw `features`, not `pca_features` — confirm
# which of the two is meant to be saved for this experiment.
features_path = f"{subj}/{exp}/features.feather"
df = pandas.DataFrame(features)
df.to_feather(features_path)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,37.0,20.0,18.0,0.0,13.0,7.0,8.911387,5.794139,5.653280,491.0,136.0,62.0,0.172618,0.471396,0.018095
1,46.0,23.0,23.0,0.0,8.0,5.0,8.885214,5.741187,5.223418,491.0,140.0,47.0,0.243322,0.558745,0.020219
2,32.0,18.0,20.0,0.0,9.0,7.0,8.911129,6.040048,5.362892,491.0,145.0,55.0,0.144134,0.274746,0.024298
3,35.0,19.0,25.0,0.0,9.0,3.0,8.900512,5.226204,4.284227,491.0,95.0,23.0,0.162369,0.446546,0.018118
4,35.0,20.0,21.0,0.0,10.0,6.0,8.904863,6.016462,5.350425,491.0,153.0,53.0,0.162369,0.341199,0.029659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1041,29.0,20.0,43.0,0.0,2.0,1.0,8.893091,4.107055,2.577471,491.0,37.0,9.0,0.121193,0.323925,0.100975
1042,24.0,57.0,35.0,0.0,3.0,1.0,8.898533,3.359531,2.423112,491.0,25.0,9.0,0.089076,0.544349,0.044050
1043,23.0,23.0,35.0,0.0,3.0,1.0,8.898618,4.166171,2.568725,491.0,38.0,10.0,0.084500,0.308390,0.039355
1044,20.0,17.0,22.0,0.0,3.0,2.0,8.909021,3.837490,2.061480,491.0,24.0,6.0,0.067331,0.261568,0.018552
