In [7]:
import os

# Subject identifier and experiment name; together they select the folder
# that this notebook writes its results into.
subj, exp = "Subj1", "reproduction"

# Create the per-experiment output folder (and any missing parents); an
# already existing folder is not an error.
os.makedirs(name = f"{subj}/{exp}", exist_ok = True)

### Imports

In [8]:
%load_ext autoreload
%autoreload 2

import pandas
import sklearn.preprocessing
import sklearn.decomposition

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Features

In [9]:
# Read the four precomputed feature tables for the current subject from its
# `src` folder. Targets and paths are listed in the same order.
(
    df_ft_psd_loc_db,
    df_ft_psd_ind_loc_log,
    df_ft_coh_ind_loc,
    df_ft_plv_ind_loc,
) = (
    pandas.read_feather(feather_path)
    for feather_path in (
        f'{subj}/src/df_ft_psd_loc_db.feather',
        f'{subj}/src/df_ft_psd_ind_loc_log.feather',
        f'{subj}/src/df_ft_coh_ind_loc.feather',
        f'{subj}/src/df_ft_plv_ind_loc.feather',
    )
)

In [10]:
def _standardize_features(df_features):
    """Standardize every column of ``df_features`` with a fresh StandardScaler.

    Parameters
    ----------
    df_features : pandas.DataFrame
        Feature table; it is converted with ``to_numpy()`` before scaling.

    Returns
    -------
    tuple
        ``(scaled, df_scaled)``: the array returned by ``fit_transform`` and a
        DataFrame wrapping that array under the original column labels. The
        DataFrame gets a default index; the input's index is not carried over.
    """
    scaled = sklearn.preprocessing.StandardScaler().fit_transform(df_features.to_numpy())
    return scaled, pandas.DataFrame(scaled, columns = df_features.columns)


# Each feature group is scaled independently with its own scaler.

# PSD & PSD indices
ft_psd_loc_db_sc, df_ft_psd_loc_db_sc = _standardize_features(df_ft_psd_loc_db)
ft_psd_ind_loc_sc, df_ft_psd_ind_loc_sc = _standardize_features(df_ft_psd_ind_loc_log)

# Coherence indices
ft_coh_ind_loc_sc, df_ft_coh_ind_loc_sc = _standardize_features(df_ft_coh_ind_loc)

# PLV indices
ft_plv_ind_loc_sc, df_ft_plv_ind_loc_sc = _standardize_features(df_ft_plv_ind_loc)

### PCA

In [11]:
# Assemble the full feature matrix by placing the four standardized tables
# side by side (column-wise), then report how many features there are.
df_ft_tot_orig = pandas.concat(
    [ df_ft_psd_loc_db_sc, df_ft_psd_ind_loc_sc, df_ft_coh_ind_loc_sc, df_ft_plv_ind_loc_sc ],
    axis = 1,
)
print(len(df_ft_tot_orig.columns))

N_COMP = 15    # number of principal components to keep
# Fixed seed so that Restart & Run All reproduces the same components: with
# the default svd_solver='auto', scikit-learn may pick the randomized solver
# for a matrix of this size, and that solver draws random numbers.
PCA_SEED = 0

pca = sklearn.decomposition.PCA(n_components = N_COMP, random_state = PCA_SEED)
ft_tot_pca = pca.fit_transform(df_ft_tot_orig)

print(ft_tot_pca.shape)
# Cast to built-in float before rounding so the printed values are plain
# numbers regardless of how the installed numpy formats its scalar types.
print('Explained variance', round(float(pca.explained_variance_ratio_.sum()), 2))
print([ round(float(x), 3) for x in pca.explained_variance_ratio_ ])

# One column label per retained component: PCA_0, PCA_1, ...
pca_comp_names = [ f'PCA_{i}' for i in range(ft_tot_pca.shape[1]) ]
if subj != 'Subj2':
    df_ft_tot_pca = pandas.DataFrame(ft_tot_pca, columns = pca_comp_names)
else:
    # for m8 (artifacts at the end): the last two rows are dropped.
    # NOTE(review): these rows are removed only after the PCA was fitted on
    # them - confirm that fitting on the artifact rows is intended.
    df_ft_tot_pca = pandas.DataFrame(ft_tot_pca[:-2], columns = pca_comp_names)
print(df_ft_tot_pca.shape)

765
(1046, 15)
Explained variance 0.71
[0.211, 0.156, 0.069, 0.063, 0.046, 0.032, 0.026, 0.02, 0.018, 0.015, 0.013, 0.012, 0.01, 0.009, 0.009]
(1046, 15)


### Save

In [12]:
# Write the PCA feature table into the subject/experiment folder, then show
# it in the cell output.
features_path = f"{subj}/{exp}/features.feather"
df_ft_tot_pca.to_feather(features_path)
display(df_ft_tot_pca)

Unnamed: 0,PCA_0,PCA_1,PCA_2,PCA_3,PCA_4,PCA_5,PCA_6,PCA_7,PCA_8,PCA_9,PCA_10,PCA_11,PCA_12,PCA_13,PCA_14
0,-8.743966,22.977588,1.077182,1.635826,11.691562,-3.624053,2.661766,-3.506708,0.455641,0.210322,10.381486,-0.753919,-6.468616,3.380270,-1.763846
1,-12.154091,19.444799,-0.548333,-1.653667,7.772657,1.678627,3.414707,-2.426707,-0.100938,2.471796,4.799469,-4.022783,-4.040144,4.663192,2.650693
2,-9.577463,18.651394,1.253133,-3.619467,7.876716,0.522741,0.852758,0.398999,-0.727722,2.790214,0.733131,-5.441115,-1.857216,1.547514,1.613315
3,-6.459233,19.112326,4.519170,-3.033608,5.258238,0.665391,0.512832,-1.745307,-0.642087,6.526266,2.790894,-5.170352,-6.309313,-0.233129,2.189828
4,-4.925011,21.710955,3.192603,-3.012425,7.050576,2.742248,2.464028,-2.165525,0.331380,7.488011,4.007040,-3.866245,-6.020233,-0.796671,2.582022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1041,-2.668554,-17.052477,-0.284557,-5.457077,-0.955217,1.641773,-4.270602,2.373568,-4.763842,-0.169753,2.359550,-1.516441,-0.122998,-0.567872,-2.394437
1042,3.475329,-16.529435,-0.091754,-6.602376,-2.313119,-2.047079,-3.927931,2.878920,-4.478084,0.957606,1.843706,-2.004673,1.213493,2.623014,-1.133993
1043,8.469497,-3.681575,-5.333180,-3.812369,-4.731431,1.102347,-1.646529,3.984863,-2.946492,1.653507,0.049530,-1.375385,-0.287320,-0.919625,1.460860
1044,13.221696,1.642634,-5.285408,1.566400,-2.870373,-3.958669,-0.261184,4.652563,1.118980,4.275424,1.643169,-2.650443,-0.378861,-1.129749,0.274154
