In [34]:
import os

# Subject folder and experiment name. Together they form the output directory
# "<subj>/<exp>" that the later cells of this notebook write their files into.
subj = "Subj2"
exp = "exp_merged"
# exist_ok = True: re-running this cell must not fail once the directory exists.
os.makedirs(f"{subj}/{exp}", exist_ok = True)

In [35]:
import numpy
import pandas
import sklearn.preprocessing
import sklearn.decomposition

### Get traditional features

In [36]:
# Load the four precomputed feature tables stored in the subject's src/ folder.
df_ft_psd_loc_db = pandas.read_feather(f'{subj}/src/df_ft_psd_loc_db.feather')
df_ft_psd_ind_loc_log = pandas.read_feather(f'{subj}/src/df_ft_psd_ind_loc_log.feather')
df_ft_coh_ind_loc = pandas.read_feather(f'{subj}/src/df_ft_coh_ind_loc.feather')
df_ft_plv_ind_loc = pandas.read_feather(f'{subj}/src/df_ft_plv_ind_loc.feather')

# Put the four tables side by side (axis = 1) to get one wide frame.
# NOTE(review): column-wise concat matches rows by index label, so the four
# tables are assumed to share the same index - confirm.
df_ft_tot_orig = pandas.concat([ df_ft_psd_loc_db, df_ft_psd_ind_loc_log, df_ft_coh_ind_loc, df_ft_plv_ind_loc ], axis = 1)
print(df_ft_tot_orig.shape)

(2019, 765)


### Get topological features

In [37]:
# Full topological feature table stored under the subject's exp_final/ folder.
topo_features = pandas.read_feather(f'{subj}/exp_final/all_features.feather')
print(topo_features.shape)

(2019, 19563)


In [38]:
# Per-feature score table; the following cell relies on its "Feature" and "IV"
# columns ("IV" is presumably information value - confirm against the producer).
IV = pandas.read_csv(f'{subj}/exp_final/internal/iv.csv')
print(IV.shape)

(19563, 4)


In [39]:
# Order the score table from the highest to the lowest "IV" value.
IV = IV.sort_values(by = 'IV', ascending = False)
# Names of the top-ranked features: as many as there are traditional feature
# columns, so both feature families contribute the same number of columns.
best_features = IV["Feature"].iloc[:df_ft_tot_orig.shape[1]].tolist()
print(len(best_features))

765


In [40]:
# Keep every row, but only the columns named in best_features (in that order).
best_topo_features = topo_features.loc[:, best_features]
print(best_topo_features.shape)

(2019, 765)


### Merge traditional and topological features

In [41]:
# Traditional features first, then the selected topological ones, joined
# column-wise (axis = 1).
# NOTE(review): rows are matched by index label - both frames are assumed to
# share the same index; confirm.
all_features = pandas.concat([ df_ft_tot_orig, best_topo_features ], axis = 1)
# Persist the merged table in this experiment's output directory.
all_features.to_feather(f"{subj}/{exp}/all_features.feather")
print(all_features.shape)

(2019, 1530)


### PCA

In [42]:
def pca(features: numpy.ndarray, scale = False, n_comp = 0.7, var_limit = None, random_state = 42) -> tuple:
    """Reduce `features` with PCA and return the projection plus a text summary.

    Args:
        features: 2-D samples-by-features data. It is handed straight to
            scikit-learn, so array-likes such as a DataFrame work as well.
        scale: when true, standardize the data with StandardScaler before PCA.
        n_comp: forwarded unchanged as PCA(n_components=...); see scikit-learn's
            PCA documentation for the meaning of int vs. float values.
            Ignored when `var_limit` is given.
        var_limit: when not None, a PCA with default n_components is fitted first
            and `n_comp` is replaced by the number of components whose individual
            explained-variance ratio is >= var_limit.
        random_state: forwarded to every PCA instance created here.

    Returns:
        A `(transformed, log)` tuple: the output of `fit_transform` and a
        three-line summary (component count, total explained variance rounded
        to 2 places, per-component ratios rounded to 3 places). The summary is
        also printed and carries no trailing newline.
    """
    if scale:
        features = sklearn.preprocessing.StandardScaler().fit_transform(features)

    if var_limit is not None:
        probe = sklearn.decomposition.PCA(random_state = random_state).fit(features)
        # int(): hand PCA and the log a plain Python integer, not a NumPy scalar.
        n_comp = int((probe.explained_variance_ratio_ >= var_limit).sum())

    # `model` instead of `pca`: do not shadow this function's own name.
    model = sklearn.decomposition.PCA(n_components = n_comp, svd_solver = "full", random_state = random_state)
    features = model.fit_transform(features)

    # Convert the rounded NumPy scalars to Python floats. NumPy 2 changed the
    # scalar repr, so a list of np.float64 would be written to the log as
    # "[np.float64(0.319), ...]" instead of "[0.319, ...]".
    total_ratio = float(round(model.explained_variance_ratio_.sum(), 2))
    ratios = [ float(round(x, 3)) for x in model.explained_variance_ratio_ ]

    log = (
        f'Components: {n_comp}\n'
        f'Explained variance: {total_ratio}\n'
        f'{ratios}'
    )
    print(log)
    return features, log

In [43]:
# Standardize the merged features and keep the first 15 principal components.
features_pca, log = pca(all_features, scale = True, n_comp = 15)

# The summary returned by pca() does not end with a newline, so the appended
# part starts with one; otherwise "Total features: ..." is glued onto the end
# of the explained-variance list line in log.txt.
log += (
    f'\nTotal features: {all_features.shape}\n'
    f'PCA features: {features_pca.shape}'
)
with open(f"{subj}/{exp}/log.txt", "w") as text_file:
    text_file.write(log)

# Show the projected data as a table (last expression of the cell).
pandas.DataFrame(features_pca)

Components: 15
Explained variance: 0.64
[0.319, 0.061, 0.058, 0.038, 0.029, 0.019, 0.017, 0.016, 0.014, 0.013, 0.012, 0.012, 0.011, 0.01, 0.01]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-37.965309,8.453411,-12.335180,1.158671,0.457616,-0.895080,-3.293121,-2.369972,-4.563380,-3.285406,-2.834035,1.088190,-1.719775,0.972651,7.896321
1,-33.785575,6.255192,-8.042505,-0.147091,2.507988,1.933933,0.987783,2.685461,-0.610294,-0.800327,2.307322,5.930976,0.903960,-2.240134,0.459803
2,-32.477194,6.048320,-0.722528,1.532800,-3.379621,-2.164708,-1.893698,3.644361,2.961642,1.535686,-2.247646,1.446022,2.426507,2.726212,1.334197
3,-32.047838,15.681144,0.307234,-7.102863,-0.638718,-2.131329,1.815208,-2.853467,6.641179,2.216987,-2.779090,1.216622,2.488752,-0.636298,-5.405212
4,-36.641890,8.702964,3.955021,-1.651065,-6.884006,2.058378,4.459524,5.633048,2.919501,0.090741,-1.949453,5.207788,-1.578614,-0.774020,-8.259020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014,-1.305526,-1.280350,-16.206310,-2.192514,-6.680017,5.044795,-1.208695,1.762841,6.365885,0.560362,5.627663,-0.068734,1.635099,-5.724704,-4.898992
2015,13.319390,6.313067,-6.631528,-2.303621,0.700635,-1.574053,1.664644,8.818048,5.564637,7.943182,1.734539,2.016790,0.366759,-5.227318,2.418163
2016,10.063583,3.629072,1.357314,7.375943,4.817463,5.739690,-6.229752,7.819325,10.368742,15.661318,4.731195,-0.311622,-7.037964,0.656953,-0.206976
2017,18.080801,26.711796,2.476543,-3.511239,0.302684,7.999250,-3.378507,-1.102193,11.006145,10.069574,6.258809,-3.439460,-14.647302,-0.288091,-6.206074


In [44]:
# Wrap the PCA projection in a DataFrame and store it as this experiment's
# final feature table.
# NOTE(review): the frame gets default integer column labels (0, 1, ...);
# some older pandas releases only accepted string column names in to_feather -
# confirm against the pandas version this notebook targets.
final_features = pandas.DataFrame(features_pca)
final_features.to_feather(f"{subj}/{exp}/features.feather")
final_features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,-37.965309,8.453411,-12.335180,1.158671,0.457616,-0.895080,-3.293121,-2.369972,-4.563380,-3.285406,-2.834035,1.088190,-1.719775,0.972651,7.896321
1,-33.785575,6.255192,-8.042505,-0.147091,2.507988,1.933933,0.987783,2.685461,-0.610294,-0.800327,2.307322,5.930976,0.903960,-2.240134,0.459803
2,-32.477194,6.048320,-0.722528,1.532800,-3.379621,-2.164708,-1.893698,3.644361,2.961642,1.535686,-2.247646,1.446022,2.426507,2.726212,1.334197
3,-32.047838,15.681144,0.307234,-7.102863,-0.638718,-2.131329,1.815208,-2.853467,6.641179,2.216987,-2.779090,1.216622,2.488752,-0.636298,-5.405212
4,-36.641890,8.702964,3.955021,-1.651065,-6.884006,2.058378,4.459524,5.633048,2.919501,0.090741,-1.949453,5.207788,-1.578614,-0.774020,-8.259020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014,-1.305526,-1.280350,-16.206310,-2.192514,-6.680017,5.044795,-1.208695,1.762841,6.365885,0.560362,5.627663,-0.068734,1.635099,-5.724704,-4.898992
2015,13.319390,6.313067,-6.631528,-2.303621,0.700635,-1.574053,1.664644,8.818048,5.564637,7.943182,1.734539,2.016790,0.366759,-5.227318,2.418163
2016,10.063583,3.629072,1.357314,7.375943,4.817463,5.739690,-6.229752,7.819325,10.368742,15.661318,4.731195,-0.311622,-7.037964,0.656953,-0.206976
2017,18.080801,26.711796,2.476543,-3.511239,0.302684,7.999250,-3.378507,-1.102193,11.006145,10.069574,6.258809,-3.439460,-14.647302,-0.288091,-6.206074
