In [None]:
import pickle
import numpy as np
import pandas as pd

In [None]:
# Path of the pickle file holding the previously extracted features
# (this is the path handed to read_extracted_features).
EXTRACTED_FEATURES_SAVE_ADDR = "../extracted_features.pickle"
# Number of components requested from PCA (passed as n_components).
NUMBER_OF_PCA_COMPONENTS = 512

In [None]:
def read_extracted_features(extracted_features_save_adr):
    """Load the extracted-features object stored in a pickle file.

    Args:
        extracted_features_save_adr: Path of the pickle file to read.

    Returns:
        The object that was pickled into the file.

    Note:
        ``pickle.load`` can execute arbitrary code while unpickling, so only
        open files that come from a trusted source.
    """
    with open(extracted_features_save_adr, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded

In [None]:
# Load the pickled features and make sure they are held in a plain dict
# (presumably keyed by patient -- TODO confirm against the extraction step).
features = dict(read_extracted_features(EXTRACTED_FEATURES_SAVE_ADDR))
len(features)

In [None]:
# Dimensionality reduction (PCA) and Gaussian mixture modelling
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture

In [None]:
# Flatten the two-level mapping into one row per (outer key, inner key) pair.
# Rows are added in the dictionaries' own iteration order, so all rows that
# belong to one outer key are contiguous.
flat_rows = {
    (patient, array_key): vector
    for patient, arrays in features.items()
    for array_key, vector in arrays.items()
}
df1 = pd.DataFrame.from_dict(flat_rows, orient='index')
df1.shape

In [None]:
# Fit PCA on every row of df1 and project those same rows onto the retained
# components; whiten=True rescales the projected components.
pca = PCA(
    whiten=True,
    svd_solver='full',
    n_components=NUMBER_OF_PCA_COMPONENTS,
)
feat1 = pca.fit_transform(df1)

In [None]:
# Split the PCA output back into one block of rows per patient.
#
# The rows of feat1 follow the iteration order of `features` (the order in
# which df1 was built), so each patient's rows begin exactly where the
# previous patient's rows ended. A running offset tracks that position
# directly: it stays correct even when the patient keys are not stored in
# ascending order, and it avoids re-summing the lengths of all other patients
# on every iteration.
new_features = {}
start_index = 0
for patient, arrays in features.items():
    num_arrays = len(arrays)
    end_index = start_index + num_arrays
    new_features[patient] = feat1[start_index:end_index]
    start_index = end_index

# Every PCA row must have been handed to exactly one patient; a mismatch
# means feat1 and features come from different runs (stale kernel state).
assert start_index == len(feat1), (
    "expected {} PCA rows, but the patients account for {}".format(
        len(feat1), start_index
    )
)

In [None]:
# Inspect the PCA-reduced rows of the first patient. Only the last expression
# of a cell is rendered, so the row count is printed explicitly instead of
# being evaluated and silently discarded.
patient_1 = list(new_features)[0]
print("number of rows for", patient_1, ":", len(new_features[patient_1]))
new_features[patient_1]

In [None]:
# Summarise each patient's PCA rows with a single-component Gaussian mixture
# that uses a diagonal covariance.
gmm_features = {}
for patient, patient_rows in new_features.items():
    gmm = GaussianMixture(covariance_type='diag', n_components=1)
    gmm.fit(patient_rows)
    # Fitted means first, then the fitted covariances, joined along axis 1.
    mean_cov = np.concatenate((gmm.means_, gmm.covariances_), axis=1)
    gmm_features[patient] = mean_cov


In [None]:
# Show how many patients were summarised and the GMM summary of the first one.
# The count is printed because only a cell's last expression is rendered.
print("number of patients:", len(gmm_features))
patient_1 = list(gmm_features)[0]
gmm_features[patient_1]

In [None]:
# Flatten every per-patient GMM summary to 1-D, then write one CSV row per
# patient with the dictionary key as the (unnamed) index column.
reshaped_gmm_features = {
    patient_key: summary.reshape(-1)
    for patient_key, summary in gmm_features.items()
}
df = pd.DataFrame.from_dict(reshaped_gmm_features, orient='index')
df.to_csv('../CSV/features_gmm3.csv', header=True, index=True)

In [None]:
# Attach the GMM features to the clinical data, matching rows on the patient ID.
clinical_data = (
    pd.read_csv('../CSV/clinical_data_with_no_missing_values.csv')
    .set_index('patient_id')
)
# The features CSV was written with an unnamed index, which pandas reads back
# as a column called 'Unnamed: 0'; restore it as the index before joining.
features1 = pd.read_csv('../CSV/features_gmm3.csv').set_index('Unnamed: 0')
# DataFrame.join defaults to a left join on the index, so every clinical row
# is kept, whether or not a matching feature row exists.
features1 = clinical_data.join(features1)
features1.to_csv(
    '../CSV/features_gmm3_with_clinical_data.csv',
    header=True,
    index=True,
)