# Create CSV from Folds

In [4]:
import numpy as np
import pickle
import pandas as pd
import os

## Generate CSV from the training/testing folds

### SVR

In [None]:
def generateCSVresults(src_dir, dest_dir, C_in, E_in, P_in, n_folds=5):
    """
    Generate CSV files with measurement_id and predictions from the provided source directory.

    One CSV is written per (P_in, C_in, E_in) combination. For each combination the
    pickled results of every fold are loaded from
    ``<src_dir><fold>/objs_<P>_kernel_linear_c_<C>_eps_<E>.pkl``; element ``[2]`` of the
    pickled object is taken as the test predictions and element ``[-1]`` as the matching
    measurement ids. The folds are pooled in fold order and saved as
    ``<dest_dir>/objs_<P>_kernel_linear_c_<C>_eps_<E>.csv``.

    Keyword arguments:
    - src_dir: String. Absolute path prefix of the *_Fold folders to generate the CSV
               predictions from (the fold index is appended directly to it).
               Example: /export/c08/lmorove1/kaldi/egs/beatPDivec/on_off_noinact_auto30/exp/ivec_450/resiVecSVR_Fold
    - dest_dir: String. Absolute path to where you want the CSV file to be saved.
                Created if it does not exist; a trailing slash is optional.
                Example: /export/c08/lmorove1/kaldi/egs/beatPDivec/on_off_noinact_auto30/exp/ivec_450/resiVecSVR_Fold_all/
    - C_in: List with string values. C value for the SVR.
            Example: ['0.2']
    - E_in: List with string values. Epsilon value for the SVR.
            Example: ['0.1']
    - P_in: List with string values. Number of components for PCA.
            Example: ['400']
    - n_folds: Int. Number of fold folders (<src_dir>0 ... <src_dir>{n_folds-1}) to pool.
               Defaults to 5.

    Returns None. The path of every CSV written is printed.

    Raises ValueError if n_folds is smaller than 1, and whatever open()/pickle.load()
    raise when a fold file is missing or unreadable.
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got ' + str(n_folds))

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(dest_dir, exist_ok=True)

    # Only the linear kernel was ever exported; kept as a name because it is part of
    # the pickle file name.
    kernelType = 'linear'

    for nPCA in P_in:
        for c in C_in:
            for epsilon in E_in:
                fileName = ('objs_' + str(nPCA) + '_kernel_' + str(kernelType)
                            + '_c_' + str(c) + '_eps_' + str(epsilon))
                pooled_glob_test_pred = []
                pooled_glob_test_mesID = []

                for kfold in range(n_folds):
                    print(['Kfold: ' + str(kfold)])
                    file_path = src_dir + str(kfold) + '/' + fileName + '.pkl'
                    print(file_path)
                    # NOTE: pickle.load can execute arbitrary code; only point this at
                    # fold outputs produced by our own training scripts.
                    # The with-block guarantees the handle is closed (the previous
                    # `pickle_in.close` without parentheses never closed it).
                    with open(file_path, "rb") as pickle_in:
                        pkl_file = pickle.load(pickle_in)

                    pooled_glob_test_pred.append(pkl_file[2])
                    pooled_glob_test_mesID.extend(pkl_file[-1])

                # Flatten the per-fold prediction arrays into one vector aligned with
                # the pooled measurement ids.
                pooled_glob_test_pred = np.hstack(pooled_glob_test_pred)
                df = pd.DataFrame({'measurement_id': pooled_glob_test_mesID,
                                   'prediction': pooled_glob_test_pred})

                csv_path = os.path.join(dest_dir, fileName + '.csv')
                df.to_csv(csv_path, index=False)
                # Reported per configuration, so an empty grid no longer hits an
                # unbound `fileName` after the loops.
                print(csv_path)

### On/Off

In [None]:
# Pooled SVR export for the On/Off task: a single PCA size / C / epsilon setting.
P_in = ['400']
C_in = ['0.2']
E_in = ['0.1']

# src_dir is a prefix: the fold index is appended to it by generateCSVresults.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/on_off_noinact_auto30/exp/ivec_450/resiVecSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/on_off_noinact_auto30/exp/ivec_450/resiVecSVR_Fold_all/'

generateCSVresults(
    src_dir=src_dir,
    dest_dir=dest_dir,
    C_in=C_in,
    E_in=E_in,
    P_in=P_in,
)

### Tremor

In [None]:
# Pooled SVR export for the Tremor task: a single PCA size / C / epsilon setting.
P_in = ['450']
C_in = ['0.02']
E_in = ['0.1']

# src_dir is a prefix: the fold index is appended to it by generateCSVresults.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/trem_noinact_auto30/exp/ivec_450/resiVecSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/trem_noinact_auto30/exp/ivec_450/resiVecSVR_Fold_all/'

generateCSVresults(
    src_dir=src_dir,
    dest_dir=dest_dir,
    C_in=C_in,
    E_in=E_in,
    P_in=P_in,
)

### Dyskinesia

In [None]:
# Pooled SVR export for the Dyskinesia task: a single PCA size / C / epsilon setting.
P_in = ['500']
C_in = ['0.002']
E_in = ['0.1']

# src_dir is a prefix: the fold index is appended to it by generateCSVresults.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/v1_dysk_auto/exp/ivec_500/resiVecSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/v1_dysk_auto/exp/ivec_500/resiVecSVR_Fold_all/'

generateCSVresults(
    src_dir=src_dir,
    dest_dir=dest_dir,
    C_in=C_in,
    E_in=E_in,
    P_in=P_in,
)

### Dyskinesia newivectors

In [None]:
# Pooled SVR export for Dyskinesia on the new i-vectors (dysk_auto30_400fl_dD, ivec_550).
P_in = ['500']
C_in = ['0.002']
E_in = ['0.1']

# src_dir is a prefix: the fold index is appended to it by generateCSVresults.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_auto30_400fl_dD/exp/ivec_550/resiVecSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_auto30_400fl_dD/exp/ivec_550/resiVecSVR_Fold_all/'

generateCSVresults(
    src_dir=src_dir,
    dest_dir=dest_dir,
    C_in=C_in,
    E_in=E_in,
    P_in=P_in,
)

## SVR Per Patient

In [2]:
def generateCSVresults_per_patient(dest_dir, src_dir, best_config, n_folds=5):
    """
    Generate a single CSV file pooling the per-patient SVR test predictions of all folds.

    For every subject id in best_config, the pickle
    ``<src_dir><fold>/<subject_id>_<config name>.pkl`` is loaded for each fold. Element
    ``[2]`` of the pickled object is taken as the test predictions and element ``[-1]``
    as the matching measurement ids. Everything is pooled (subjects in dict order, folds
    in increasing order) and written to ``<dest_dir>/preds_per_patient.csv`` with the
    columns ``measurement_id`` and ``prediction``.

    Keyword Arguments:
    - dest_dir: String. Absolute path to where the CSV file is saved. Created if it does
                not exist; a trailing slash is optional.
    - src_dir: String. Absolute path prefix of the *_Fold folders (the fold index is
               appended directly to it).
    - best_config: Dict mapping subject id -> sequence whose first element is the pickle
                   name of the configuration chosen for that subject,
                   e.g. {1004: ['/objs_300_kernel_linear_c_0.002_eps_0.1.pkl', 1.08]}.
                   Surrounding '/' and a trailing '.pkl' are removed from the name;
                   any further elements of the sequence are ignored here.
    - n_folds: Int. Number of fold folders (<src_dir>0 ... <src_dir>{n_folds-1}) to pool.
               Defaults to 5.

    Returns None. The path of the CSV written is printed.

    Raises ValueError if n_folds is smaller than 1, and whatever open()/pickle.load()
    raise when a fold file is missing or unreadable.
    """
    if n_folds < 1:
        raise ValueError('n_folds must be at least 1, got ' + str(n_folds))

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(dest_dir, exist_ok=True)

    pooled_glob_test_pred = []
    pooled_glob_test_mesID = []

    # For all subject_ids
    for pid in best_config:
        # Merge the subject id with the name of the pickled results object.
        # str.strip('.pkl') would strip any of the characters '.', 'p', 'k', 'l' from
        # BOTH ends (e.g. 'pkl_model.pkl' -> '_mode'), so remove the suffix explicitly.
        config_name = best_config[pid][0].strip('/')
        if config_name.endswith('.pkl'):
            config_name = config_name[:-len('.pkl')]
        fileName = str(pid) + "_" + config_name

        for kfold in range(n_folds):
            print(['Kfold: ' + str(kfold)])
            file_path = src_dir + str(kfold) + '/' + fileName + '.pkl'
            print(file_path)
            # NOTE: pickle.load can execute arbitrary code; only point this at fold
            # outputs produced by our own training scripts.
            # The with-block guarantees the handle is closed (the previous
            # `pickle_in.close` without parentheses never closed it).
            with open(file_path, "rb") as pickle_in:
                pkl_file = pickle.load(pickle_in)

            pooled_glob_test_pred.append(pkl_file[2])
            pooled_glob_test_mesID.extend(pkl_file[-1])

    # Flatten once, after all subjects are collected. np.hstack rejects an empty list,
    # so an empty best_config still produces a header-only CSV.
    if pooled_glob_test_pred:
        pooled_glob_test_pred = np.hstack(pooled_glob_test_pred).tolist()

    df = pd.DataFrame({'measurement_id': pooled_glob_test_mesID,
                       'prediction': pooled_glob_test_pred})
    csv_path = os.path.join(dest_dir, 'preds_per_patient' + '.csv')
    df.to_csv(csv_path, index=False)

    print(csv_path)

### On / Off

Per-patient SVR was not used for On/Off because it did not give better results in our cross-validation.

In [None]:
# Per-patient On/Off export.
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_300_kernel_linear_c_0.002_eps_0.1.pkl', 1.0827976194111284],
    1006: ['/objs_50_kernel_linear_c_20.0_eps_0.1.pkl', 0.2329153271765446],
    1007: ['/objs_450_kernel_linear_c_0.2_eps_0.1.pkl', 1.4714460909882974],
    1019: ['/objs_200_kernel_linear_c_20.0_eps_0.1.pkl', 1.8918060269492412],
    1020: ['/objs_550_kernel_linear_c_0.002_eps_0.1.pkl', 0.886905471524307],
    1023: ['/objs_350_kernel_linear_c_0.2_eps_0.1.pkl', 1.1596042294033257],
    1032: ['/objs_400_kernel_linear_c_20.0_eps_0.1.pkl', 0.7281204208927649],
    1034: ['/objs_250_kernel_linear_c_0.2_eps_0.1.pkl', 1.1144152234028197],
    1038: ['/objs_450_kernel_linear_c_20.0_eps_0.1.pkl', 2.4487567296461563],
    1039: ['/objs_350_kernel_linear_c_0.2_eps_0.1.pkl', 1.0717793502196455],
    1043: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 1.4289922466877871],
    1044: ['/objs_600_kernel_linear_c_0.002_eps_0.1.pkl', 0.2380464610221433],
    1048: ['/objs_600_kernel_linear_c_20.0_eps_0.1.pkl', 0.6105293829179457],
    1049: ['/objs_350_kernel_linear_c_0.002_eps_0.1.pkl', 0.8740733515064113],
    1051: ['/objs_350_kernel_linear_c_0.2_eps_0.1.pkl', 1.0917628255815726],
}

# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/b19/mpgill/kaldi/egs/beatPDivec/on_off_hpf_auto30/exp/ivec_600/resiVecSVR_Fold'
dest_dir = '/export/b19/mpgill/kaldi/egs/beatPDivec/on_off_hpf_auto30/exp/ivec_600/resiVecPerPatientSVR_Fold_all/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)

### Tremor 

In [None]:
# Per-patient Tremor export.
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_100_kernel_linear_c_0.2_eps_0.1.pkl', 1.4680014190555721],
    1006: ['/objs_50_kernel_linear_c_2e-13_eps_0.1.pkl', 0.5181408183640623],
    1007: ['/objs_500_kernel_linear_c_0.002_eps_0.1.pkl', 0.28865246352999413],
    1019: ['/objs_150_kernel_linear_c_0.002_eps_0.1.pkl', 0.4394132869651092],
    1020: ['/objs_250_kernel_linear_c_20.0_eps_0.1.pkl', 0.20200962304387957],
    1023: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 0.33852971133060844],
    1032: ['/objs_550_kernel_linear_c_0.2_eps_0.1.pkl', 0.3050677687016101],
    1034: ['/objs_200_kernel_linear_c_20.0_eps_0.1.pkl', 0.3368693259856197],
    1038: ['/objs_500_kernel_linear_c_0.002_eps_0.1.pkl', 0.24267448339760042],
    1043: ['/objs_350_kernel_linear_c_0.002_eps_0.1.pkl', 1.268591016460198],
    1046: ['/objs_350_kernel_linear_c_0.002_eps_0.1.pkl', 0.2396070476537261],
    1048: ['/objs_450_kernel_linear_c_2e-07_eps_0.1.pkl', 0.35171361722889516],
    1049: ['/objs_250_kernel_linear_c_0.2_eps_0.1.pkl', 0.6654219633016787],
}

# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/trem_noinact_auto30/exp/ivec_550/resiVecPerPatientSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/trem_noinact_auto30/exp/ivec_550/resiVecPerPatientSVR_Fold_all/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)

### New best tremor i-vector results 

In [None]:
# Per-patient Tremor export on the new i-vectors (trem_hpf_auto30, ivec_700).
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_700_kernel_linear_c_0.002_eps_0.1.pkl', 1.531657964416419],
    1006: ['/objs_150_kernel_linear_c_20.0_eps_0.1.pkl', 0.49171222508197227],
    1007: ['/objs_550_kernel_linear_c_0.002_eps_0.1.pkl', 0.2995683053848717],
    1019: ['/objs_150_kernel_linear_c_0.002_eps_0.1.pkl', 0.4495979629768781],
    1020: ['/objs_50_kernel_linear_c_0.2_eps_0.1.pkl', 0.1958384033190276],
    1023: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 0.3492860938493334],
    1032: ['/objs_600_kernel_linear_c_0.002_eps_0.1.pkl', 0.3071304458845056],
    1034: ['/objs_50_kernel_linear_c_0.2_eps_0.1.pkl', 0.3220335225867783],
    1038: ['/objs_600_kernel_linear_c_0.2_eps_0.1.pkl', 0.23897503018219704],
    1043: ['/objs_150_kernel_linear_c_20.0_eps_0.1.pkl', 1.1227755525400551],
    1046: ['/objs_250_kernel_linear_c_0.2_eps_0.1.pkl', 0.2349490529829686],
    1048: ['/objs_700_kernel_linear_c_2e-07_eps_0.1.pkl', 0.3517114737593213],
    1049: ['/objs_700_kernel_linear_c_0.002_eps_0.1.pkl', 0.6536079323072791],
}

# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/b19/mpgill/kaldi/egs/beatPDivec/trem_hpf_auto30/exp/ivec_700/resiVecSVR_Fold'
dest_dir = '/export/b19/mpgill/kaldi/egs/beatPDivec/trem_hpf_auto30/exp/ivec_700/resiVecSVR_Fold_all/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)

### Dyskinesia

In [None]:
# Per-patient Dyskinesia export (dysk_orig_auto60_400fl, ivec_650).
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 1.1469489658686098],
    1007: ['/objs_100_kernel_linear_c_0.002_eps_0.1.pkl', 0.09115239389591206],
    1019: ['/objs_400_kernel_linear_c_0.2_eps_0.1.pkl', 0.686931370820251],
    1023: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 0.8462093717280431],
    1034: ['/objs_100_kernel_linear_c_20.0_eps_0.1.pkl', 0.7961188257851409],
    1038: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 0.3530848340426855],
    1039: ['/objs_450_kernel_linear_c_0.2_eps_0.1.pkl', 0.3826339325882311],
    1043: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 0.5525085362997469],
    1044: ['/objs_50_kernel_linear_c_0.002_eps_0.1.pkl', 0.09694768640213237],
    1048: ['/objs_650_kernel_linear_c_0.2_eps_0.1.pkl', 0.4505302952804157],
    1049: ['/objs_250_kernel_linear_c_0.2_eps_0.1.pkl', 0.4001809543831368],
}

# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_orig_auto60_400fl/exp/ivec_650/resiVecPerPatientSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_orig_auto60_400fl/exp/ivec_650/resiVecPerPatientSVR_Fold_all/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)


In [None]:
# Per-patient Dyskinesia export (dysk_orig_auto60_400fl_scratch, ivec_650).
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 1.1469489658686098],
    1007: ['/objs_100_kernel_linear_c_0.002_eps_0.1.pkl', 0.09115239389591206],
    1019: ['/objs_400_kernel_linear_c_0.2_eps_0.1.pkl', 0.686931370820251],
    1023: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 0.8462093717280431],
    1034: ['/objs_100_kernel_linear_c_20.0_eps_0.1.pkl', 0.7961188257851409],
    1038: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 0.3530848340426855],
    1039: ['/objs_450_kernel_linear_c_0.2_eps_0.1.pkl', 0.3826339325882311],
    1043: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 0.5525085362997469],
    1044: ['/objs_50_kernel_linear_c_0.002_eps_0.1.pkl', 0.09694768640213237],
    1048: ['/objs_650_kernel_linear_c_0.2_eps_0.1.pkl', 0.4505302952804157],
    1049: ['/objs_250_kernel_linear_c_0.2_eps_0.1.pkl', 0.4001809543831368],
}

# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/b19/mpgill/kaldi/egs/beatPDivec/dysk_orig_auto60_400fl_scratch/exp/ivec_650/resiVecSVR_Fold'
dest_dir = '/export/b19/mpgill/kaldi/egs/beatPDivec/dysk_orig_auto60_400fl_scratch/exp/ivec_650/resiVecSVR_Fold/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)


## Dyskinesia - What was sent in the 4th submission - Params from dysk_orig_auto60 but applied on dysk_noinact_auto30 features

In [None]:
# Best per-patient configuration found on dysk_orig_auto60.
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 1.1469489658686098],
    1007: ['/objs_100_kernel_linear_c_0.002_eps_0.1.pkl', 0.09115239389591206],
    1019: ['/objs_400_kernel_linear_c_0.2_eps_0.1.pkl', 0.686931370820251],
    1023: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 0.8462093717280431],
    1034: ['/objs_100_kernel_linear_c_20.0_eps_0.1.pkl', 0.7961188257851409],
    1038: ['/objs_450_kernel_linear_c_0.002_eps_0.1.pkl', 0.3530848340426855],
    1039: ['/objs_450_kernel_linear_c_0.2_eps_0.1.pkl', 0.3826339325882311],
    1043: ['/objs_300_kernel_linear_c_0.2_eps_0.1.pkl', 0.5525085362997469],
    1044: ['/objs_50_kernel_linear_c_0.002_eps_0.1.pkl', 0.09694768640213237],
    1048: ['/objs_650_kernel_linear_c_0.2_eps_0.1.pkl', 0.4505302952804157],
    1049: ['/objs_250_kernel_linear_c_0.2_eps_0.1.pkl', 0.4001809543831368],
}

# In the 4th submission these parameters were applied, by mistake, to the
# dysk_noinact_auto30 features; the paths below reproduce that submission.
# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)


## Dyskinesia - Dysk_noinact_auto30

In [None]:
# Best per-patient configuration found on dysk_noinact_auto30.
# Maps subject id -> [pickle name of the selected configuration, score].
# Only the pickle name is read by generateCSVresults_per_patient; the second
# value is presumably the cross-validation score of that configuration — TODO confirm.
best_config = {
    1004: ['/objs_550_kernel_linear_c_0.002_eps_0.1.pkl', 1.1259629967845186],
    1007: ['/objs_150_kernel_linear_c_0.002_eps_0.1.pkl', 0.09008666568311713],
    1019: ['/objs_500_kernel_linear_c_0.2_eps_0.1.pkl', 0.728239058772483],
    1023: ['/objs_50_kernel_linear_c_2e-13_eps_0.1.pkl', 0.8626402821439083],
    1034: ['/objs_100_kernel_linear_c_20.0_eps_0.1.pkl', 0.7901250257399929],
    1038: ['/objs_500_kernel_linear_c_0.2_eps_0.1.pkl', 0.3434269008454658],
    1039: ['/objs_650_kernel_linear_c_0.2_eps_0.1.pkl', 0.3869949731338493],
    1043: ['/objs_400_kernel_linear_c_0.2_eps_0.1.pkl', 0.5469210567758267],
    1044: ['/objs_100_kernel_linear_c_2e-05_eps_0.1.pkl', 0.09707586142487289],
    1048: ['/objs_500_kernel_linear_c_20.0_eps_0.1.pkl', 0.4618833361455122],
    1049: ['/objs_350_kernel_linear_c_0.2_eps_0.1.pkl', 0.40809179112164407],
}

# src_dir is a prefix: the fold index is appended to it when the pickles are read.
src_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecSVR_Fold'
dest_dir = '/export/c08/lmorove1/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/'

generateCSVresults_per_patient(
    dest_dir=dest_dir,
    src_dir=src_dir,
    best_config=best_config,
)
