# Fusion

Submission 2:
    - Tremor : Gradient Boosting Regression
    - Dyskinesia : Gradient Boosting Regression
    
Submission 4: 
    - Dyskinesia : Average of predictions 

In [1]:
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier
import csv
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_blobs
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from random import random
from sklearn.metrics import mean_squared_error
import pickle
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

%load_ext autoreload
%autoreload 2

from transform_data import *
from create_graphs import *
from beatPDivec.default_data.v2_auto.local.get_final_scores_accuracy import *

In [4]:
# Path to where you want the CSV files created 
sDirOut = "<your-out-path>"

# Path to the folder containing the GitHub repository of BeatPD-CLSP-JHU
sPathGithub = os.getcwd()

# Path to the folder containing the kaldi/ folder
sPathKaldi = "<path-to-kaldi>/"

# Path to the data folder
sPathData = "<path-to-data>"

# Fusion for REAL-PD sensors

For REAL-PD, we have these rules to create the predictions:
    - If we have all 3 predictions, we do the average of the two closest 
    - If there are only two measurements, we do the average of both 
    - If only one prediction, we take that one
    
The following code in this Notebook performs this merge to go from three subtypes of REAL-PD predictions to only one CSV file. 

In [64]:
def real_average_fusion(lFilesPred, sDirOut, sTypeLabel):
    iNumFiles=len(lFilesPred)
    vPredIter=np.zeros((1,iNumFiles))
    mPredictions=np.zeros((1,iNumFiles))
    lID=[] # measurement_id 
    lDicts=[] 
    vParID=[] # participant ID

    for sFilePred in lFilesPred:
        with open(sFilePred, mode='r') as infile:
            reader = csv.reader(infile)
            dPred = {rows[0]:rows[1] for rows in reader} #Prediction from the different classifiers (acoustic, w-embed)
        lDicts.append(dPred)
   
    # Append all unique measurement ID as the lead
    all_unique_measurements = set(list(lDicts[0].keys()) + list(lDicts[1].keys()) + list(lDicts[2].keys()))

    #creation of the matrix containing prediction from all classifiers
    bEnter=1
    for k in all_unique_measurements:
        if k!='measurement_id':
            lID.append(k) # measurement_id 
            fPred=lDicts[0].get(k)
            if fPred:
                vPredIter[0,0]=float(np.asarray(lDicts[0][k])) #first predicted value
            else:
                vPredIter[0,0]=None
            # Go through the second and next predictions files
            for j in range(1, iNumFiles):
                # fPred will contain the prediction of the predictions file we're going through
                fPred=lDicts[j].get(k)
                if fPred: # if we found a prediction
                    # we add it in the array
                    vPredIter[0,j]=float(np.asarray(fPred))
                else:
                    #print([str(j) + 'Unknown key:' + k])
                    vPredIter[0,j]=None#float(np.asarray(lDicts[0][k]))
            if bEnter==1:
                # if it's the first prediction file, we initialize it
                mPredictions=vPredIter.copy() # mPredictions initialization.
                bEnter=0
            else:
                #[a, b, c]
                dist_a_b = abs(vPredIter[0,1] - vPredIter[0,0])
                dist_c_b = abs(vPredIter[0,2] - vPredIter[0,1])
                dist_c_a = abs(vPredIter[0,2] - vPredIter[0,0])

                min_value = min(np.array([dist_a_b, dist_c_b, dist_c_a]))
                
                if min_value == dist_a_b:
                    vPredIter[0,2] = None
                elif min_value == dist_c_b:
                    vPredIter[0,0] = None
                elif min_value == dist_c_a:
                    vPredIter[0,1] = None
                    
                # We add the new predictions as a new column if it's not the first file we're going through
                mPredictions=np.append(mPredictions,vPredIter,axis=0)

    # For REAL-PD, we have these rules to create the predictions:
    # If we have all 3 predictions, we do the average of the two closest 
    # If there are only two measurements, we do the average 
    
#     print(mPredictions)
    
    vAverage=np.nanmean(mPredictions,axis=1)
    
    lID, vPrediction=zip(*sorted(zip(lID, vAverage)))
    df = pd.DataFrame({'measurement_id': lID, sTypeLabel:vPrediction})
    df.to_csv(sDirOut+'submissionRealPD'+sTypeLabel+'.csv', index=False)
    print('Submission file was created: '+sDirOut+"submissionRealPD"+sTypeLabel+".csv")
    
    return vAverage

## ON/OFF

In [193]:
sPhoneAccPred=sPathGithub+'/tsfresh/submit/submission/phoneacc_on_off_new.csv'
sWatchAccPred=sPathGithub+'/tsfresh/submit/submission/watchacc_on_off_new.csv'
sWatchGyrPred=sPathGithub+'/tsfresh/submit/submission/watchgyr_on_off_new.csv'

lFilesPred=[sPhoneAccPred,sWatchAccPred,sWatchGyrPred]

sTypeLabel = "on_off"

# Name of the CSV file to be created with the average 
fileName ="on_off_real"

real_average_fusion(lFilesPred, sDirOut, sTypeLabel)

Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionRealPDon_off.csv


## Tremor

In [194]:
sPhoneAccPred=sPathGithub+'/tsfresh/submit/submission/phoneacc.tremor.csv'
sWatchAccPred=sPathGithub+'/tsfresh/submit/submission/watchacc.tremor.csv'
sWatchGyrPred=sPathGithub+'/tsfresh/submit/submission/watchgyr.tremor.csv'

lFilesPred=[sPhoneAccPred,sWatchAccPred,sWatchGyrPred]

sTypeLabel = "tremor"

# Name of the CSV file to be created with the average 
fileName ="tremor_real"

real_average_fusion(lFilesPred, sDirOut, sTypeLabel)

Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionRealPDtremor.csv


array([0.37931034, 0.65416554, 2.18682595, 0.37931034, 0.14303959,
       0.27248547, 2.02380952, 0.43902439, 0.2993007 , 0.13793103,
       0.40129212, 0.14303959, 0.32298894, 0.41187739, 0.41666667,
       0.68810978, 0.7608293 , 0.21676815, 2.13043478, 0.33974428,
       0.41666667, 0.29090909, 0.13793103, 0.44444444, 0.13793103,
       0.39955193, 0.57267669, 2.0053676 , 0.29090909, 0.2993007 ,
       0.37931034, 0.13793103, 0.35403822, 0.81333286, 2.02336013,
       0.30956461, 0.39560964, 0.13793103, 0.30769231, 0.13793103,
       0.45444199, 0.79937801, 2.02380952, 2.13043478, 0.2993007 ,
       1.99466338, 2.33280802, 2.13043478, 0.48426454, 0.13793103,
       0.41666667, 2.18454935, 0.19007548, 0.41666667, 2.16643537,
       2.01350806, 2.38435479, 0.41666667, 2.02380952, 2.04134894,
       2.02380952, 2.39958423, 0.43902439, 0.43646137, 0.42430604,
       2.13043478, 0.96504882, 0.30129155, 2.20308919, 0.29090909,
       0.34564532, 0.45395537, 0.30769231, 2.33909456, 2.48840

## Dyskinesia

In [195]:
sPhoneAccPred=sPathGithub+'/tsfresh/submit/submission/phoneacc.dyskinesia.csv'
sWatchAccPred=sPathGithub+'/tsfresh/submit/submission/watchacc.dyskinesia.csv'
sWatchGyrPred=sPathGithub+'/tsfresh/submit/submission/watchgyr.dyskinesia.csv'

lFilesPred=[sPhoneAccPred,sWatchAccPred,sWatchGyrPred]

sTypeLabel = "dyskinesia"

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_real"

real_average_fusion(lFilesPred, sDirOut, sTypeLabel)

Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionRealPDdyskinesia.csv


array([0.93285683, 0.28006747, 0.17857143, 0.95436689, 0.90929966,
       0.32372102, 0.89552239, 0.20036508, 0.18518519, 0.89552239,
       1.00760241, 0.18518519, 0.26308097, 0.89929588, 0.75281125,
       0.92863867, 0.79298685, 0.90929966, 0.2064634 , 0.28883904,
       0.64864865, 0.2008349 , 0.24714354, 0.91559994, 0.64864865,
       0.19259259, 0.18518519, 0.1925402 , 0.62068966, 0.26079421,
       0.9305576 , 0.21491919, 0.32213041, 0.18518519, 0.17857143,
       0.7692336 , 0.92585684, 0.2182692 , 0.34650603, 0.25443814,
       0.54594984, 0.23603459, 0.62068966, 0.62068966, 0.18695809,
       0.64534303, 0.42934678, 0.22498779, 0.18518519, 0.20650673,
       0.6146469 , 0.97777942, 0.39764001, 0.62068966, 0.68154659,
       0.62068966, 0.30773422, 0.31217173, 0.89552239, 0.71826372,
       0.65007382, 1.05016382, 0.9542592 , 0.23760092, 0.27813159,
       0.68247064, 1.00831413, 1.02909242, 0.80784821, 0.16883117,
       0.18903047, 0.18518519, 0.94013955, 0.17957755, 0.24756

## Submission 4 - Average of predictions for Approach 1 and 2 - CIS-PD 

### Dyskinesia

### Get predictions file on test subset 

In [200]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/submission/cis-pd.dyskinesia.perpatient.csv'

# SVR predictions files on test kfolds 
sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred2]

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dysk_test"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0

# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel, True)


Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv


# Merge CIS-PD and REAL-PD predictions in one CSV file 

In [208]:
from os import chdir
from glob import glob
import pandas as pdlib

def produceOneCSV(list_of_files, file_out):
    """
    Produce a single CSV after combining all files

    Source: https://www.techbeamers.com/pandas-merge-csv-files/
    """
    # Consolidate all CSV files into one object
    result_obj = pdlib.concat([pdlib.read_csv(file) for file in list_of_files])
    # Convert the above object into a csv file and export
    result_obj.to_csv(file_out, index=False, encoding="utf-8")
    print('File was created: ', file_out)

## ON/OFF

In [209]:
# Merge tsfresh/.../cis-pd.on_off_new.csv and sDirOut+submissionRealPDon_off.csv

sCisFile = sPathGithub+'/tsfresh/submit/submission/cis-pd.on_off_new.csv'
sRealFile = sDirOut+'submissionRealPDon_off.csv'

list_of_files = [sCisFile, sRealFile]
file_out = sDirOut+"/cis-real.on_off.csv"
produceOneCSV(list_of_files, file_out)

File was created:  /export/b19/mpgill/BeatPD_predictions_tryingout//cis-real.on_off.csv


## Tremor

In [210]:
# Merge /tsfresh/.../cis-pd.tremor.csv and submissionRealPDtremor.csv 

sCisFile = sPathGithub+'/tsfresh/submit/submission/cis-pd.tremor.csv'
sRealFile = sDirOut+'submissionRealPDtremor.csv'

list_of_files = [sCisFile, sRealFile]
file_out = sDirOut+"/cis-real.tremor.csv"
produceOneCSV(list_of_files, file_out)

File was created:  /export/b19/mpgill/BeatPD_predictions_tryingout//cis-real.tremor.csv


## Dyskinesia

In [211]:
# Merge submissionCisPDdyskinesia.csv and submissionRealPDdyskinesia.csv

sCisFile = sDirOut+"submissionCisPDdyskinesia.csv"
sRealFile = sDirOut+'submissionRealPDdyskinesia.csv'

list_of_files = [sCisFile, sRealFile]
file_out = sDirOut+"/cis-real.dyskinesia.csv"
produceOneCSV(list_of_files, file_out)

File was created:  /export/b19/mpgill/BeatPD_predictions_tryingout//cis-real.dyskinesia.csv


### Example on test folds

In [None]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/submission4_preds/cis-pd.dyskinesia.perpatient.csv'

# SVR predictions files on test kfolds 
sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dysk_test"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0

# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

### Tremor - Test Folds - CIS-PD - Gridsearch + global norm

In [8]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_tremor.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="tremor_test"

sTypeLabel = "tremor"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDtremor.csv
--- MSEscore ---
Final score :  0.4425578721059196
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.4425578721059196
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.4425578721059196
Overall MSE Fusion - average :  None


### Dyskinesia - Gridsearch + global norm

In [9]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_dyskinesia.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_test"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4896636009359452
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.4896636009359452
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.4896636009359452
Overall MSE Fusion - average :  None


### On/Off - Gridsearch + global norm

In [72]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_on_off.csv'
sFilePred1=sPathGithub+'/tsfresh/submit/getpreds_perpatient/kfold_prediction_cis-pd.on_off_dev_folds_thesis.perpatient.csv'
# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="on_off_teeeest"

sTypeLabel = "on_off"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDon_off.csv
--- MSEscore ---
Final score :  1.236944060278669
Overall MSE Classif. 1 - tsfresh:  1.236944060278669
--- MSEscore ---
Final score :  1.236944060278669
Overall MSE Classif. 2 - ivec:  1.236944060278669
--- MSEscore ---
Final score :  1.236944060278669
Overall MSE Fusion - average :  1.236944060278669


In [10]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_on_off.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="on_off_teeeest"

sTypeLabel = "on_off"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDon_off.csv
--- MSEscore ---
Final score :  1.156679002684981
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  1.156679002684981
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  1.156679002684981
Overall MSE Fusion - average :  None


In [24]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_lambda_0.3_cis-pd_on_off.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "on_off"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDon_off.csv
--- MSEscore ---
Final score :  1.1948072541641777
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  1.1948072541641777
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  1.1948072541641777
Overall MSE Fusion - average :  None


In [9]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_rf_cis-pd_on_off.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "on_off"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDon_off.csv
--- MSEscore ---
Final score :  1.1750855614237372
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  1.1750855614237372
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  1.1750855614237372
Overall MSE Fusion - average :  None


In [10]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_rf_cis-pd_tremor.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "tremor"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDtremor.csv
--- MSEscore ---
Final score :  0.4503645126275858
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.4503645126275858
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.4503645126275858
Overall MSE Fusion - average :  None


In [26]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_rf_cis-pd_tremor.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "tremor"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDtremor.csv
--- MSEscore ---
Final score :  0.45107914133573473
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.45107914133573473
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.45107914133573473
Overall MSE Fusion - average :  None


# Results on Data Augmentation Techniques

In [20]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_dyskinesia_lamb_1.0_2.csv'

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred1]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.5132973963760121
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.5132973963760121
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.5132973963760121
Overall MSE Fusion - average :  None


In [106]:


list_files = [
#     "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact_resample_0.9.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact_resample_1.1.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_orig_2.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_original_resample_0.9.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_original_resample_1.1.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact_resample_0.9.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact_resample_1.1.csv",
# "kfold_prediction_cis-pd_on_off_rfr_orig_2.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original_resample_0.9.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original_resample_1.1.csv",
"kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact.csv",
"kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact_resample_0.9.csv",
"kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact_resample_1.1.csv",
"kfold_prediction_cis-pd_tremor_rfr_orig_2.csv",
"kfold_prediction_cis-pd_tremor_rfr_original.csv",
"kfold_prediction_cis-pd_tremor_rfr_original_resample_0.9.csv",
"kfold_prediction_cis-pd_tremor_rfr_original_resample_1.1.csv"]

In [114]:
# list_files = [
#     "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact_rotate_1_and_2.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact_rotate_1.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact_rotate_2.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_original_rotate_1_and_2.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_original_rotate_1.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_original_rotate_2.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact_rotate_1_and_2.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact_rotate_1.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact_rotate_2.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original_rotate_1_and_2.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original_rotate_1.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original_rotate_2.csv",
# "kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact_rotate_1_and_2.csv",
# "kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact_rotate_1.csv",
# "kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact_rotate_2.csv",
# "kfold_prediction_cis-pd_tremor_rfr_original_rotate_1_and_2.csv",
# "kfold_prediction_cis-pd_tremor_rfr_original_rotate_1.csv",
# "kfold_prediction_cis-pd_tremor_rfr_original_rotate_2.csv"]

In [124]:
list_files = [
#     "kfold_prediction_cis-pd_dyskinesia_original.noise_mu_0_sig_0.1.csv",
#               "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_0.9.csv",
#               "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1.csv",
#               "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.csv",
#               "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_1.1.csv",
              
#               "kfold_prediction_cis-pd_on_off_combhpfnoinact.csv",
#               "kfold_prediction_cis-pd_on_off_original.noise_mu_0_sig_0.1.csv",
#               "kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1.csv",
#               "kfold_prediction_cis-pd_on_off_combhpfnoinact_resample_0.9.csv",
#               "kfold_prediction_cis-pd_on_off_combhpfnoinact_resample_1.1.csv",
              
              "kfold_prediction_cis-pd_tremor_combhpfnoinact.csv",
              "kfold_prediction_cis-pd_tremor_original.noise_mu_0_sig_0.1.csv",
              "kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.1.csv",
              "kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_0.9.csv",
              "kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_1.1.csv"]

In [132]:
list_files = [
#     "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact.noise_mu_0_sig_0.1.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_combhpfnoinact_resample_0.9.csv",
# "kfold_prediction_cis-pd_dyskinesia_rfr_original.noise_mu_0_sig_0.1.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact.noise_mu_0_sig_0.1.csv",
# "kfold_prediction_cis-pd_on_off_rfr_combhpfnoinact_resample_0.9.csv",
# "kfold_prediction_cis-pd_on_off_rfr_original.noise_mu_0_sig_0.1.csv",
# "kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact.csv",
# "kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact.noise_mu_0_sig_0.1.csv",
# "kfold_prediction_cis-pd_tremor_rfr_combhpfnoinact_resample_0.9.csv",
# "kfold_prediction_cis-pd_tremor_rfr_original.noise_mu_0_sig_0.1.csv"]

In [63]:
list_files = ["kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_0.9_run3.csv"]

In [108]:
list_files = ["/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_0.85_1.csv",
              "/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_0.95_1.csv",
              "/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_1.05_1.csv",
              "/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_resample_1.15_1.csv",
              "/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_rotate_3.csv",
              "/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_rotate_4.csv",
              "/repetition/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact_rotate_5.csv"]

# list_files = ["/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_resample_0.85_1.csv",
#               "/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_resample_0.95_1.csv",
#               "/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_resample_1.05_1.csv",
#               "/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_resample_1.15_1.csv",
#               "/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_rotate_3.csv",
#               "/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_rotate_4.csv",
#               "/repetition/kfold_prediction_cis-pd_on_off_combhpfnoinact_rotate_5.csv"]
# list_files = ["/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_0.85_1.csv",
#               "/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_0.95_1.csv",
#               "/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_1.05_1.csv",
#               "/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_1.15_1.csv",
#              "/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_rotate_3.csv",
#              "/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_rotate_4.csv",
#              "/repetition/kfold_prediction_cis-pd_tremor_combhpfnoinact_rotate_5.csv"]

### Experimenting with balancing datasets with data augmentation

In [129]:
# list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_balance.csv",
#              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_balance_4.csv"]

# list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_balance.csv",
#              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_balance_4.csv"]


list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_balance.csv",
             "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_balance_4.csv"]

### Experimenting with bounds for rotation

In [163]:
# xgb_rotate_bound/

# list_files = ["kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_5.csv",
# "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_10.csv",
# "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_15.csv",
# "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_20.csv",
# "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_25.csv",
# "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_30.csv",
# "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_35.csv",
#              "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_40.csv",
#              "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_45.csv"]

# list_files = ["kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_5.csv",
# "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_10.csv",
# "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_15.csv",
# "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_20.csv",
# "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_25.csv",
# "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_30.csv",
# "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_35.csv",
#              "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_40.csv",
#              "kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_45.csv"]

list_files = ["kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_5.csv",
              "kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_10.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_15.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_20.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_25.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_30.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_35.csv",
             "kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_40.csv",
             "kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_45.csv"]

In [56]:
list_files = ["kfold_prediction_cis-pd_on_off_combhpfnoinact.csv",
              "kfold_prediction_cis-pd_on_off.csv",
              "kfold_prediction_cis-pd_on_off_orig_2.csv",
              "/xgb_lofo/kfold_prediction_cis-pd_on_off_orig.csv",
             "/xgb_lofo/kfold_prediction_cis-pd_on_off_combhpfnoinact.csv"]

In [25]:
# list_files = ["/xgb_lofo/kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_noise_excl25.csv",
#              "/xgb_lofo/kfold_prediction_cis-pd_on_off_combhpfnoinact.rotate_bound_noise_incl25.csv"]

# list_files = ["/xgb_lofo/kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_noise_excl25.csv",
#              "/xgb_lofo/kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_noise_incl25.csv"]

list_files = ["/xgb_lofo/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_noise_excl25.csv",
             "/xgb_lofo/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_noise_incl25.csv"]

In [30]:
list_files = ["/xgb_lofo/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_noise_5_15_30.csv"]

In [54]:
# list_files = ["/xgb_hpf_orignoinact/kfold_prediction_cis-pd_dyskinesia_high_pass.csv",
#              "/xgb_hpf_orignoinact/kfold_prediction_cis-pd_dyskinesia_orignoinact.csv"]

list_files = ["/xgb_hpf_orignoinact/kfold_prediction_cis-pd_on_off_high_pass.csv",
             "/xgb_hpf_orignoinact/kfold_prediction_cis-pd_on_off_orignoinact.csv"]

# list_files = ["/xgb_hpf_orignoinact/kfold_prediction_cis-pd_tremor_high_pass.csv",
#              "/xgb_hpf_orignoinact/kfold_prediction_cis-pd_tremor_orignoinact.csv"]

### Experimenting with lambda

In [252]:
# list_files = ["/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.001.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_lamb_0.001.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.2.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_lamb_0.2.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_lamb_0.4.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.4.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_lamb_0.5.csv",
#                 "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_lamb_0.8.csv",
#                 "/xgb_linear_comb_feat/kfold_prediction_cis-pd_dyskinesia_lamb_1.0.csv"]


# list_files = ["/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_lamb_0.001.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.001.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_lamb_0.2.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.2.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_lamb_0.4.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.4.csv",
#               "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_lamb_0.5.csv",
#             "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_lamb_0.8.csv",
#             "/xgb_linear_comb_feat/kfold_prediction_cis-pd_tremor_lamb_1.0.csv"]

list_files = ["/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_lamb_0.001.csv",
              "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.001.csv",
              "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_lamb_0.2.csv",
              "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.2.csv",
              "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_lamb_0.4.csv",
              "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.4.csv",
              "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_lamb_0.8.csv",
                "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_lamb_0.5.csv",
                "/xgb_linear_comb_feat/kfold_prediction_cis-pd_on_off_lamb_1.0.csv"]


### Data augmentation: Noise experiments

In [265]:
# onoff 

list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1_2.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1_3.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1_4.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1_5.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.2_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.3_1.csv",
             "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.4_1.csv",
             "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.5_1.csv"]

list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.001_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.01_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.01_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.001_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0.1_sig_0.1_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.1_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.2_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.3_1.csv",
             "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.4_1.csv",
             "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_on_off_noise_mu_0_sig_0.5_1.csv"]

In [269]:
#tremor

list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.1_1.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.1_2.csv",
    "kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.1_3.csv",
             "kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.1_4.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.1_5.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.2_1.csv",
"kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.3_1.csv"]


list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.001_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.01_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.01_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.001_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.1_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.2_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.3_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.4_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_noise_mu_0_sig_0.5_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0.1_sig_0.1_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.4_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_tremor_combhpfnoinact.noise_mu_0_sig_0.5_1.csv"]

In [267]:
#dyskinesia 

list_files = ["kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1_2.csv",
              "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1_3.csv",
             "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1_4.csv",
              "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1_5.csv",
              "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.2_1.csv",
              "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.3_1.csv"]

list_files = ["/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.001_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.01_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.01_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.001_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.1_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.2_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.3_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.4_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_noise_mu_0_sig_0.5_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.4_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.5_1.csv",
              "/xgb_noise_diffrent_sigma/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0.1_sig_0.1_1.csv"]

## Combination of data augmentation techniques 

In [76]:
# shrink, rotation double
# list_files = ["kfold_prediction_cis-pd_on_off_combhpfnoinact.three.csv"]
# list_files = ["kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.three.csv"]
list_files = ["kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.combinaison.csv",
            "/xgb_comb_rotate_noise/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.comb_4.csv"]
# list_files = ["kfold_prediction_cis-pd_on_off_combhpfnoinact.combinaison.csv",
#              "/xgb_comb_rotate_noise/kfold_prediction_cis-pd_on_off_combhpfnoinact.comb_4.csv"]
# list_files = ["kfold_prediction_cis-pd_tremor_combhpfnoinact.combinaison.csv",
#              "/xgb_comb_rotate_noise/kfold_prediction_cis-pd_tremor_combhpfnoinact.comb_4.csv"]


In [169]:
# test4.perpatient.csv : [(train_spk, train_y)]

list_files = ["/getpreds_perpatient/kfold_prediction_cis-pd.on_off_dev_folds_inverse_order.perpatient.csv",
              "/getpreds_perpatient/kfold_prediction_cis-pd.on_off_dev_folds.perpatient.csv",
              "/getpreds_perpatient/kfold_prediction_cis-pd.on_off_dev_folds_verif.perpatient.csv",
                "/getpreds_perpatient/kfold_prediction_cis-pd.on_off_test4.perpatient.csv",
             "/getpreds_perpatient/kfold_prediction_cis-pd.on_off_test_only_test_spk_y.perpatient.csv",
             "/getpreds_perpatient/kfold_prediction_cis-pd.on_off_test_both_train_test_spk_y.perpatient.csv",
             "/getpreds_perpatient/kfold_prediction_cis-pd_on_off_test_without_stop.csv"]

# list_files = ["/getpreds_perpatient/kfold_prediction_cis-pd.tremor_dev_folds_inverse_order.perpatient.csv",
#               "/getpreds_perpatient/kfold_prediction_cis-pd.tremor_dev_folds.perpatient.csv",
#               "/getpreds_perpatient/kfold_prediction_cis-pd.tremor_dev_folds_verif.perpatient.csv",
#                 "/getpreds_perpatient/kfold_prediction_cis-pd.tremor_test.perpatient.csv",
#              "/submission4_preds/kfold_prediction_tremor.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd.tremor_test4.perpatient.csv",
#                 "/getpreds_perpatient/kfold_prediction_cis-pd.tremor_test_only_test_spk_y.perpatient.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd.tremor_test_both_train_test_spk_y.perpatient.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd_tremor_test_without_stop.csv"]

# list_files = ["/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_dev_folds_inverse_order.perpatient.csv",
#               "/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_dev_folds.perpatient.csv",
#               "/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_dev_folds.perpatient.csv",
#               "/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_test.perpatient.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_test4.perpatient.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_test_only_test_spk_y.perpatient.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_test_both_train_test_spk_y.perpatient.csv",
#              "/getpreds_perpatient/kfold_prediction_cis-pd_dyskinesia_test_without_stop.csv"]

In [323]:
# list_files = ["/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.001.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.2.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.4.csv",
#               "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.5.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_0.8.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.lamb_1.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_lamb_0.001.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_lamb_0.2.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_lamb_0.4.csv",
#               "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_lamb_0.5.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_lamb_0.8.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_dyskinesia_lamb_1.csv"]



list_files = ["/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.001.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.2.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.4.csv",
              "/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.5.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_0.8.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_combhpfnoinact.lamb_1.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_lamb_0.001.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_lamb_0.2.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_lamb_0.4.csv",
              "/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_lamb_0.5.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_lamb_0.8.csv",
"/xgb_linear_comb_random/kfold_prediction_cis-pd_on_off_lamb_1.csv"]

# list_files = ["/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.001.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.2.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.4.csv",
#               "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.5.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_0.8.csv",
#               "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_combhpfnoinact.lamb_1.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_lamb_0.001.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_lamb_0.2.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_lamb_0.4.csv",
#               "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_lamb_0.5.csv",
# "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_lamb_0.8.csv",
#              "/xgb_linear_comb_random/kfold_prediction_cis-pd_tremor_lamb_1.csv"]

# Function to loop over individual files to get their final scores

In [3]:
# vPredictions = sPathGithub+'/tsfresh/submit/'+"/getpreds_perpatient/kfold_prediction_cis-pd.on_off_dev_folds_inverse_order.perpatient.csv"
# vPredictions = read csv 

def marie_average_fusion(lFilesPred, sDirOut, fileName, bRound, sFileLabels, sTypeLabel, bTestSubset=False):
    """
    Performs Fusion for CIS-PD Approach 1 (tsfresh + xgboost) and Approach 2 (SVR Per Patient)
    
    Keyword arguments:
    TODO
    """
    
    # Read file labels (true labels)
    if not bTestSubset:
        print("sFileLabels : ", sFileLabels)
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dID = {rows[0]:rows[1] for rows in reader} #participant ID
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dOnOff= {rows[0]:rows[2] for rows in reader} #on-off label
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dDys={rows[0]:rows[3] for rows in reader} #dyskinesia label
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dTrem={rows[0]:rows[4] for rows in reader} #tremor label

        #label selection
        if sTypeLabel=='on_off':
            dLabels=dOnOff
        elif sTypeLabel=='tremor':
            dLabels=dTrem
        elif sTypeLabel=='dyskinesia':
            dLabels=dDys
        else:
            print('sTypeLabel undefined')

    lID=[] # measurement_id 
    lDicts=[] 
    vLabels = [] # true label
    vParID=[] # participant ID
    vPred=[]

    with open(lFilesPred, mode='r') as infile:
        reader = csv.reader(infile)
        dPred = {rows[0]:rows[1] for rows in reader} #Prediction from the different classifiers (acoustic, w-embed)
    #creation of the matrix containing prediction from all classifiers
    bEnter=1
    for k in dPred: #first dictionary will be the lead
        if k!='measurement_id':
            lID.append(k) # measurement_id 
            vLabels.append(float(np.asarray(dLabels[k]))) #true labels
            vParID.append(float(np.asarray(dID[k]))) #participant ID
            vPred.append(float(np.asarray(dPred[k]))) #first predicted value
#             fPred=dPred.get(k)

    lID, vPrediction=zip(*sorted(zip(lID, vPred)))
    df = pd.DataFrame({'measurement_id': lID, sTypeLabel:vPrediction})
    df.to_csv(sDirOut+'submissionCisPD'+sTypeLabel+'.csv', index=False)
    print('Submission file was created: '+sDirOut+"submissionCisPD"+sTypeLabel+".csv")
    
    return vPred, vParID, vLabels

# get_final_score(vPredictions, vParID, vTrueLabels)

In [16]:
# list_files = ["/xgb_weitd_thing/kfold_prediction_cis-pd_on_off_weirdthing.csv",
#              "/xgb_weitd_thing/kfold_prediction_cis-pd_on_off_combhpfnoinact.weirdthing.csv"]

list_files = ["/xgb_weitd_thing/kfold_prediction_cis-pd_tremor_weirdthing.csv",
             "/xgb_weitd_thing/kfold_prediction_cis-pd_tremor_combhpfnoinact.weirdthing.csv",
             "/xgb_weitd_thing/kfold_prediction_cis-pd_tremor_weirdthing_add_avg.csv",
             "/xgb_weitd_thing/kfold_prediction_cis-pd_tremor_combhpfnoinact.weirdthing_add_avg.csv"]

list_files = ["/xgb_weitd_thing/kfold_prediction_cis-pd_dyskinesia_weirdthing.csv",
             "/xgb_weitd_thing/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.weirdthing.csv",
             "/xgb_weitd_thing/kfold_prediction_cis-pd_dyskinesia_weirdthing_add_avg.csv",
             "/xgb_weitd_thing/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.weirdthing_add_avg.csv"]

### Repeating Rotation Bounds 5 times to get Avg and Std Dev

In [44]:
# Get the files 1 to 5 

# kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_25_2.csv
# kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_25_3.csv
# kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_25_4.csv
# kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_25_5.csv
# kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_bound_25.csv

# find files with cis-pd_tremor_combhpfnoinact and rotate_bound_25 

# Get a list of all files starting with objs
fold_folder = "/tsfresh/submit/xgb_rotate_bound/"
lObjsFiles = {}
subChallenge="dyskinesia"
summary = ""
for bound in [5, 10, 15, 20, 25, 30, 35, 40, 45]:
    lObjsFiles[bound] = glob.glob(sPathGithub + fold_folder + "/kfold_*cis-pd_"+subChallenge+"_combhpfnoinact.rotate_bound_"+str(bound)+"*")
    
    df_results = get_results(lObjsFiles[bound], subChallenge, bFullPath=True)
    print("For bound " + str(bound))
    the_mean = df_results.mean()[0]
    print('Mean: '+ str(the_mean))
    the_std = df_results.std()[0]
    print('Std: '+ str(the_std))
    summary += str(the_mean) + "(" + str(the_std) + ") \n"
print(summary)



/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_5.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48380007679199794
Overall MSE Classif. 1 - tsfresh:  0.48380007679199794

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_5_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4876557184876095
Overall MSE Classif. 1 - tsfresh:  0.4876557184876095

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/

Unnamed: 0,0,1
0,0.4838,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.487656,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.487136,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.485219,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.486844,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 5
Mean: 0.486130832134808
Std: 0.00158963563706705
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_10.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48519628190194364
Overall MSE Classif. 1 - tsfresh:  0.48519628190194364

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_10_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4876738559770477
Overall MSE Classif. 1 - tsfresh:  0.48767

Unnamed: 0,0,1
0,0.485196,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.487674,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.485319,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.484825,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.484267,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 10
Mean: 0.4854563435563704
Std: 0.0013053116480835636
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_15.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48365167158051614
Overall MSE Classif. 1 - tsfresh:  0.48365167158051614

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_15_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48276670506562575
Overall MSE Classif. 1 - tsfresh:  0.

Unnamed: 0,0,1
0,0.483652,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.482767,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.486048,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.487626,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.483114,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 15
Mean: 0.4846410516152012
Std: 0.0021041987124091927
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_20.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4841850106266582
Overall MSE Classif. 1 - tsfresh:  0.4841850106266582

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_20_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48278404335490405
Overall MSE Classif. 1 - tsfresh:  0.48

Unnamed: 0,0,1
0,0.484185,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.482784,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.485626,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.486827,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.487157,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 20
Mean: 0.4853156754877899
Std: 0.001834143828080135
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_25.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4868767251158731
Overall MSE Classif. 1 - tsfresh:  0.4868767251158731

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_25_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48209358853210577
Overall MSE Classif. 1 - tsfresh:  0.482

Unnamed: 0,0,1
0,0.486877,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.482094,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.488511,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.483715,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.4887,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 25
Mean: 0.4859791989341839
Std: 0.0029513154724161474
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_30.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48312114572072173
Overall MSE Classif. 1 - tsfresh:  0.48312114572072173

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_30_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48543029409707245
Overall MSE Classif. 1 - tsfresh:  0.

Unnamed: 0,0,1
0,0.483121,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.48543,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.483673,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.489401,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.485226,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 30
Mean: 0.4853701262005595
Std: 0.00246011288955651
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_35.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.48670142190201743
Overall MSE Classif. 1 - tsfresh:  0.48670142190201743

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_35_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4866820390160808
Overall MSE Classif. 1 - tsfresh:  0.486

Unnamed: 0,0,1
0,0.486701,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.486682,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.484143,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.487274,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.486649,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 35
Mean: 0.48628995398857294
Std: 0.001227775150515715
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_40.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4862233035229286
Overall MSE Classif. 1 - tsfresh:  0.4862233035229286

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_40_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4884984668529636
Overall MSE Classif. 1 - tsfresh:  0.488

Unnamed: 0,0,1
0,0.486223,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.488498,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.48216,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.487371,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.485944,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 40
Mean: 0.48603946809780796
Std: 0.0023934713003350293
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_45.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4898151612129488
Overall MSE Classif. 1 - tsfresh:  0.4898151612129488

-------------
/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/submit/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_45_2.csv
sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4887209654534923
Overall MSE Classif. 1 - tsfresh:  0.48

Unnamed: 0,0,1
0,0.489815,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
1,0.488721,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
2,0.489088,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
3,0.486954,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...
4,0.490678,/home/mpgill/BeatPD/BeatPD-CLSP-JHU/tsfresh/su...


For bound 45
Mean: 0.4890511564147847
Std: 0.0013908938486587197
0.486130832134808(0.00158963563706705) 
0.4854563435563704(0.0013053116480835636) 
0.4846410516152012(0.0021041987124091927) 
0.4853156754877899(0.001834143828080135) 
0.4859791989341839(0.0029513154724161474) 
0.4853701262005595(0.00246011288955651) 
0.48628995398857294(0.001227775150515715) 
0.48603946809780796(0.0023934713003350293) 
0.4890511564147847(0.0013908938486587197) 



In [42]:
# tsfresh predictions file on test kfolds 
# sFilePred1=sPathGithub+'/tsfresh/submit/rfr_preds/kfold_prediction_cis-pd_on_off.csv'

def get_results(list_files, subChallenge, bFullPath=True):
    df_results = []

    for list_file in list_files:
    #     sFilePred1="/export/fs02/mpgill/null_model/on_off.csv"
        if bFullPath is True:
            sFilePred1 = list_file
        else:
            sFilePred1 = sPathGithub+'/tsfresh/submit/'+list_file
        print(list_file)
        # SVR predictions files on test kfolds 
        # sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

    #     lFilesPred=[sFilePred1,sFilePred1]
        lFilesPred = sFilePred1 

        # Path to labels on CIS-PD 
        sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

        # Folder where we want to save the csv file with the average results 
        dest_dir=sDirOut 

        # Name of the CSV file to be created with the average 
        fileName ="dyskinesia_teeerrrest"

        sTypeLabel = subChallenge

        # Flag to round to the nearest integer or not 
        # BUG, FIXME: It doesn't seem like this changes anything? 
        bRound=0


        # Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
        vRes1, vParID, vLabels = marie_average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)
#         vRes2, vRes3, vAverage, vParID, vLabels  = average_fusion([lFilesPred, lFilesPred], dest_dir, fileName, bRound, sFileLabels, sTypeLabel)


        score = get_final_score(np.asarray(vRes1), np.array(vParID).astype(int), vLabels)
        print('Overall MSE Classif. 1 - tsfresh: ', score)
        df_results.append([score, list_file])
    #     print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(np.asarray(vRes3), np.array(vParID).astype(int), vLabels))

        print()
        print('-------------')
    df_results = pd.DataFrame(df_results)
    display(df_results)
    
    return df_results

# Fusion i-vectors à 1.13 + tsfresh per patient tuning  

In [87]:
# TODO 
# dysk + gaussian noise a eu 0.4827 (rep 1)
dysk_gaussian = "kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1.csv"

svr_res ="/export/b19/mpgill/kaldi/egs/beatPDivec/dysk_orig_auto60_400fl_scratch/exp/ivec_650/resiVecSVR_Fold/preds_per_patient.csv"

# dysk + rotate noise 30 25 bound a eu 0.4831
dysk_bound_25 = "/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_25.csv"
dysk_bound_30 = "/xgb_rotate_bound/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.rotate_bound_30.csv"

sFilePred1=svr_res#sPathGithub+'/tsfresh/submit/submission4_preds/kfold_prediction_dyskinesia.csv'


# tsfresh predictions file on test kfolds 
sFilePred2=sPathGithub+'/tsfresh/submit/'+dysk_bound_30

# SVR predictions files on test kfolds 
# sFilePred2=sPathKaldi+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))



sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
1188
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.5144468970875267
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.4831211457207217
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.4919636851676098
Overall MSE Fusion - average :  None


# Kruskal-Wallis H Test

In [328]:
def print_stat(stat, p):
    print('Statistics=%.3f, p=%.3f' % (stat, p))
    # interpret
    alpha = 0.05
    if p > alpha:
        print('Same distributions (fail to reject H0)')
    else:
        print('Different distributions (reject H0)')

In [351]:
# Kruskal-Wallis H-test
from numpy.random import seed
from numpy.random import randn
from scipy.stats import kruskal
from scipy.stats import wilcoxon
# seed the random number generator
seed(1)
# seed the random number generator

# generate three independent samples
# data1 = 5 * randn(100) + 50
# data2 = 5 * randn(100) + 50
# # data3 = 5 * randn(100) + 52

# print(data1)
# print(type(data1))

# PREDS
# tremor_combhpfnoinact_resample_09 = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_tremor_combhpfnoinact_resample_0.9.csv')
# tremor_combhpfnoinact = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_tremor_combhpfnoinact.csv')
# tremor_combhpfnoinact_rotate_1 = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_tremor_combhpfnoinact_rotate_1.csv')

#MSEk 
tremor_combhpfnoinact_resample_09 = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'/xgb_msek/msek_cis-pd_tremor_combhpfnoinact_resample_0.9.csv')
tremor_combhpfnoinact_resample_11 = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'/xgb_msek/msek_cis-pd_tremor_combhpfnoinact_resample_1.1.csv')
tremor_combhpfnoinact = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'/xgb_msek/msek_cis-pd_tremor_combhpfnoinact.csv')
tremor_combhpfnoinact_rotate_1 = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'/xgb_msek/msek_cis-pd_tremor_combhpfnoinact_rotate_1.csv')



# compare samples
print('-------- tremor_combhpfnoinact.tremor, tremor_combhpfnoinact.resample11 ------')
stat, p = kruskal(tremor_combhpfnoinact, tremor_combhpfnoinact_resample_11)
print_stat(stat, p)
stat, p = wilcoxon(tremor_combhpfnoinact.values.squeeze(), tremor_combhpfnoinact_resample_11.values.squeeze())
print_stat(stat, p)


print('-------- tremor_combhpfnoinact_resample_09.tremor, tremor_combhpfnoinact.tremor ------')
stat, p = kruskal(tremor_combhpfnoinact_resample_09, tremor_combhpfnoinact)
print_stat(stat, p)
stat, p = wilcoxon(tremor_combhpfnoinact_resample_09.values.squeeze(), tremor_combhpfnoinact.values.squeeze())
print_stat(stat, p)

print('------ tremor_combhpfnoinact_resample_09.tremor, tremor_combhpfnoinact_rotate_1.tremor ---------')
stat, p = kruskal(tremor_combhpfnoinact_resample_09, tremor_combhpfnoinact_rotate_1)
print_stat(stat, p)
stat, p = wilcoxon(tremor_combhpfnoinact_resample_09.values.squeeze(), tremor_combhpfnoinact_rotate_1.values.squeeze())
print_stat(stat, p)


-------- tremor_combhpfnoinact.tremor, tremor_combhpfnoinact.resample11 ------
Statistics=0.013, p=0.908
Same distributions (fail to reject H0)
Statistics=16.000, p=0.071
Same distributions (fail to reject H0)
-------- tremor_combhpfnoinact_resample_09.tremor, tremor_combhpfnoinact.tremor ------
Statistics=0.013, p=0.908
Same distributions (fail to reject H0)
Statistics=38.000, p=0.937
Same distributions (fail to reject H0)
------ tremor_combhpfnoinact_resample_09.tremor, tremor_combhpfnoinact_rotate_1.tremor ---------
Statistics=0.003, p=0.954
Same distributions (fail to reject H0)
Statistics=37.000, p=0.875
Same distributions (fail to reject H0)


In [325]:
on_off_noise_rep2 = pd.read_csv(sPathGithub+'/tsfresh/submit/xgb_noise_diffrent_sigma/'+"msek_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1_3.csv") #"kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.1_2.csv")
on_off_combhpfnoinact_noise_03 = pd.read_csv(sPathGithub+'/tsfresh/submit/xgb_noise_diffrent_sigma/'+"msek_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.3_1.csv")#"kfold_prediction_cis-pd_on_off_combhpfnoinact.noise_mu_0_sig_0.3_1.csv")

stat, p = kruskal(on_off_noise_rep2, on_off_combhpfnoinact_noise_03)
print_stat(stat, p)



NameError: name 'kruskal' is not defined

In [29]:
dyskinesia_combhpfnoinact = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.csv')
dyskinesia_combhpfnoinact_noise_mu_0_sig_01 = pd.read_csv(sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1.csv')

print('------- dyskinesia_combhpfnoinact  dyskinesia_combhpfnoinact_noise_mu_0_sig_01 -------')
stat, p = kruskal(dyskinesia_combhpfnoinact.dyskinesia, dyskinesia_combhpfnoinact_noise_mu_0_sig_01.dyskinesia)
print_stat(stat, p)


------- dyskinesia_combhpfnoinact  dyskinesia_combhpfnoinact_noise_mu_0_sig_01 -------
Statistics=0.586, p=0.444
Same distributions (fail to reject H0)


# Trying to reproduce Fusion for Baseline 

In [91]:
sFilePred1=sPathGithub+'/tsfresh/submit/submission4_preds/kfold_prediction_on_off.csv'
sFilePred2="/export/b19/mpgill/kaldi/egs/beatPDivec/on_off_hpf_auto30/exp/ivec_600/resiVecPerPatientSVR_Fold_all/preds_per_patient.csv"


lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "on_off"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
1767
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDon_off.csv
--- MSEscore ---
Final score :  1.1508101723596895
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  1.1406808039374068
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  1.1179727806077415
Overall MSE Fusion - average :  None


In [89]:
sFilePred1=sPathGithub+'/tsfresh/submit/submission4_preds/kfold_prediction_tremor.csv'
sFilePred2=sPathGithub+'/tsfresh/submit/xgb_comb_rotate_noise/kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_1_resample09.csv'

lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "tremor"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
1462
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDtremor.csv
--- MSEscore ---
Final score :  0.45400906360229143
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.43757835974178083
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.442315955236196
Overall MSE Fusion - average :  None


## Fusion for dyskinesia with Nanxin Per Patient Tuning Results

In [94]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/submission4_preds/kfold_prediction_dyskinesia.csv'
# sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1.csv'
# sFilePred1=sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_dyskinesia_original.csv'

sPathKaldiLaure = "/export/c08/lmorove1/"
# SVR predictions files on test kfolds 
# sFilePred1 = sPathKaldiLaure+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'
sFilePred2=sPathKaldiLaure+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4830357155225596
Overall MSE Classif. 1 - tsfresh:  None
--- MSEscore ---
Final score :  0.4810183429568722
Overall MSE Classif. 2 - ivec:  None
--- MSEscore ---
Final score :  0.4700143231441346
Overall MSE Fusion - average :  None


In [69]:
# tsfresh predictions file on test kfolds 
sFilePred1=sPathGithub+'/tsfresh/submit/submission4_preds/kfold_prediction_dyskinesia.csv'
sFilePred1=sPathGithub+'/tsfresh/submit/getpreds_perpatient/kfold_prediction_cis-pd.dyskinesia_dev_folds_thesis.perpatient.csv'
# sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1.csv'
# sFilePred1=sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_dyskinesia_original.csv'

sPathKaldiLaure = "/export/c08/lmorove1/"
# SVR predictions files on test kfolds 
# sFilePred1 = sPathKaldiLaure+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'
sFilePred2=sPathKaldiLaure+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'

lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "dyskinesia"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDdyskinesia.csv
--- MSEscore ---
Final score :  0.4978139997592901
Overall MSE Classif. 1 - tsfresh:  0.4978139997592901
--- MSEscore ---
Final score :  0.48101834295687207
Overall MSE Classif. 2 - ivec:  0.48101834295687207
--- MSEscore ---
Final score :  0.4720796751517645
Overall MSE Fusion - average :  0.4720796751517645


## Fusion of tsfresh + i-vectors for tremor

In [68]:
# tsfresh predictions file on test kfolds 
# sFilePred1=sPathGithub+'/tsfresh/submit/submission4_preds/kfold_prediction_dyskinesia.csv'
# sFilePred1=sPathGithub+"/tsfresh/submit/xgb_comb_rotate_noise/kfold_prediction_cis-pd_tremor_combhpfnoinact.rotate_1_resample09.csv"
sFilePred1=sPathGithub+'/tsfresh/submit/getpreds_perpatient/kfold_prediction_cis-pd.tremor_dev_folds_thesis.perpatient.csv'
# sFilePred1=sPathGithub+'/tsfresh/submit/getpreds_perpatient/kfold_prediction_cis-pd.tremor_dev_folds_verif.perpatient.csv'
# kfold_prediction_cis-pd_tremor_combhpfnoinact.csv
# sFilePred1=sPathGithub+'/tsfresh/submit/kfold_prediction_cis-pd_dyskinesia_combhpfnoinact.noise_mu_0_sig_0.1.csv'
# sFilePred1=sPathGithub+'/tsfresh/submit/'+'kfold_prediction_cis-pd_dyskinesia_original.csv'

sPathKaldiLaure = "/export/c08/lmorove1/"
# SVR predictions files on test kfolds à
# sFilePred1 = sPathKaldiLaure+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'
# sFilePred2=sPathKaldiLaure+'/kaldi/egs/beatPDivec/dysk_noinact_auto30/exp/ivec_650/resiVecPerPatientSVR_Fold_all_goodparams/preds_per_patient.csv'
sFilePred2 = '/export/b19/mpgill/kaldi/egs/beatPDivec/trem_hpf_auto30/exp/ivec_700/resiVecSVR_Fold_all/preds_per_patient.csv'
lFilesPred=[sFilePred1,sFilePred2]

# Path to labels on CIS-PD 
sFileLabels=sPathData+'/cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv'

# Folder where we want to save the csv file with the average results 
dest_dir=sDirOut 

# Name of the CSV file to be created with the average 
fileName ="dyskinesia_teeerrrest"

sTypeLabel = "tremor"

# Flag to round to the nearest integer or not 
# BUG, FIXME: It doesn't seem like this changes anything? 
bRound=0


# Fusion with average of predictions get_final_score(vPredictions, vParID, vTrueLabels)
vRes1, vRes2, vAverage, vParID, vLabels = average_fusion(lFilesPred, dest_dir, fileName, bRound, sFileLabels, sTypeLabel)

print('Overall MSE Classif. 1 - tsfresh: ', get_final_score(vRes1, np.array(vParID).astype(int), vLabels))
print('Overall MSE Classif. 2 - ivec: ', get_final_score(vRes2, vParID, vLabels))
print('Overall MSE Fusion - average : ', get_final_score(vAverage, vParID, vLabels))

sFileLabels :  /home/sjoshi/codes/python/BeatPD/data/BeatPD//cis-pd.data_labels/CIS-PD_Training_Data_IDs_Labels.csv
Number of analyzed file inputs: 2
Submission file was created: /export/b19/mpgill/BeatPD_predictions_tryingout/submissionCisPDtremor.csv
--- MSEscore ---
Final score :  0.4374039804373796
Overall MSE Classif. 1 - tsfresh:  0.4374039804373796
--- MSEscore ---
Final score :  0.4538593286572797
Overall MSE Classif. 2 - ivec:  0.4538593286572797
--- MSEscore ---
Final score :  0.4299434990435165
Overall MSE Fusion - average :  0.4299434990435165


In [46]:
def average_fusion(lFilesPred, sDirOut, fileName, bRound, sFileLabels, sTypeLabel, bTestSubset=False):
    """
    Performs Fusion for CIS-PD Approach 1 (tsfresh + xgboost) and Approach 2 (SVR Per Patient)
    
    Keyword arguments:
    TODO
    """
    
    # Read file labels (true labels)
    if not bTestSubset:
        print("sFileLabels : ", sFileLabels)
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dID = {rows[0]:rows[1] for rows in reader} #participant ID
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dOnOff= {rows[0]:rows[2] for rows in reader} #on-off label
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dDys={rows[0]:rows[3] for rows in reader} #dyskinesia label
        with open(sFileLabels, mode='r') as infile:
            reader = csv.reader(infile)
            dTrem={rows[0]:rows[4] for rows in reader} #tremor label

        #label selection
        if sTypeLabel=='on_off':
            dLabels=dOnOff
        elif sTypeLabel=='tremor':
            dLabels=dTrem
        elif sTypeLabel=='dyskinesia':
            dLabels=dDys
        else:
            print('sTypeLabel undefined')
        
    iNumFiles=len(lFilesPred)
    print('Number of analyzed file inputs: '+ str(iNumFiles))
    vPredIter=np.zeros((1,iNumFiles))
    mPredictions=np.zeros((1,iNumFiles))
    lID=[] # measurement_id 
    lDicts=[] 
    vLabels = [] # true label
    vParID=[] # participant ID

    for sFilePred in lFilesPred:
        with open(sFilePred, mode='r') as infile:
            reader = csv.reader(infile)
            dPred = {rows[0]:rows[1] for rows in reader} #Prediction from the different classifiers (acoustic, w-embed)
        lDicts.append(dPred)
    #creation of the matrix containing prediction from all classifiers
    bEnter=1
    for k in lDicts[0]: #first dictionary will be the lead
        if k!='measurement_id':
            lID.append(k) # measurement_id 
            if not bTestSubset:
                vLabels.append(float(np.asarray(dLabels[k]))) #true labels
                vParID.append(float(np.asarray(dID[k]))) #participant ID
            vPredIter[0,0]=float(np.asarray(lDicts[0][k])) #first predicted value
            # Go through the second and next predictions files
            for j in range(1, iNumFiles):
                # fPred will contain the prediction of the predictions file we're going through
                fPred=lDicts[j].get(k)
                if fPred: # if we found a prediction
                    # we add it in the array
                    vPredIter[0,j]=float(np.asarray(fPred))
                else:
                    print(['Unknown key:' + k])
                    vPredIter[0,j]=float(np.asarray(lDicts[0][k]))
            if bEnter==1:
                # if it's the first prediction file, we initialize it
                mPredictions=vPredIter.copy() # mPredictions initialization.
                bEnter=0
            else:
                # We add the new predictions as a new column if it's not the first file we're going through
                mPredictions=np.append(mPredictions,vPredIter,axis=0)

    vAverage=np.mean(mPredictions,axis=1)
    if bRound==1:
        vPrediction=np.round(vAverage)
    else:
        vPrediction=vAverage
    
    vRes1=mPredictions[:,[0]]
    vRes2=mPredictions[:,[1]]

    lID, vPrediction=zip(*sorted(zip(lID, vPrediction)))
    df = pd.DataFrame({'measurement_id': lID, sTypeLabel:vPrediction})
    df.to_csv(sDirOut+'submissionCisPD'+sTypeLabel+'.csv', index=False)
    print('Submission file was created: '+sDirOut+"submissionCisPD"+sTypeLabel+".csv")
    
    return vRes1, vRes2, vAverage, vParID, vLabels

In [3]:
import ipdb