In [1]:
import pandas as pd 
import pickle 
from sklearn.metrics import confusion_matrix, precision_recall_curve, average_precision_score
from sklearn.metrics import auc, roc_auc_score, roc_curve, f1_score, accuracy_score, recall_score, precision_score, brier_score_loss
import numpy as np

In [2]:
# File stems of the test splits. Each stem names both a results pickle
# ('<stem>_<seed>.pkl') and a data CSV ('<stem>.csv') loaded further down.
test_datasets = [
    'test_df_global_to_save',
    'test_df_validation_1_to_save',
    'test_df_validation_2_to_save',
]

# Suffix that selects which run's result pickles are read.
seed = 289

In [3]:
raw_results_dir = '../fusion_model/'

# Rows removed from individual splits once the predictions have been attached.
excluded_row_ids = {
    'test_df_validation_1_to_save': ['vdbmsErHFueM305Ch97LzcQOvvt2#2024-05-31'],
    'test_df_validation_2_to_save': ['vdbmsErHFueM305Ch97LzcQOvvt2#2023-04-26', 'vdbmsErHFueM305Ch97LzcQOvvt2#2023-05-01'],
}

df_test_datasets = []
for dataset_name in test_datasets:
    # NOTE(review): pickle.load can execute arbitrary code stored in the file;
    # only read result files produced by our own pipeline.
    with open(f'{raw_results_dir}{dataset_name}_{seed}.pkl', 'rb') as pkl:
        results = pickle.load(pkl)

    split_labels = results['all_labels']
    fusion_scores = results['all_final_predictions']
    modality_scores = results['all_pred_scores']
    uncertain_flags = results['uncertain_indices']
    fusion_std = results['all_final_std']

    # Entries 0, 1, 2 of all_pred_scores are stored as the finger, speech and smile scores.
    finger_scores = modality_scores[0]
    speech_scores = modality_scores[1]
    smile_scores = modality_scores[2]

    # Quick visual check that every array has one entry per row.
    print(len(split_labels), len(fusion_scores), len(uncertain_flags), len(finger_scores))

    # The arrays are attached positionally, so the CSV rows must be in the
    # same order as the pickled results.
    test_data = pd.read_csv(f'../../data/{dataset_name}.csv')
    test_data['true_label'] = split_labels
    test_data['pred_score_finger'] = finger_scores
    test_data['pred_score_speech'] = speech_scores
    test_data['pred_score_smile'] = smile_scores
    test_data['pred_score_fusion'] = fusion_scores
    test_data['uncertain_indices'] = uncertain_flags
    # Drop the first and last 8 characters ('test_df_' and '_to_save') to get the split name.
    test_data['test_split'] = dataset_name[8:-8]
    test_data['pred_std_fusion'] = fusion_std

    if dataset_name in excluded_row_ids:
        test_data = test_data[~test_data['row_id'].isin(excluded_row_ids[dataset_name])]

    df_test_datasets.append(test_data)

# Stack the splits; each split keeps its own row labels.
test_data = pd.concat(df_test_datasets)
test_data


162 162 162 162
92 92 92 92
69 69 69 69


Unnamed: 0,row_id,id,label,true_label,pred_score_finger,pred_score_speech,pred_score_smile,pred_score_fusion,uncertain_indices,test_split,pred_std_fusion
0,NIHNT179KNNF4#2022-03-24,NIHNT179KNNF4,1.0,1.0,0.613548,0.568245,0.885688,0.988402,False,global,0.001296
1,NIHNT179KNNF4#2023-06-30,NIHNT179KNNF4,1.0,1.0,0.472465,0.779543,0.280119,0.665972,False,global,0.032354
2,NIHNT823CHAC3#2022-05-20,NIHNT823CHAC3,0.0,0.0,0.671538,0.671377,0.905939,0.976564,False,global,0.003919
3,NIHNT823CHAC3#2021-05-07,NIHNT823CHAC3,0.0,0.0,0.710134,0.303872,0.577415,0.540549,False,global,0.023983
4,NIHNT823CHAC3#2021-11-01,NIHNT823CHAC3,0.0,0.0,0.217497,0.467424,0.863133,0.112835,False,global,0.013366
...,...,...,...,...,...,...,...,...,...,...,...
62,xSKwE2YgQFaXtUTTjbOyFhx3oCm2#2024-04-12,xSKwE2YgQFaXtUTTjbOyFhx3oCm2,1.0,1.0,0.831928,0.483484,0.604220,0.889474,False,validation_2,0.018489
63,xtsnpDyq8BYQUtRaZjSjSCTGVoe2#2024-12-02,xtsnpDyq8BYQUtRaZjSjSCTGVoe2,0.0,0.0,0.424025,0.074897,0.347749,0.273126,False,validation_2,0.037375
64,yWXLyTJ9MwUskybFC1AqHA1yoN73#2024-12-02,yWXLyTJ9MwUskybFC1AqHA1yoN73,0.0,0.0,0.449658,0.151100,0.311693,0.302878,False,validation_2,0.043463
65,yxycFfgyJjMYeRRaw9ENTsXeE4H2#2024-12-02,yxycFfgyJjMYeRRaw9ENTsXeE4H2,0.0,0.0,0.352672,0.127602,0.127513,0.023653,False,validation_2,0.011292


In [4]:
def safe_divide(numerator, denominator):
    """Return ``numerator / denominator``, or ``0`` when the denominator equals zero."""
    return numerator / denominator if denominator != 0 else 0

In [5]:
def compute_metrics(y_true, y_pred_scores, threshold = 0.5):
    """Compute binary-classification metrics from labels and predicted scores.

    Args:
        y_true: Array-like of ground-truth labels, flattened to 1-D. Labels must
            be 0 (negative) / 1 (positive): predictions are produced as 0/1, so
            any other label set cannot form a 2x2 confusion matrix.
        y_pred_scores: Array-like of predicted scores, flattened to 1-D.
        threshold: Scores ``>= threshold`` count as positive predictions.

    Returns:
        dict with the keys ``accuracy``, ``average_precision``, ``auroc``,
        ``f1_score``, ``confusion_matrix`` (a dict with ``tn``/``fp``/``fn``/``tp``),
        ``weighted_accuracy``, ``TPR``/``recall``/``sensitivity``, ``FPR``,
        ``PPV``/``precision``, ``NPV`` and ``TNR``/``specificity``.
        Ratios whose denominator is zero are reported as 0 by ``safe_divide``.
    """
    labels = np.asarray(y_true).reshape(-1)
    pred_scores = np.asarray(y_pred_scores).reshape(-1)
    preds = (pred_scores >= threshold)

    metrics = {}
    metrics['accuracy'] = accuracy_score(labels, preds)
    metrics['average_precision'] = average_precision_score(labels, pred_scores)
    metrics['auroc'] = roc_auc_score(labels, pred_scores)
    metrics['f1_score'] = f1_score(labels, preds)

    # Pin the label set so the matrix is always 2x2. Without it, a subset in
    # which labels and predictions contain a single class gives a 1x1 matrix
    # and the four-way unpacking below fails.
    tn, fp, fn, tp = confusion_matrix(
        labels.astype(int), preds.astype(int), labels=[0, 1]
    ).ravel()
    metrics["confusion_matrix"] = {"tn":tn, "fp":fp, "fn":fn, "tp":tp}

    # NOTE(review): this is the mean of PPV (tp / (tp + fp)) and NPV
    # (tn / (tn + fn)), not the mean of sensitivity and specificity that is
    # usually called balanced accuracy. Confirm this definition is intended.
    metrics["weighted_accuracy"] = (safe_divide(tp, tp + fp) + safe_divide(tn, tn + fn)) / 2.0

    # True positive rate / recall / sensitivity: probability of identifying a
    # positive case (often called the power of a test).
    metrics['TPR'] = metrics['recall'] = metrics['sensitivity'] = recall_score(labels, preds)

    # False positive rate: probability of falsely identifying someone as
    # positive who is actually negative.
    metrics['FPR'] = safe_divide(fp, fp+tn)

    # Positive predictive value: probability that a patient with a positive
    # test result actually has the disease.
    metrics['PPV'] = metrics['precision'] = precision_score(labels, preds)

    # Negative predictive value: probability that a patient with a negative
    # test result actually does not have the disease.
    metrics['NPV'] = safe_divide(tn, tn+fn)

    # True negative rate / specificity: probability of a negative test result,
    # conditioned on the individual truly being negative.
    metrics['TNR'] = metrics['specificity'] = safe_divide(tn,(tn+fp))

    return metrics

In [6]:
# Fusion-model metrics over every concatenated test row, thresholded at 0.5.
compute_metrics(test_data['true_label'], test_data['pred_score_fusion'], threshold = 0.5)

{'accuracy': 0.803125,
 'average_precision': 0.8682044700620333,
 'auroc': 0.8699507389162561,
 'f1_score': 0.8264462809917356,
 'confusion_matrix': {'tn': 107, 'fp': 38, 'fn': 25, 'tp': 150},
 'weighted_accuracy': 0.8042392005157962,
 'TPR': 0.8571428571428571,
 'recall': 0.8571428571428571,
 'sensitivity': 0.8571428571428571,
 'FPR': 0.2620689655172414,
 'PPV': 0.7978723404255319,
 'precision': 0.7978723404255319,
 'NPV': 0.8106060606060606,
 'TNR': 0.7379310344827587,
 'specificity': 0.7379310344827587}

In [7]:
# Keep only the rows whose `uncertain_indices` flag is False, then score the
# fusion predictions on that subset.
test_data_certain = test_data[~test_data['uncertain_indices']]
compute_metrics(test_data_certain['true_label'], test_data_certain['pred_score_fusion'], threshold = 0.5)

{'accuracy': 0.8083067092651757,
 'average_precision': 0.8703813965186898,
 'auroc': 0.8740436034553681,
 'f1_score': 0.8305084745762712,
 'confusion_matrix': {'tn': 106, 'fp': 37, 'fn': 23, 'tp': 147},
 'weighted_accuracy': 0.810309234917425,
 'TPR': 0.8647058823529412,
 'recall': 0.8647058823529412,
 'sensitivity': 0.8647058823529412,
 'FPR': 0.25874125874125875,
 'PPV': 0.7989130434782609,
 'precision': 0.7989130434782609,
 'NPV': 0.8217054263565892,
 'TNR': 0.7412587412587412,
 'specificity': 0.7412587412587412}

In [8]:
# Fusion metrics on the non-uncertain rows of the 'global' split.
test_data_certain_global = test_data_certain.loc[test_data_certain['test_split'].eq('global')]
compute_metrics(
    test_data_certain_global['true_label'],
    test_data_certain_global['pred_score_fusion'],
    threshold=0.5,
)

{'accuracy': 0.80625,
 'average_precision': 0.8757657899381531,
 'auroc': 0.8751417004048583,
 'f1_score': 0.841025641025641,
 'confusion_matrix': {'tn': 47, 'fp': 18, 'fn': 13, 'tp': 82},
 'weighted_accuracy': 0.8016666666666666,
 'TPR': 0.8631578947368421,
 'recall': 0.8631578947368421,
 'sensitivity': 0.8631578947368421,
 'FPR': 0.27692307692307694,
 'PPV': 0.82,
 'precision': 0.82,
 'NPV': 0.7833333333333333,
 'TNR': 0.7230769230769231,
 'specificity': 0.7230769230769231}

In [9]:
# Fusion metrics on the non-uncertain rows of the 'validation_1' split.
test_data_certain_val_1 = test_data_certain.loc[test_data_certain['test_split'].eq('validation_1')]
compute_metrics(
    test_data_certain_val_1['true_label'],
    test_data_certain_val_1['pred_score_fusion'],
    threshold=0.5,
)

{'accuracy': 0.8021978021978022,
 'average_precision': 0.8760579460263827,
 'auroc': 0.8717201166180758,
 'f1_score': 0.8235294117647058,
 'confusion_matrix': {'tn': 31, 'fp': 11, 'fn': 7, 'tp': 42},
 'weighted_accuracy': 0.8041211519364448,
 'TPR': 0.8571428571428571,
 'recall': 0.8571428571428571,
 'sensitivity': 0.8571428571428571,
 'FPR': 0.2619047619047619,
 'PPV': 0.7924528301886793,
 'precision': 0.7924528301886793,
 'NPV': 0.8157894736842105,
 'TNR': 0.7380952380952381,
 'specificity': 0.7380952380952381}

In [10]:
# Fusion metrics on the non-uncertain rows of the 'validation_2' split.
test_data_certain_val_2 = test_data_certain.loc[test_data_certain['test_split'].eq('validation_2')]
compute_metrics(
    test_data_certain_val_2['true_label'],
    test_data_certain_val_2['pred_score_fusion'],
    threshold=0.5,
)

{'accuracy': 0.8225806451612904,
 'average_precision': 0.803277236328533,
 'auroc': 0.8589743589743589,
 'f1_score': 0.8070175438596491,
 'confusion_matrix': {'tn': 28, 'fp': 8, 'fn': 3, 'tp': 23},
 'weighted_accuracy': 0.8225806451612903,
 'TPR': 0.8846153846153846,
 'recall': 0.8846153846153846,
 'sensitivity': 0.8846153846153846,
 'FPR': 0.2222222222222222,
 'PPV': 0.7419354838709677,
 'precision': 0.7419354838709677,
 'NPV': 0.9032258064516129,
 'TNR': 0.7777777777777778,
 'specificity': 0.7777777777777778}

In [11]:
# Metadata table keyed by Participant_ID; its Protocol, demographic and
# Diagnosis columns are merged into the test rows further down.
metadata = pd.read_csv('../../data/metadata_updated.csv')
# if Protocol is ParkTest, then the Participant ID is the Participant ID[20:]
def get_participant_id(row):
    """Return the row's participant ID, without its first 20 characters for ParkTest rows.

    ``row`` only needs to support ``row['Protocol']`` and ``row['Participant_ID']``.
    """
    is_parktest = row['Protocol'] == 'ParkTest'
    participant_id = row['Participant_ID']
    return participant_id[20:] if is_parktest else participant_id

# Rewrite the IDs row by row, then preview the ParkTest rows to eyeball the result.
metadata['Participant_ID'] = metadata.apply(get_participant_id, axis=1)
metadata.loc[metadata['Protocol'].eq('ParkTest')].head()

Unnamed: 0,Protocol,Participant_ID,Task,gender,age,race,pd,Diagnosis,gender_normalized,age_normalized,age_numeric,age_processed,race_normalized,env
543,ParkTest,530Z49,ahhhh,female,76.0,white,yes,1,Female,60 - 79,76.0,76.0,White,Home-Global
544,ParkTest,004Z53,ahhhh,male,76.0,white,no,0,Male,60 - 79,76.0,76.0,White,Home-Global
545,ParkTest,948Z87,ahhhh,female,74.0,white,yes,1,Female,60 - 79,74.0,74.0,White,Home-Global
546,ParkTest,872Z33,ahhhh,female,73.0,white,yes,1,Female,60 - 79,73.0,73.0,White,Home-Global
547,ParkTest,280Z18,ahhhh,female,61.0,white,yes,1,Female,60 - 79,61.0,61.0,White,Home-Global


In [12]:
# Merge test_data with selected columns from metadata on participant ID.
test_data_w_metadata = pd.merge(
    test_data,
    # Collapse exact duplicate metadata rows first: a left merge emits one
    # output row per matching right row, so a repeated participant would
    # silently duplicate test rows.
    metadata[[
        'Participant_ID',
        'Protocol',
        'gender_normalized',
        'age_processed',
        'age_normalized',
        'race_normalized',
        'Diagnosis'
    ]].drop_duplicates(),
    left_on='id',
    right_on='Participant_ID',
    how='left',  # Use 'left' to preserve all test_data rows even if metadata is missing
    # Fail loudly if one participant still maps to several, conflicting
    # metadata rows, instead of inflating the test set.
    validate='many_to_one',
)

In [13]:
# Display the merged frame (test rows plus the selected metadata columns).
test_data_w_metadata

Unnamed: 0,row_id,id,label,true_label,pred_score_finger,pred_score_speech,pred_score_smile,pred_score_fusion,uncertain_indices,test_split,pred_std_fusion,Participant_ID,Protocol,gender_normalized,age_processed,age_normalized,race_normalized,Diagnosis
0,NIHNT179KNNF4#2022-03-24,NIHNT179KNNF4,1.0,1.0,0.613548,0.568245,0.885688,0.988402,False,global,0.001296,NIHNT179KNNF4,SuperPD,Female,70.0,60 - 79,White,1
1,NIHNT179KNNF4#2023-06-30,NIHNT179KNNF4,1.0,1.0,0.472465,0.779543,0.280119,0.665972,False,global,0.032354,NIHNT179KNNF4,SuperPD,Female,70.0,60 - 79,White,1
2,NIHNT823CHAC3#2022-05-20,NIHNT823CHAC3,0.0,0.0,0.671538,0.671377,0.905939,0.976564,False,global,0.003919,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0
3,NIHNT823CHAC3#2021-05-07,NIHNT823CHAC3,0.0,0.0,0.710134,0.303872,0.577415,0.540549,False,global,0.023983,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0
4,NIHNT823CHAC3#2021-11-01,NIHNT823CHAC3,0.0,0.0,0.217497,0.467424,0.863133,0.112835,False,global,0.013366,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,xSKwE2YgQFaXtUTTjbOyFhx3oCm2#2024-04-12,xSKwE2YgQFaXtUTTjbOyFhx3oCm2,1.0,1.0,0.831928,0.483484,0.604220,0.889474,False,validation_2,0.018489,xSKwE2YgQFaXtUTTjbOyFhx3oCm2,ValidationStudy,Male,72.0,60 - 79,White,1
316,xtsnpDyq8BYQUtRaZjSjSCTGVoe2#2024-12-02,xtsnpDyq8BYQUtRaZjSjSCTGVoe2,0.0,0.0,0.424025,0.074897,0.347749,0.273126,False,validation_2,0.037375,xtsnpDyq8BYQUtRaZjSjSCTGVoe2,ValidationStudy,Male,51.0,40 - 59,White,0
317,yWXLyTJ9MwUskybFC1AqHA1yoN73#2024-12-02,yWXLyTJ9MwUskybFC1AqHA1yoN73,0.0,0.0,0.449658,0.151100,0.311693,0.302878,False,validation_2,0.043463,yWXLyTJ9MwUskybFC1AqHA1yoN73,ValidationStudy,Male,61.0,60 - 79,Others,0
318,yxycFfgyJjMYeRRaw9ENTsXeE4H2#2024-12-02,yxycFfgyJjMYeRRaw9ENTsXeE4H2,0.0,0.0,0.352672,0.127602,0.127513,0.023653,False,validation_2,0.011292,yxycFfgyJjMYeRRaw9ENTsXeE4H2,ValidationStudy,Female,59.0,40 - 59,White,0


In [14]:
# Write the merged frame to CSV without the index column.
test_data_w_metadata.to_csv('../../data/test_data_w_metadata.csv', index=False)

In [15]:
# Read back the CSV written to this same path above and preview the first rows.
df = pd.read_csv('../../data/test_data_w_metadata.csv')
df.head()

Unnamed: 0,row_id,id,label,true_label,pred_score_finger,pred_score_speech,pred_score_smile,pred_score_fusion,uncertain_indices,test_split,pred_std_fusion,Participant_ID,Protocol,gender_normalized,age_processed,age_normalized,race_normalized,Diagnosis
0,NIHNT179KNNF4#2022-03-24,NIHNT179KNNF4,1.0,1.0,0.613548,0.568245,0.885688,0.988402,False,global,0.001296,NIHNT179KNNF4,SuperPD,Female,70.0,60 - 79,White,1
1,NIHNT179KNNF4#2023-06-30,NIHNT179KNNF4,1.0,1.0,0.472465,0.779543,0.280119,0.665972,False,global,0.032354,NIHNT179KNNF4,SuperPD,Female,70.0,60 - 79,White,1
2,NIHNT823CHAC3#2022-05-20,NIHNT823CHAC3,0.0,0.0,0.671538,0.671377,0.905939,0.976564,False,global,0.003919,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0
3,NIHNT823CHAC3#2021-05-07,NIHNT823CHAC3,0.0,0.0,0.710134,0.303872,0.577415,0.540549,False,global,0.023983,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0
4,NIHNT823CHAC3#2021-11-01,NIHNT823CHAC3,0.0,0.0,0.217497,0.467424,0.863133,0.112835,False,global,0.013366,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0


In [16]:
# row_id is '<id>#<date>': take the second '#'-separated field and parse it as a datetime.
df['date'] = pd.to_datetime(df['row_id'].str.split('#').str[1])
df.head()

Unnamed: 0,row_id,id,label,true_label,pred_score_finger,pred_score_speech,pred_score_smile,pred_score_fusion,uncertain_indices,test_split,pred_std_fusion,Participant_ID,Protocol,gender_normalized,age_processed,age_normalized,race_normalized,Diagnosis,date
0,NIHNT179KNNF4#2022-03-24,NIHNT179KNNF4,1.0,1.0,0.613548,0.568245,0.885688,0.988402,False,global,0.001296,NIHNT179KNNF4,SuperPD,Female,70.0,60 - 79,White,1,2022-03-24
1,NIHNT179KNNF4#2023-06-30,NIHNT179KNNF4,1.0,1.0,0.472465,0.779543,0.280119,0.665972,False,global,0.032354,NIHNT179KNNF4,SuperPD,Female,70.0,60 - 79,White,1,2023-06-30
2,NIHNT823CHAC3#2022-05-20,NIHNT823CHAC3,0.0,0.0,0.671538,0.671377,0.905939,0.976564,False,global,0.003919,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0,2022-05-20
3,NIHNT823CHAC3#2021-05-07,NIHNT823CHAC3,0.0,0.0,0.710134,0.303872,0.577415,0.540549,False,global,0.023983,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0,2021-05-07
4,NIHNT823CHAC3#2021-11-01,NIHNT823CHAC3,0.0,0.0,0.217497,0.467424,0.863133,0.112835,False,global,0.013366,NIHNT823CHAC3,SuperPD,Female,62.0,60 - 79,Black or African American,0,2021-11-01


In [17]:
# One metadata table per recording type; each is searched by its 'Filename'
# column further down.
(
    metadata_facial,
    metadata_speech,
    metadata_finger_tapping,
    metadata_finger_tapping_left,
    metadata_finger_tapping_right,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/metadata_facial.csv',
        '../../data/metadata_speech.csv',
        '../../data/metadata_finger_tapping.csv',
        '../../data/metadata_finger_tapping_left.csv',
        '../../data/metadata_finger_tapping_right.csv',
    )
)

In [18]:
# Work on a copy of df that carries the date as a 'YYYY-MM-DD' string, the
# form that is searched for inside the recording filenames.
test_data_w_metadata = df.assign(
    date_str=pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')
)

def find_filename_by_substring(row, metadata_df):
    """Return the first ``Filename`` in ``metadata_df`` that contains both the row's id and date.

    Args:
        row: Mapping-like object with ``'id'`` and ``'date_str'`` entries.
        metadata_df: DataFrame with a ``'Filename'`` column.

    Returns:
        The first filename containing both substrings, or ``None`` when no
        filename contains both.
    """
    id_str = str(row['id'])
    date_str = row['date_str']
    filenames = metadata_df['Filename']

    # regex=False: the id and date are literal text, so characters such as
    # '+' or '.' must not be interpreted as regular-expression syntax.
    # na=False: a missing filename counts as "no match" instead of putting NaN
    # into the mask, which boolean indexing rejects.
    has_id = filenames.str.contains(id_str, regex=False, na=False)
    has_date = filenames.str.contains(date_str, regex=False, na=False)
    matched = filenames[has_id & has_date]

    return matched.iloc[0] if not matched.empty else None

# For every test row, look up the matching recording filename in each metadata table.
test_data_w_metadata['facial_filename'] = test_data_w_metadata.apply(find_filename_by_substring, axis=1, args=(metadata_facial,))
test_data_w_metadata['speech_filename'] = test_data_w_metadata.apply(find_filename_by_substring, axis=1, args=(metadata_speech,))
test_data_w_metadata['finger_tapping_filename'] = test_data_w_metadata.apply(find_filename_by_substring, axis=1, args=(metadata_finger_tapping,))
test_data_w_metadata['finger_tapping_left_filename'] = test_data_w_metadata.apply(find_filename_by_substring, axis=1, args=(metadata_finger_tapping_left,))
test_data_w_metadata['finger_tapping_right_filename'] = test_data_w_metadata.apply(find_filename_by_substring, axis=1, args=(metadata_finger_tapping_right,))

# date_str was only a helper for the lookups in this cell.
test_data_w_metadata = test_data_w_metadata.drop(columns='date_str')


In [19]:
# Preview the first rows, now including the per-recording filename columns.
test_data_w_metadata.head()

Unnamed: 0,row_id,id,label,true_label,pred_score_finger,pred_score_speech,pred_score_smile,pred_score_fusion,uncertain_indices,test_split,...,age_processed,age_normalized,race_normalized,Diagnosis,date,facial_filename,speech_filename,finger_tapping_filename,finger_tapping_left_filename,finger_tapping_right_filename
0,NIHNT179KNNF4#2022-03-24,NIHNT179KNNF4,1.0,1.0,0.613548,0.568245,0.885688,0.988402,False,global,...,70.0,60 - 79,White,1,2022-03-24,2022-03-24T13%3A32%3A36.977Z_NIHNT179KNNF4_smi...,2022-03-24T13%3A32%3A36.977Z_NIHNT179KNNF4_qui...,,2022-03-24T13%3A32%3A36.977Z_NIHNT179KNNF4_fin...,2022-03-24T13%3A32%3A36.977Z_NIHNT179KNNF4_fin...
1,NIHNT179KNNF4#2023-06-30,NIHNT179KNNF4,1.0,1.0,0.472465,0.779543,0.280119,0.665972,False,global,...,70.0,60 - 79,White,1,2023-06-30,2023-06-30T15%3A13%3A51.098Z_NIHNT179KNNF4_smi...,2023-06-30T15%3A13%3A51.098Z_NIHNT179KNNF4_qui...,,2023-06-30T15%3A13%3A51.098Z_NIHNT179KNNF4_fin...,2023-06-30T15%3A13%3A51.098Z_NIHNT179KNNF4_fin...
2,NIHNT823CHAC3#2022-05-20,NIHNT823CHAC3,0.0,0.0,0.671538,0.671377,0.905939,0.976564,False,global,...,62.0,60 - 79,Black or African American,0,2022-05-20,2022-05-20T19%3A15%3A10.454Z_NIHNT823CHAC3_smi...,2022-05-20T19%3A15%3A10.454Z_NIHNT823CHAC3_qui...,,2022-05-20T19%3A15%3A10.454Z_NIHNT823CHAC3_fin...,2022-05-20T19%3A15%3A10.454Z_NIHNT823CHAC3_fin...
3,NIHNT823CHAC3#2021-05-07,NIHNT823CHAC3,0.0,0.0,0.710134,0.303872,0.577415,0.540549,False,global,...,62.0,60 - 79,Black or African American,0,2021-05-07,NIHNT823CHAC3-smile-2021-05-07T21-05-27-387Z-.mp4,NIHNT823CHAC3-quick_brown_fox-2021-05-07T20-59...,NIHNT823CHAC3-finger_tapping-2021-05-07T21-00-...,,
4,NIHNT823CHAC3#2021-11-01,NIHNT823CHAC3,0.0,0.0,0.217497,0.467424,0.863133,0.112835,False,global,...,62.0,60 - 79,Black or African American,0,2021-11-01,NIHNT823CHAC3-smile-2021-11-01T19-07-54-512Z-.mp4,NIHNT823CHAC3-quick_brown_fox-2021-11-01T19-01...,NIHNT823CHAC3-finger_tapping-2021-11-01T19-01-...,,


In [20]:
# A row is misclassified by a model when its score, thresholded at 0.5 into
# 0/1, differs from the true label.
test_data_w_metadata['misclassified_ufnet'] = test_data_w_metadata['true_label'].ne(
    test_data_w_metadata['pred_score_fusion'].ge(0.5).astype(int)
)
test_data_w_metadata['misclassified_facial'] = test_data_w_metadata['true_label'].ne(
    test_data_w_metadata['pred_score_smile'].ge(0.5).astype(int)
)
test_data_w_metadata['misclassified_speech'] = test_data_w_metadata['true_label'].ne(
    test_data_w_metadata['pred_score_speech'].ge(0.5).astype(int)
)
test_data_w_metadata['misclassified_finger'] = test_data_w_metadata['true_label'].ne(
    test_data_w_metadata['pred_score_finger'].ge(0.5).astype(int)
)


In [21]:
# Load labeling_neurologists_PARK.csv and list its columns; the per-rater
# '<name> Label' columns are the ones merged into the test rows below.
labeling_neurologist = pd.read_csv('../../data/labeling_neurologists_PARK.csv')
labeling_neurologist.columns               

Index(['row_id', 'Unnamed: 1', 'PARK labels', 'Ray Labels',
       'Ray's correctness', 'Ruth Lables', 'Ruth's correctness',
       'Jamie Lables', 'Jamie's correctness', 'updated_at', 'PARK Labels',
       'True Labels', 'Ray Label', 'Ruth Label', 'Jamie Label'],
      dtype='object')

In [22]:
# Attach the three per-rater label columns to each test row by row_id.
test_data_w_metadata_neurologist = pd.merge(
    test_data_w_metadata,
    # Rows without a row_id can never belong to a test row, and exact
    # duplicates carry no extra information. Removing both keeps the left
    # merge from emitting more than one output row per test row.
    labeling_neurologist[['row_id', 'Ray Label', 'Ruth Label', 'Jamie Label']]
        .dropna(subset=['row_id'])
        .drop_duplicates(),
    on='row_id',
    how='left',
    # Fail loudly if a row_id still has several, conflicting label rows.
    validate='many_to_one',
)
test_data_w_metadata_neurologist.head()

Unnamed: 0,row_id,id,label,true_label,pred_score_finger,pred_score_speech,pred_score_smile,pred_score_fusion,uncertain_indices,test_split,...,finger_tapping_filename,finger_tapping_left_filename,finger_tapping_right_filename,misclassified_ufnet,misclassified_facial,misclassified_speech,misclassified_finger,Ray Label,Ruth Label,Jamie Label
0,NIHNT179KNNF4#2022-03-24,NIHNT179KNNF4,1.0,1.0,0.613548,0.568245,0.885688,0.988402,False,global,...,,2022-03-24T13%3A32%3A36.977Z_NIHNT179KNNF4_fin...,2022-03-24T13%3A32%3A36.977Z_NIHNT179KNNF4_fin...,False,False,False,False,1.0,0.0,0.0
1,NIHNT179KNNF4#2023-06-30,NIHNT179KNNF4,1.0,1.0,0.472465,0.779543,0.280119,0.665972,False,global,...,,2023-06-30T15%3A13%3A51.098Z_NIHNT179KNNF4_fin...,2023-06-30T15%3A13%3A51.098Z_NIHNT179KNNF4_fin...,False,True,False,True,1.0,1.0,0.0
2,NIHNT823CHAC3#2022-05-20,NIHNT823CHAC3,0.0,0.0,0.671538,0.671377,0.905939,0.976564,False,global,...,,2022-05-20T19%3A15%3A10.454Z_NIHNT823CHAC3_fin...,2022-05-20T19%3A15%3A10.454Z_NIHNT823CHAC3_fin...,True,True,True,True,0.0,0.0,0.0
3,NIHNT823CHAC3#2021-05-07,NIHNT823CHAC3,0.0,0.0,0.710134,0.303872,0.577415,0.540549,False,global,...,NIHNT823CHAC3-finger_tapping-2021-05-07T21-00-...,,,True,True,False,True,,,
4,NIHNT823CHAC3#2021-11-01,NIHNT823CHAC3,0.0,0.0,0.217497,0.467424,0.863133,0.112835,False,global,...,NIHNT823CHAC3-finger_tapping-2021-11-01T19-01-...,,,False,True,False,False,,,


In [23]:
# Give the export columns their final names. Columns not listed here
# (e.g. pred_std_fusion) keep their current name.
test_data_w_metadata_neurologist = test_data_w_metadata_neurologist.rename(columns={
    # identifiers
    'row_id': 'unique_row_id',
    'id': 'participant_id',
    # protocol and demographics
    'Protocol': 'protocol',
    'gender_normalized': 'gender',
    'age_processed': 'age',
    'age_normalized': 'age_group',
    'race_normalized': 'race',
    # recording filenames
    'facial_filename': 'filename_smile',
    'speech_filename': 'filename_speech',
    'finger_tapping_filename': 'filename_finger_tapping',
    'finger_tapping_left_filename': 'filename_finger_tapping_left',
    'finger_tapping_right_filename': 'filename_finger_tapping_right',
    # model outcome flags
    'misclassified_ufnet': 'misclassified_fusion',
    'misclassified_facial': 'misclassified_smile',
    'uncertain_indices': 'uncertain_flag',
    # per-rater labels
    'Ray Label': 'neurologist_label_ray',
    'Ruth Label': 'neurologist_label_ruth',
    'Jamie Label': 'neurologist_label_jamie',
})

In [24]:
# Columns of the exported table, in output order.
columns_to_keep = [
    # identifiers, protocol and split
    'unique_row_id', 'participant_id', 'date', 'protocol', 'test_split',
    # demographics
    'gender', 'age', 'age_group', 'race',
    # recording filenames
    'filename_smile', 'filename_speech', 'filename_finger_tapping',
    'filename_finger_tapping_left', 'filename_finger_tapping_right',
    # scores and true label
    'pred_score_smile', 'pred_score_speech', 'pred_score_finger', 'pred_score_fusion', 'true_label',
    # misclassification and uncertainty
    'misclassified_smile', 'misclassified_speech', 'misclassified_finger', 'misclassified_fusion',
    'uncertain_flag', 'pred_std_fusion',
    # per-rater labels
    'neurologist_label_ray', 'neurologist_label_ruth', 'neurologist_label_jamie',
]

test_data_big_csv = test_data_w_metadata_neurologist.loc[:, columns_to_keep]
test_data_big_csv.to_csv('../../data/test_data_big.csv', index=False)

In [25]:
ls -l ../../data

total 9856
-rw-r--r-- 1 tadnan people    1066 Oct 14 17:33 demographic_table.csv
-rw-r--r-- 1 tadnan people    1925 Sep  1 12:04 dev_set_participants.txt
-rw-r--r-- 1 tadnan people    7704 Oct  9 11:21 df_stage_data.csv
drwxr-xr-x 2 tadnan people    4096 Sep  1 14:03 [0m[01;34mfacial_expression_smile[0m/
drwxr-xr-x 2 tadnan people    4096 Sep  1 14:01 [01;34mfinger_tapping[0m/
-rw-r--r-- 1 tadnan people 4507133 Oct 14 17:31 full_fusion_dataset.csv
-rw-r--r-- 1 tadnan people    6119 Sep  1 14:14 labeling_neurologists_PARK.csv
-rw-r--r-- 1 tadnan people   99230 Oct  9 11:19 metadata_demographic.csv
-rw-r--r-- 1 tadnan people  391766 Sep  1 14:09 metadata_facial.csv
-rw-r--r-- 1 tadnan people  202241 Sep  1 14:09 metadata_finger_tapping.csv
-rw-r--r-- 1 tadnan people  300859 Sep  1 14:09 metadata_finger_tapping_left.csv
-rw-r--r-- 1 tadnan people  303484 Sep  1 14:09 metadata_finger_tapping_right.csv
-rw-r--r-- 1 tadnan people  443546 Sep  1 14:13 metadata_speech.csv
-rw-r--r-- 1 tad