# FUNCTION TO CALCULATE BEST MODEL, FEATURE COMBINATION AND PARTICIPANT GROUPS 

In [2]:
import pandas as pd
import numpy as np

def evaluate_models(data):
    """Rank models by a weighted composite score and compare them with chance.

    The composite score of a model is the weighted sum of its
    'Weighted F1', 'Precision_1', 'Recall_1' and 'AUROC' values (weights
    0.23 / 0.23 / 0.23 / 0.31).  An 'Accuracy' column may be present in
    ``data`` but does not contribute to the score.

    The ranking of all non-chance models and the selected model are printed.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (typically a dict of
            equal-length lists) with a 'Model' column and one column per
            weighted metric.  Exactly the row whose 'Model' is 'Chance' is
            treated as the baseline; if several exist the first one is used.

    Returns:
        The name of the top-ranked model that beats the chance baseline, or,
        if none does, the name of the model with the highest composite score.

    Raises:
        ValueError: If there is no 'Chance' row, or no row other than
            'Chance' to rank.
    """
    # Single source of truth for both the metric list and the weights.
    weights = {
        'Weighted F1': 0.23,
        'Precision_1': 0.23,
        'Recall_1': 0.23,
        'AUROC': 0.31,
    }

    # Work on a copy so a DataFrame passed in by the caller is never modified.
    df_copy = pd.DataFrame(data).copy()

    # Weighted sum of the metric columns (same accumulation order as the
    # weights dict above).
    df_copy['Composite Score'] = sum(
        df_copy[metric] * weight for metric, weight in weights.items()
    )

    is_chance = df_copy['Model'] == 'Chance'
    if not is_chance.any():
        raise ValueError("data must contain a row whose 'Model' is 'Chance'")
    chance_score = df_copy.loc[is_chance, 'Composite Score'].iloc[0]

    # mergesort is stable: models with exactly equal scores keep input order,
    # so the reported winner does not depend on the sort implementation.
    sorted_df = df_copy[~is_chance].sort_values(
        by='Composite Score', ascending=False, kind='mergesort'
    )
    if sorted_df.empty:
        raise ValueError("data contains no model other than 'Chance' to rank")

    # Display the results
    print(sorted_df[['Model', 'Composite Score']])

    # Only models strictly above the baseline count as "better than chance".
    better_than_chance = sorted_df[sorted_df['Composite Score'] > chance_score]

    if not better_than_chance.empty:
        best_model = better_than_chance.iloc[0]['Model']
        print(f"The best model that performs better than chance is: {best_model}")
    else:
        best_model = sorted_df.iloc[0]['Model']
        print(f"No model performs better than chance. The model with the highest composite score is: {best_model}")

    return best_model


# ALL PARTICIPANTS

In [72]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.68, 0.64, 0.68],
        'Weighted F1': [0.67, 0.71, 0.67, 0.71],
        'Precision_1': [0.79, 0.87, 0.86, 0.87],
        'Recall_1': [0.79, 0.70, 0.65, 0.70],
        'AUROC': [0.50, 0.71, 0.67, 0.71]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.63, 0.52, 0.65],
        'Weighted F1': [0.67, 0.65, 0.56, 0.66],
        'Precision_1': [0.79, 0.79, 0.80, 0.79],
        'Recall_1': [0.79, 0.72, 0.52, 0.75],
        'AUROC': [0.49, 0.51, 0.53, 0.50]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.67, 0.66, 0.66],
        'Weighted F1': [0.67, 0.69, 0.69, 0.68],
        'Precision_1': [0.79, 0.84, 0.88, 0.83],
        'Recall_1': [0.79, 0.73, 0.66, 0.72],
        'AUROC': [0.50, 0.64, 0.68, 0.63]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.66, 0.64, 0.67],
        'Weighted F1': [0.67, 0.66, 0.66, 0.67],
        'Precision_1': [0.79, 0.79, 0.80, 0.79],
        'Recall_1': [0.79, 0.78, 0.71, 0.78],
        'AUROC': [0.50, 0.52, 0.55, 0.51]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.71, 0.65, 0.70],
        'Weighted F1': [0.67, 0.72, 0.69, 0.71],
        'Precision_1': [0.79, 0.84, 0.88, 0.83],
        'Recall_1': [0.80, 0.77, 0.65, 0.78],
        'AUROC': [0.50, 0.69, 0.72, 0.67]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)



     Model  Composite Score
1       RF           0.7445
3  XGBoost           0.7445
2      SVM           0.7091
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.6610
1       RF           0.6549
2      SVM           0.5967
No model performs better than chance. The model with the highest composite score is: XGBoost
     Model  Composite Score
2      SVM           0.7237
1       RF           0.7182
3  XGBoost           0.7082
The best model that performs better than chance is: SVM
     Model  Composite Score
1       RF           0.6741
3  XGBoost           0.6733
2      SVM           0.6696
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.7498
3  XGBoost           0.7413
2      SVM           0.7338
The best model that performs better than chance is: RF


# NEUROTYPICAL

In [73]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.71, 0.71, 0.71],
        'Weighted F1': [0.66, 0.73, 0.72, 0.73],
        'Precision_1': [0.78, 0.87, 0.85, 0.87],
        'Recall_1': [0.79, 0.74, 0.76, 0.74],
        'AUROC': [0.49, 0.71, 0.69, 0.71]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.65, 0.53, 0.65],
        'Weighted F1': [0.67, 0.66, 0.57, 0.66],
        'Precision_1': [0.79, 0.79, 0.81, 0.80],
        'Recall_1': [0.78, 0.76, 0.53, 0.75],
        'AUROC': [0.50, 0.51, 0.54, 0.51]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.70, 0.66, 0.68],
        'Weighted F1': [0.67, 0.72, 0.69, 0.70],
        'Precision_1': [0.79, 0.85, 0.88, 0.84],
        'Recall_1': [0.79, 0.75, 0.66, 0.73],
        'AUROC': [0.52, 0.69, 0.69, 0.67]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.65, 0.61, 0.66],
        'Weighted F1': [0.66, 0.66, 0.64, 0.67],
        'Precision_1': [0.79, 0.79, 0.81, 0.79],
        'Recall_1': [0.79, 0.77, 0.65, 0.77],
        'AUROC': [0.52, 0.53, 0.58, 0.53]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.72, 0.66, 0.72],
        'Weighted F1': [0.67, 0.73, 0.69, 0.73],
        'Precision_1': [0.79, 0.84, 0.88, 0.84],
        'Recall_1': [0.79, 0.80, 0.65, 0.80],
        'AUROC': [0.49, 0.71, 0.74, 0.70]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)


     Model  Composite Score
1       RF           0.7583
3  XGBoost           0.7583
2      SVM           0.7498
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.6664
1       RF           0.6664
2      SVM           0.6067
No model performs better than chance. The model with the highest composite score is: XGBoost
     Model  Composite Score
1       RF           0.7475
3  XGBoost           0.7298
2      SVM           0.7268
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.6772
1       RF           0.6749
2      SVM           0.6628
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.7652
3  XGBoost           0.7621
2      SVM           0.7400
The best model that performs better than chance is: RF


# NEURODIVERGENT

In [74]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.65, 0.59, 0.66],
        'Weighted F1': [0.66, 0.68, 0.63, 0.69],
        'Precision_1': [0.79, 0.88, 0.87, 0.88],
        'Recall_1': [0.78, 0.66, 0.57, 0.66],
        'AUROC': [0.49, 0.70, 0.68, 0.70]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.64, 0.38, 0.62],
        'Weighted F1': [0.68, 0.65, 0.40, 0.64],
        'Precision_1': [0.80, 0.79, 0.80, 0.79],
        'Recall_1': [0.80, 0.74, 0.32, 0.71],
        'AUROC': [0.48, 0.50, 0.49, 0.50]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.67, 0.66, 0.67],
        'Weighted F1': [0.66, 0.70, 0.69, 0.70],
        'Precision_1': [0.79, 0.84, 0.88, 0.84],
        'Recall_1': [0.78, 0.72, 0.66, 0.72],
        'AUROC': [0.51, 0.66, 0.68, 0.65]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.67, 0.42, 0.66],
        'Weighted F1': [0.67, 0.67, 0.41, 0.67],
        'Precision_1': [0.79, 0.80, 0.79, 0.80],
        'Recall_1': [0.79, 0.78, 0.36, 0.77],
        'AUROC': [0.50, 0.53, 0.48, 0.52]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.71, 0.66, 0.71],
        'Weighted F1': [0.67, 0.72, 0.69, 0.72],
        'Precision_1': [0.79, 0.85, 0.88, 0.84],
        'Recall_1': [0.79, 0.77, 0.65, 0.78],
        'AUROC': [0.50, 0.69, 0.70, 0.69]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)


     Model  Composite Score
3  XGBoost           0.7299
1       RF           0.7276
2      SVM           0.6869
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.6564
3  XGBoost           0.6472
2      SVM           0.5015
No model performs better than chance. The model with the highest composite score is: RF
     Model  Composite Score
1       RF           0.7244
2      SVM           0.7237
3  XGBoost           0.7213
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.6818
3  XGBoost           0.6764
2      SVM           0.5076
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.7521
3  XGBoost           0.7521
2      SVM           0.7276
The best model that performs better than chance is: RF


# ADD or ADHD 

In [75]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        # NOTE(review): XGBoost Recall_1 is 0.88 -- identical to its
        # Precision_1 and far above RF's 0.70 even though their accuracy and
        # F1 are almost the same. Possible transcription error; this value
        # decides the winner for this table. Confirm against the raw results.
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.68, 0.63, 0.67],
        'Weighted F1': [0.68, 0.70, 0.66, 0.70],
        'Precision_1': [0.80, 0.87, 0.85, 0.88],
        'Recall_1': [0.80, 0.70, 0.64, 0.88],
        'AUROC': [0.50, 0.71, 0.67, 0.71]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.65, 0.47, 0.65],
        'Weighted F1': [0.66, 0.66, 0.51, 0.66],
        'Precision_1': [0.79, 0.79, 0.81, 0.80],
        'Recall_1': [0.78, 0.76, 0.44, 0.74],
        'AUROC': [0.49, 0.53, 0.51, 0.53]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.68, 0.66, 0.67],
        'Weighted F1': [0.67, 0.70, 0.69, 0.69],
        'Precision_1': [0.79, 0.85, 0.88, 0.85],
        'Recall_1': [0.80, 0.73, 0.66, 0.71],
        'AUROC': [0.50, 0.67, 0.68, 0.67]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.68, 0.56, 0.68],
        'Weighted F1': [0.67, 0.68, 0.60, 0.68],
        'Precision_1': [0.79, 0.80, 0.80, 0.81],
        'Recall_1': [0.80, 0.79, 0.60, 0.79],
        'AUROC': [0.52, 0.55, 0.54, 0.54]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.71, 0.66, 0.70],
        'Weighted F1': [0.67, 0.72, 0.69, 0.72],
        'Precision_1': [0.80, 0.84, 0.88, 0.85],
        'Recall_1': [0.78, 0.78, 0.66, 0.77],
        'AUROC': [0.49, 0.69, 0.72, 0.69]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
3  XGBoost           0.7859
1       RF           0.7422
2      SVM           0.7022
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.6726
3  XGBoost           0.6703
2      SVM           0.5629
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.7321
3  XGBoost           0.7252
2      SVM           0.7237
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.6926
3  XGBoost           0.6918
2      SVM           0.6274
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.7521
3  XGBoost           0.7521
2      SVM           0.7361
The best model that performs better than chance is: RF


# Autism, Asperger’s, or ASD 

In [76]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.70, 0.64, 0.60, 0.62],
        'Weighted F1': [0.70, 0.67, 0.64, 0.66],
        'Precision_1': [0.81, 0.89, 0.88, 0.89],
        'Recall_1': [0.81, 0.63, 0.57, 0.60],
        'AUROC': [0.52, 0.71, 0.69, 0.70]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.62, 0.45, 0.59],
        'Weighted F1': [0.68, 0.64, 0.49, 0.62],
        'Precision_1': [0.80, 0.80, 0.76, 0.79],
        'Recall_1': [0.80, 0.70, 0.44, 0.68],
        'AUROC': [0.50, 0.50, 0.46, 0.48]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.69, 0.67, 0.62, 0.65],
        'Weighted F1': [0.69, 0.69, 0.66, 0.68],
        'Precision_1': [0.81, 0.84, 0.88, 0.84],
        'Recall_1': [0.81, 0.73, 0.61, 0.70],
        'AUROC': [0.48, 0.64, 0.70, 0.65]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.67, 0.42, 0.67],
        'Weighted F1': [0.66, 0.67, 0.46, 0.68],
        'Precision_1': [0.79, 0.80, 0.76, 0.81],
        'Recall_1': [0.79, 0.78, 0.40, 0.78],
        'AUROC': [0.49, 0.48, 0.46, 0.51]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.70, 0.69, 0.59, 0.69],
        'Weighted F1': [0.70, 0.71, 0.63, 0.71],
        'Precision_1': [0.81, 0.85, 0.89, 0.84],
        'Recall_1': [0.81, 0.76, 0.56, 0.76],
        'AUROC': [0.50, 0.66, 0.71, 0.66]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
1       RF           0.7238
3  XGBoost           0.7115
2      SVM           0.6946
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.6472
3  XGBoost           0.6295
2      SVM           0.5313
No model performs better than chance. The model with the highest composite score is: RF
     Model  Composite Score
1       RF           0.7182
3  XGBoost           0.7121
2      SVM           0.7115
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.6802
1       RF           0.6663
2      SVM           0.5152
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.7382
3  XGBoost           0.7359
2      SVM           0.6985
The best model that performs better than chance is: RF


# Dyslexia, Dyspraxia, Dyscalculia, Dysgraphia 

In [77]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.69, 0.78, 0.72, 0.78],
        'Weighted F1': [0.69, 0.80, 0.75, 0.80],
        'Precision_1': [0.82, 0.91, 0.91, 0.91],
        'Recall_1': [0.82, 0.82, 0.74, 0.81],
        'AUROC': [0.49, 0.82, 0.72, 0.83]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.75, 0.71, 0.54, 0.68],
        'Weighted F1': [0.74, 0.74, 0.55, 0.71],
        'Precision_1': [0.84, 0.87, 0.86, 0.87],
        'Recall_1': [0.86, 0.87, 0.54, 0.76],
        'AUROC': [0.53, 0.59, 0.49, 0.59]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.74, 0.76, 0.66, 0.77],
        'Weighted F1': [0.74, 0.78, 0.71, 0.78],
        'Precision_1': [0.85, 0.90, 0.93, 0.89],
        'Recall_1': [0.84, 0.82, 0.66, 0.83],
        'AUROC': [0.54, 0.81, 0.77, 0.81]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.77, 0.48, 0.72],
        'Weighted F1': [0.69, 0.76, 0.51, 0.71],
        'Precision_1': [0.83, 0.86, 0.88, 0.84],
        'Recall_1': [0.77, 0.87, 0.47, 0.84],
        'AUROC': [0.48, 0.59, 0.59, 0.50]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.75, 0.77, 0.67, 0.76],
        'Weighted F1': [0.74, 0.78, 0.71, 0.77],
        'Precision_1': [0.84, 0.88, 0.92, 0.88],
        'Recall_1': [0.87, 0.84, 0.67, 0.84],
        'AUROC': [0.50, 0.79, 0.77, 0.77]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
3  XGBoost           0.8369
1       RF           0.8361
2      SVM           0.7752
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.7533
3  XGBoost           0.7211
2      SVM           0.6004
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.8261
3  XGBoost           0.8261
2      SVM           0.7677
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.7556
3  XGBoost           0.7047
2      SVM           0.6107
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.8199
3  XGBoost           0.8114
2      SVM           0.7677
The best model that performs better than chance is: RF


# Any other language, reading, math, and nonverbal learning disorder 

In [78]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.56, 0.60, 0.61, 0.61],
        'Weighted F1': [0.56, 0.62, 0.63, 0.63],
        'Precision_1': [0.71, 0.77, 0.80, 0.77],
        'Recall_1': [0.69, 0.67, 0.60, 0.66],
        'AUROC': [0.50, 0.66, 0.66, 0.65]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.62, 0.59, 0.53, 0.54],
        'Weighted F1': [0.62, 0.59, 0.52, 0.55],
        'Precision_1': [0.74, 0.75, 0.73, 0.72],
        'Recall_1': [0.75, 0.67, 0.61, 0.63],
        'AUROC': [0.47, 0.53, 0.50, 0.50]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.61, 0.64, 0.59, 0.58],
        'Weighted F1': [0.61, 0.65, 0.61, 0.59],
        'Precision_1': [0.74, 0.77, 0.81, 0.75],
        'Recall_1': [0.74, 0.72, 0.59, 0.68],
        'AUROC': [0.48, 0.59, 0.66, 0.59]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.58, 0.60, 0.52, 0.60],
        'Weighted F1': [0.59, 0.59, 0.50, 0.60],
        'Precision_1': [0.72, 0.73, 0.71, 0.72],
        'Recall_1': [0.71, 0.71, 0.57, 0.72],
        'AUROC': [0.55, 0.49, 0.46, 0.52]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.60, 0.61, 0.58, 0.62],
        'Weighted F1': [0.59, 0.63, 0.60, 0.62],
        'Precision_1': [0.73, 0.76, 0.79, 0.73],
        'Recall_1': [0.76, 0.69, 0.57, 0.77],
        'AUROC': [0.52, 0.63, 0.66, 0.59]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
1       RF           0.6784
3  XGBoost           0.6753
2      SVM           0.6715
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.6266
3  XGBoost           0.5920
2      SVM           0.5828
No model performs better than chance. The model with the highest composite score is: RF
     Model  Composite Score
1       RF           0.6751
2      SVM           0.6669
3  XGBoost           0.6475
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.6304
1       RF           0.6188
2      SVM           0.5520
No model performs better than chance. The model with the highest composite score is: XGBoost
     Model  Composite Score
1       RF           0.6737
3  XGBoost           0.6705
2      SVM           0.6554
The best model that performs better than chance is: RF


# Generalized anxiety disorder 

In [79]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.70, 0.66, 0.58, 0.64],
        'Weighted F1': [0.70, 0.70, 0.63, 0.68],
        'Precision_1': [0.81, 0.89, 0.88, 0.89],
        'Recall_1': [0.82, 0.66, 0.56, 0.64],
        'AUROC': [0.53, 0.71, 0.68, 0.71]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.70, 0.67, 0.45, 0.67],
        'Weighted F1': [0.70, 0.68, 0.49, 0.69],
        'Precision_1': [0.82, 0.82, 0.80, 0.83],
        'Recall_1': [0.81, 0.77, 0.43, 0.75],
        'AUROC': [0.48, 0.52, 0.47, 0.55]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.70, 0.69, 0.64, 0.68],
        'Weighted F1': [0.70, 0.71, 0.68, 0.71],
        'Precision_1': [0.82, 0.86, 0.88, 0.86],
        'Recall_1': [0.81, 0.74, 0.65, 0.72],
        'AUROC': [0.49, 0.66, 0.67, 0.66]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.70, 0.48, 0.69],
        'Weighted F1': [0.68, 0.70, 0.52, 0.70],
        'Precision_1': [0.81, 0.82, 0.78, 0.82],
        'Recall_1': [0.80, 0.81, 0.50, 0.80],
        'AUROC': [0.50, 0.53, 0.51, 0.52]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.69, 0.71, 0.63, 0.72],
        'Weighted F1': [0.69, 0.73, 0.67, 0.74],
        'Precision_1': [0.81, 0.85, 0.90, 0.85],
        'Recall_1': [0.81, 0.79, 0.62, 0.81],
        'AUROC': [0.49, 0.66, 0.70, 0.65]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
1       RF           0.7376
3  XGBoost           0.7284
2      SVM           0.6869
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.6926
1       RF           0.6833
2      SVM           0.5413
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.7359
3  XGBoost           0.7313
2      SVM           0.7160
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.7002
3  XGBoost           0.6948
2      SVM           0.5721
The best model that performs better than chance is: RF
     Model  Composite Score
3  XGBoost           0.7535
1       RF           0.7497
2      SVM           0.7207
The best model that performs better than chance is: XGBoost


# Other

In [80]:
# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.67, 0.69, 0.66, 0.70],
        'Weighted F1': [0.67, 0.72, 0.69, 0.73],
        'Precision_1': [0.79, 0.92, 0.90, 0.90],
        'Recall_1': [0.80, 0.67, 0.65, 0.70],
        'AUROC': [0.53, 0.75, 0.73, 0.75]
    },
    {  # data2
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.70, 0.59, 0.52, 0.57],
        'Weighted F1': [0.69, 0.61, 0.56, 0.60],
        'Precision_1': [0.80, 0.78, 0.77, 0.77],
        'Recall_1': [0.84, 0.67, 0.57, 0.66],
        'AUROC': [0.53, 0.43, 0.47, 0.42]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.66, 0.72, 0.68, 0.71],
        'Weighted F1': [0.66, 0.74, 0.71, 0.72],
        'Precision_1': [0.79, 0.86, 0.91, 0.85],
        'Recall_1': [0.77, 0.78, 0.66, 0.77],
        'AUROC': [0.51, 0.72, 0.71, 0.70]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.66, 0.60, 0.64],
        'Weighted F1': [0.68, 0.65, 0.63, 0.65],
        'Precision_1': [0.80, 0.78, 0.80, 0.79],
        'Recall_1': [0.80, 0.79, 0.66, 0.75],
        'AUROC': [0.50, 0.46, 0.51, 0.48]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.68, 0.72, 0.68, 0.71],
        'Weighted F1': [0.68, 0.72, 0.71, 0.72],
        'Precision_1': [0.80, 0.84, 0.91, 0.84],
        'Recall_1': [0.81, 0.79, 0.66, 0.79],
        'AUROC': [0.49, 0.72, 0.74, 0.71]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
3  XGBoost           0.7684
1       RF           0.7638
2      SVM           0.7415
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.6071
3  XGBoost           0.5971
2      SVM           0.5827
No model performs better than chance. The model with the highest composite score is: RF
     Model  Composite Score
1       RF           0.7706
3  XGBoost           0.7552
2      SVM           0.7445
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.6532
3  XGBoost           0.6525
2      SVM           0.6388
No model performs better than chance. The model with the highest composite score is: RF
     Model  Composite Score
1       RF           0.7637
3  XGBoost           0.7606
2      SVM           0.7538
The best model that performs better than chance is: RF


## "Prefer not to respond" and "never been diagnosed with any listed diagnosis"

In [81]:

# Five metric tables for this participant group (presumably one per feature
# combination -- confirm). Every metric list follows the order of 'Model'.
datasets = [
    {  # data1
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.65, 0.66, 0.59, 0.66],
        'Weighted F1': [0.66, 0.68, 0.63, 0.69],
        'Precision_1': [0.77, 0.84, 0.83, 0.86],
        'Recall_1': [0.77, 0.69, 0.59, 0.66],
        'AUROC': [0.49, 0.67, 0.63, 0.65]
    },
    {  # data2
        # NOTE(review): RF Accuracy is 0.73 while its Weighted F1 is 0.61 and
        # XGBoost sits at 0.59 / 0.60 -- possible transcription error.
        # Accuracy does not enter the composite score, so rankings are
        # unaffected; confirm against the raw results.
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.64, 0.73, 0.47, 0.59],
        'Weighted F1': [0.65, 0.61, 0.50, 0.60],
        'Precision_1': [0.77, 0.75, 0.78, 0.74],
        'Recall_1': [0.75, 0.70, 0.44, 0.71],
        'AUROC': [0.49, 0.44, 0.54, 0.43]
    },
    {  # data3
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.65, 0.65, 0.65, 0.65],
        'Weighted F1': [0.65, 0.67, 0.65, 0.67],
        'Precision_1': [0.76, 0.81, 0.85, 0.83],
        'Recall_1': [0.79, 0.71, 0.65, 0.69],
        'AUROC': [0.53, 0.62, 0.66, 0.64]
    },
    {  # data4
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.63, 0.65, 0.47, 0.62],
        'Weighted F1': [0.64, 0.64, 0.49, 0.62],
        'Precision_1': [0.76, 0.76, 0.77, 0.75],
        'Recall_1': [0.76, 0.79, 0.44, 0.75],
        'AUROC': [0.53, 0.51, 0.52, 0.47]
    },
    {  # data5
        'Model': ['Chance', 'RF', 'SVM', 'XGBoost'],
        'Accuracy': [0.65, 0.65, 0.65, 0.65],
        'Weighted F1': [0.65, 0.66, 0.67, 0.66],
        'Precision_1': [0.77, 0.79, 0.85, 0.80],
        'Recall_1': [0.77, 0.74, 0.64, 0.73],
        'AUROC': [0.49, 0.61, 0.69, 0.64]
    },
]
# Keep the individual names available for later inspection in the notebook.
data1, data2, data3, data4, data5 = datasets

# Score each table in order; evaluate_models prints the ranking and winner.
for dataset in datasets:
    evaluate_models(dataset)

     Model  Composite Score
1       RF           0.7160
3  XGBoost           0.7098
2      SVM           0.6668
The best model that performs better than chance is: RF
     Model  Composite Score
1       RF           0.6102
3  XGBoost           0.6048
2      SVM           0.5630
No model performs better than chance. The model with the highest composite score is: RF
     Model  Composite Score
3  XGBoost           0.7021
2      SVM           0.6991
1       RF           0.6959
The best model that performs better than chance is: XGBoost
     Model  Composite Score
1       RF           0.6618
3  XGBoost           0.6333
2      SVM           0.5522
The best model that performs better than chance is: RF
     Model  Composite Score
2      SVM           0.7107
3  XGBoost           0.7021
1       RF           0.6928
The best model that performs better than chance is: SVM


# BEST FEATURE COMBINATION

In [3]:
# One table per participant group. Apart from 'Chance', the rows appear to be
# the winning models of that group's five tables in the cells above, numbered
# per algorithm (RF1, RF2, ...) -- confirm.
group_tables = [
    {  # data1: all participants
        'Model': ['Chance', 'RF1', 'XGB', 'SVM', 'RF2', 'RF3'],
        'Accuracy': [0.67, 0.68, 0.65, 0.66, 0.66, 0.71],
        'Weighted F1': [0.67, 0.71, 0.66, 0.69, 0.66, 0.72],
        'Precision_1': [0.79, 0.87, 0.79, 0.88, 0.79, 0.84],
        'Recall_1': [0.79, 0.70, 0.75, 0.66, 0.78, 0.77],
        'AUROC': [0.50, 0.71, 0.50, 0.68, 0.52, 0.69],
    },
    {  # data2: neurotypical
        # NOTE(review): RF2 (0.71, 0.72, 0.84, 0.77, 0.69) does not match the
        # RF row of the neurotypical data3 table (0.70, 0.72, 0.85, 0.75,
        # 0.69), which it presumably was copied from -- confirm.
        'Model': ['Chance', 'RF1', 'XGB1', 'RF2', 'XGB2', 'RF3'],
        'Accuracy': [0.66, 0.71, 0.65, 0.71, 0.66, 0.72],
        'Weighted F1': [0.66, 0.73, 0.66, 0.72, 0.67, 0.73],
        'Precision_1': [0.78, 0.87, 0.80, 0.84, 0.79, 0.84],
        'Recall_1': [0.79, 0.74, 0.75, 0.77, 0.77, 0.80],
        'AUROC': [0.49, 0.71, 0.51, 0.69, 0.53, 0.71]
    },
    {  # data3: neurodivergent
        'Model': ['Chance', 'XGB', 'RF1', 'RF2', 'RF3', 'RF4'],
        'Accuracy': [0.66, 0.66, 0.64, 0.67, 0.67, 0.71],
        'Weighted F1': [0.66, 0.69, 0.65, 0.70, 0.67, 0.72],
        'Precision_1': [0.79, 0.88, 0.79, 0.84, 0.80, 0.85],
        'Recall_1': [0.78, 0.66, 0.74, 0.72, 0.78, 0.77],
        'AUROC': [0.49, 0.70, 0.50, 0.66, 0.53, 0.69]
    },
    {  # data4: ADD
        # NOTE(review): XGB Recall_1 of 0.88 is carried over from the ADD/ADHD
        # data1 table, where it looks like a possible transcription error
        # (equal to Precision_1, far above the other models) -- confirm.
        'Model': ['Chance', 'XGB', 'RF1', 'RF2', 'RF3', 'RF4'],
        'Accuracy': [0.68, 0.67, 0.65, 0.68, 0.68, 0.71],
        'Weighted F1': [0.68, 0.70, 0.66, 0.70, 0.68, 0.72],
        'Precision_1': [0.80, 0.88, 0.79, 0.85, 0.80, 0.84],
        'Recall_1': [0.80, 0.88, 0.76, 0.73, 0.79, 0.78],
        'AUROC': [0.50, 0.71, 0.53, 0.67, 0.55, 0.69]
    },
    {  # data5: Autism
        'Model': ['Chance', 'RF1', 'RF2', 'RF3', 'XGB1', 'RF4'],
        'Accuracy': [0.70, 0.64, 0.62, 0.67, 0.67, 0.69],
        'Weighted F1': [0.70, 0.67, 0.64, 0.69, 0.68, 0.71],
        'Precision_1': [0.81, 0.89, 0.80, 0.84, 0.81, 0.85],
        'Recall_1': [0.81, 0.63, 0.70, 0.73, 0.78, 0.76],
        'AUROC': [0.52, 0.71, 0.50, 0.64, 0.51, 0.66]
    },
    {  # data6: Dyslexia
        'Model': ['Chance', 'XGB', 'RF1', 'RF2', 'RF3', 'RF4'],
        'Accuracy': [0.69, 0.78, 0.71, 0.76, 0.77, 0.77],
        'Weighted F1': [0.69, 0.80, 0.74, 0.78, 0.76, 0.78],
        'Precision_1': [0.82, 0.91, 0.87, 0.90, 0.86, 0.88],
        'Recall_1': [0.82, 0.81, 0.87, 0.82, 0.87, 0.84],
        'AUROC': [0.49, 0.83, 0.59, 0.81, 0.59, 0.79]
    },
    {  # data7: Any other language
        'Model': ['Chance', 'RF1', 'RF2', 'RF3', 'XGB', 'RF4'],
        'Accuracy': [0.56, 0.60, 0.59, 0.64, 0.60, 0.61],
        'Weighted F1': [0.56, 0.62, 0.59, 0.65, 0.60, 0.63],
        'Precision_1': [0.71, 0.77, 0.75, 0.77, 0.72, 0.76],
        'Recall_1': [0.69, 0.67, 0.67, 0.72, 0.72, 0.69],
        'AUROC': [0.50, 0.66, 0.53, 0.59, 0.52, 0.63]
    },
    {  # data8: Generalized anxiety disorder
        # NOTE(review): XGB1 (0.64, 0.68, 0.89, 0.64, 0.71) repeats the
        # XGBoost row of the anxiety data1 table; the winner of data2 there
        # was XGBoost with (0.67, 0.69, 0.83, 0.75, 0.55) -- confirm which
        # row was intended.
        'Model': ['Chance', 'RF1', 'XGB1', 'RF2', 'RF3', 'XGB2'],
        'Accuracy': [0.70, 0.66, 0.64, 0.69, 0.70, 0.72],
        'Weighted F1': [0.70, 0.70, 0.68, 0.71, 0.70, 0.74],
        'Precision_1': [0.81, 0.89, 0.89, 0.86, 0.82, 0.85],
        'Recall_1': [0.82, 0.66, 0.64, 0.74, 0.81, 0.81],
        'AUROC': [0.53, 0.71, 0.71, 0.66, 0.53, 0.65]
    },
    {  # data9: Other
        'Model': ['Chance', 'XGB', 'RF1', 'RF2', 'RF3', 'RF4'],
        'Accuracy': [0.67, 0.70, 0.59, 0.72, 0.66, 0.72],
        'Weighted F1': [0.67, 0.73, 0.61, 0.74, 0.65, 0.72],
        'Precision_1': [0.79, 0.90, 0.78, 0.86, 0.78, 0.84],
        'Recall_1': [0.80, 0.70, 0.67, 0.78, 0.79, 0.79],
        'AUROC': [0.53, 0.75, 0.43, 0.72, 0.46, 0.72]
    },
    {  # data10: No response/never diagnosed
        'Model': ['Chance', 'RF1', 'RF2', 'XGB', 'RF3', 'SVM'],
        'Accuracy': [0.65, 0.66, 0.73, 0.65, 0.65, 0.65],
        'Weighted F1': [0.66, 0.68, 0.61, 0.67, 0.64, 0.67],
        'Precision_1': [0.77, 0.84, 0.75, 0.83, 0.76, 0.85],
        'Recall_1': [0.77, 0.69, 0.70, 0.69, 0.79, 0.64],
        'AUROC': [0.49, 0.67, 0.44, 0.64, 0.51, 0.69]
    },
]
# Keep the individual names available for later inspection in the notebook.
(data1, data2, data3, data4, data5,
 data6, data7, data8, data9, data10) = group_tables

# Score each group's table in order; evaluate_models prints ranking and winner.
for group_table in group_tables:
    evaluate_models(group_table)

  Model  Composite Score
5   RF3           0.7498
1   RF1           0.7445
3   SVM           0.7237
4   RF2           0.6741
2   XGB           0.6610
The best model that performs better than chance is: RF3
  Model  Composite Score
5   RF3           0.7652
1   RF1           0.7583
3   RF2           0.7498
4  XGB2           0.6772
2  XGB1           0.6664
The best model that performs better than chance is: RF3
  Model  Composite Score
5   RF4           0.7521
1   XGB           0.7299
3   RF2           0.7244
4   RF3           0.6818
2   RF1           0.6564
The best model that performs better than chance is: RF4
  Model  Composite Score
1   XGB           0.7859
5   RF4           0.7521
3   RF2           0.7321
4   RF3           0.6926
2   RF1           0.6726
The best model that performs better than chance is: XGB
  Model  Composite Score
5   RF4           0.7382
1   RF1           0.7238
3   RF3           0.7182
4  XGB1           0.6802
2   RF2           0.6472
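The best model that performs better than chance is: RF4
[output truncated in this export: the results for data6 to data10 are not shown]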

# BEST PARTICIPANT GROUPS

In [5]:
import pandas as pd
import numpy as np

# One row per candidate model; the metric values appear to be those of each
# participant group's selected model from the feature-combination cell above
# -- confirm. 'Base rate' is carried along for reference only: the
# evaluate_models defined in this cell never reads it.
_columns = ('Model', 'Base rate', 'Weighted F1', 'Precision_1', 'Recall_1', 'AUROC')
_rows = [
    ('RF1', 0.79, 0.72, 0.84, 0.77, 0.69),
    ('RF2', 0.79, 0.73, 0.84, 0.80, 0.71),
    ('RF3', 0.79, 0.72, 0.85, 0.77, 0.69),
    ('XGB1', 0.79, 0.70, 0.88, 0.88, 0.71),
    ('RF4', 0.80, 0.71, 0.85, 0.76, 0.66),
    ('XGB2', 0.84, 0.80, 0.91, 0.81, 0.83),
    ('RF5', 0.73, 0.62, 0.77, 0.67, 0.66),
    ('XGB3', 0.81, 0.74, 0.85, 0.81, 0.65),
    ('RF6', 0.80, 0.74, 0.86, 0.78, 0.72),
    ('RF7', 0.76, 0.68, 0.84, 0.69, 0.67),
]

# Pivot the rows into the column-oriented dict that pd.DataFrame consumes.
data = {
    column: [row[position] for row in _rows]
    for position, column in enumerate(_columns)
}

def evaluate_models(data):
    """Rank models by a weighted composite of min-max-normalised metrics.

    Each of 'Weighted F1', 'Precision_1', 'Recall_1' and 'AUROC' is rescaled
    to [0, 1] across the models in ``data`` and the rescaled values are
    combined with weights 0.23 / 0.23 / 0.23 / 0.31.  Because the scaling
    uses the minimum and maximum of the rows passed in, scores are only
    comparable within a single call.  The ranking and the winner are printed.

    NOTE: this notebook also defines an earlier ``evaluate_models`` that
    compares models against a 'Chance' row; running this cell rebinds the
    name, so re-run that earlier cell before re-running the cells that
    depend on it.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (typically a dict of
            equal-length lists) with a 'Model' column and one column per
            weighted metric.  Other columns are ignored.

    Returns:
        The name of the model with the highest composite score.

    Raises:
        ValueError: If ``data`` contains no rows.
    """
    # Single source of truth for both the metric list and the weights.
    weights = {
        'Weighted F1': 0.23,
        'Precision_1': 0.23,
        'Recall_1': 0.23,
        'AUROC': 0.31,
    }

    df = pd.DataFrame(data)
    if df.empty:
        raise ValueError("data contains no models to rank")

    # Normalize the metrics using min-max normalization
    normalized_df = df.copy()
    for metric in weights:
        min_value = df[metric].min()
        max_value = df[metric].max()
        if min_value != max_value:
            normalized_df[metric] = (df[metric] - min_value) / (max_value - min_value)
        else:
            # A metric that is identical for every model cannot discriminate
            # between them; contribute 0 instead of dividing by zero.
            normalized_df[metric] = 0

    # Weighted sum of the normalised metric columns.
    normalized_df['Composite Score'] = sum(
        normalized_df[metric] * weight for metric, weight in weights.items()
    )

    # mergesort is stable: models with exactly equal scores keep input order,
    # so the reported winner does not depend on the sort implementation.
    sorted_df = normalized_df.sort_values(
        by='Composite Score', ascending=False, kind='mergesort'
    )

    # Display the results
    print(sorted_df[['Model', 'Composite Score']])

    # Determine the best model based on the highest composite score
    best_model = sorted_df.iloc[0]['Model']
    print(f"The model with the highest composite score is: {best_model}")

    return best_model

# Rank the candidate models in `data` with the min-max-normalised composite
# score; the ranking and the top model are printed.
evaluate_models(data)


  Model  Composite Score
5  XGB2         0.923333
3  XGB1         0.616270
8   RF6         0.542222
1   RF2         0.501270
7  XGB3         0.438095
2   RF3         0.437619
0   RF1         0.421190
4   RF4         0.362222
9   RF7         0.248016
6   RF5         0.017222
The model with the highest composite score is: XGB2
