Import Libraries for Visualization

In [8]:
import pandas as pd
import json

Load results

In [9]:
def load_results(results_path_no_smote):
    """Load a nested results mapping from a JSON file.

    Parameters
    ----------
    results_path_no_smote : str or path-like
        Path of the JSON file to read. Despite the parameter name, the
        function itself is not specific to any one experiment; it reads
        whatever file it is given.

    Returns
    -------
    dict
        A new dict holding the top-level key/value pairs of the JSON object
        (in this notebook: feature-set name -> per-model results).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    AttributeError
        If the top-level JSON value is not an object (it has no ``.items()``).
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(results_path_no_smote, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Shallow copy of the top level; replaces the former key-by-key copy loop.
    return dict(data.items())


def load_pred_results(results_path_no_smote):
    """Load a nested results mapping from a JSON file.

    Kept as a separate name for existing callers. Its behaviour is the same
    as ``load_results``, so it delegates to that function instead of
    duplicating the implementation.

    Parameters
    ----------
    results_path_no_smote : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    dict
        Same value ``load_results`` returns for the given path.
    """
    return load_results(results_path_no_smote)

Summary for Model Performance without SMOTE

In [10]:
# Path to the JSON results file for the run without SMOTE.
results_path_noSMOTE = '../results_noSMOTE/model_results_noSMOTE.json'
model_results_noSMOTE = load_results(results_path_noSMOTE)
# NOTE(review): this reads the same file as model_results_noSMOTE above, so both
# variables hold equal content — confirm whether predictions should come from a
# separate file.
pred_results_noSMOTE = load_pred_results(results_path_noSMOTE)

Save results as a pandas DataFrame and a CSV file

In [11]:
def metrics(results):
    """Flatten nested model results into one table of test metrics.

    Parameters
    ----------
    results : dict
        Mapping of feature-set name -> mapping of model name -> score dict.
        Every score dict must contain the keys 'cv_accuracy',
        'test_accuracy', 'test_precision', 'test_recall', 'test_f1' and
        'cohen_kappa'.

    Returns
    -------
    pandas.DataFrame
        One row per (feature set, model) pair, in iteration order of
        ``results``. Columns are 'Feature_Set', 'Model', then the six score
        columns. An empty ``results`` yields an empty frame that still has
        all of these columns.

    Raises
    ------
    KeyError
        If a score dict is missing one of the required keys.
    """
    # Output column name -> key in the per-model score dict (order = column order).
    score_columns = {
        'CV Accuracy': 'cv_accuracy',
        'Accuracy': 'test_accuracy',
        'Precision Score': 'test_precision',
        'Recall Score': 'test_recall',
        'F1 Score': 'test_f1',
        "Cohen's Kappa Score": 'cohen_kappa',
    }

    rows = [
        {
            'Feature_Set': feature_set,
            'Model': model,
            **{column: scores[key] for column, key in score_columns.items()},
        }
        for feature_set, models in results.items()
        for model, scores in models.items()
    ]

    # Passing the columns explicitly keeps the schema (and the CSV header)
    # stable even when there are no rows to infer it from.
    return pd.DataFrame(rows, columns=['Feature_Set', 'Model', *score_columns])
    
# Flatten the nested no-SMOTE results into one row per (feature set, model).
metrics_df_noSMOTE = metrics(model_results_noSMOTE)

# Save to CSV; index=False leaves the row index out of the file.
metrics_df_noSMOTE.to_csv('../results_noSMOTE/model_results_noSMOTE.csv', index=False)
# Bare last expression so the notebook renders the table as the cell output.
metrics_df_noSMOTE

Unnamed: 0,Feature_Set,Model,CV Accuracy,Accuracy,Precision Score,Recall Score,F1 Score,Cohen's Kappa Score
0,F_fanger,ET,0.730706,0.767184,0.768854,0.775352,0.770933,0.648626
1,F_fanger,RF,0.742148,0.749446,0.75047,0.758466,0.753536,0.622026
2,F_fanger,SVC,0.703119,0.68071,0.680637,0.697933,0.680407,0.521605
3,F_selected,ET,0.824312,0.837209,0.84196,0.83936,0.840578,0.75318
4,F_selected,RF,0.815159,0.837209,0.844154,0.838457,0.840702,0.752989
5,F_selected,SVC,0.844281,0.817276,0.820257,0.821814,0.820984,0.723503
6,F_accessible,ET,0.821124,0.798226,0.79864,0.806431,0.801724,0.695711
7,F_accessible,RF,0.808761,0.804878,0.806249,0.80831,0.807222,0.70481
8,F_accessible,SVC,0.817301,0.780488,0.781802,0.787483,0.78426,0.668528


Summary of Model Performance with Repeated Cross-Validation


In [12]:
# Path to the JSON results file for the repeated cross-validation run.
results_path_repeatedCV = '../results_repeatedCV/model_results_repeatedCV.json'
model_results_repeatedCV = load_results(results_path_repeatedCV)
# NOTE(review): this reads the same file as model_results_repeatedCV above, so
# both variables hold equal content — confirm whether predictions should come
# from a separate file.
pred_results_repeatedCV = load_pred_results(results_path_repeatedCV)

In [13]:
def metrics_repeated(results):
    """Flatten nested repeated-CV results into one table of mean/std metrics.

    Parameters
    ----------
    results : dict
        Mapping of feature-set name -> mapping of model name -> score dict.
        Score dicts are read with ``.get``, so a missing key produces a
        missing value in that cell rather than an error.

    Returns
    -------
    pandas.DataFrame
        One row per (feature set, model) pair, in iteration order of
        ``results``. Columns are 'Feature_Set', 'Model', then a
        ``RepCV_<Metric>_Mean`` / ``RepCV_<Metric>_Std`` pair for Accuracy,
        Precision, Recall, F1 and Kappa. An empty ``results`` yields an
        empty frame that still has all of these columns.
    """
    # (label used in the output column name, stem of the key in the score dict)
    metric_specs = [
        ('Accuracy', 'accuracy'),
        ('Precision', 'precision'),
        ('Recall', 'recall'),
        ('F1', 'f1'),
        ('Kappa', 'cohen_kappa'),
    ]
    # Expand every metric into its mean/std pair: output column -> source key.
    column_to_key = {
        f'RepCV_{label}_{stat.capitalize()}': f'rep_cv_{stem}_{stat}'
        for label, stem in metric_specs
        for stat in ('mean', 'std')
    }

    rows = [
        {
            'Feature_Set': feature_set,
            'Model': model_name,
            **{column: data.get(key) for column, key in column_to_key.items()},
        }
        for feature_set, models in results.items()
        for model_name, data in models.items()
    ]

    # Passing the columns explicitly keeps the schema (and the CSV header)
    # stable even when there are no rows to infer it from.
    return pd.DataFrame(rows, columns=['Feature_Set', 'Model', *column_to_key])


# Build DataFrame: one row per (feature set, model) with repeated-CV mean/std columns.
metrics_df_repeatedCV = metrics_repeated(model_results_repeatedCV)

# Save to CSV; index=False leaves the row index out of the file.
metrics_df_repeatedCV.to_csv('../results_repeatedCV/model_results_repeatedCV.csv', index=False)

# Bare last expression so the notebook renders the table as the cell output.
metrics_df_repeatedCV


Unnamed: 0,Feature_Set,Model,RepCV_Accuracy_Mean,RepCV_Accuracy_Std,RepCV_Precision_Mean,RepCV_Precision_Std,RepCV_Recall_Mean,RepCV_Recall_Std,RepCV_F1_Mean,RepCV_F1_Std,RepCV_Kappa_Mean,RepCV_Kappa_Std
0,F_fanger,ET,0.752051,0.020759,0.75382,0.020447,0.764254,0.019451,0.755743,0.02058,0.626779,0.031019
1,F_fanger,RF,0.760711,0.018866,0.762648,0.019255,0.771552,0.018069,0.765055,0.018745,0.639383,0.028306
2,F_fanger,SVC,0.700922,0.018596,0.701596,0.019324,0.718609,0.016931,0.700867,0.019483,0.552049,0.02738
3,F_selected,ET,0.823979,0.02374,0.83304,0.021636,0.824084,0.024586,0.827199,0.023378,0.732425,0.036427
4,F_selected,RF,0.822773,0.022079,0.830765,0.020365,0.82403,0.022786,0.826365,0.021749,0.730835,0.033832
5,F_selected,SVC,0.830239,0.02149,0.834983,0.019249,0.832147,0.022794,0.832876,0.021086,0.742582,0.032983
6,F_accessible,ET,0.825032,0.01668,0.828106,0.016419,0.830535,0.017286,0.828409,0.01663,0.735446,0.02532
7,F_accessible,RF,0.818367,0.015774,0.822639,0.016282,0.823098,0.016709,0.82174,0.015928,0.72516,0.024015
8,F_accessible,SVC,0.828095,0.020324,0.831812,0.01947,0.833005,0.020397,0.831414,0.019852,0.739988,0.030835
