In [3]:
import pandas as pd
import numpy as np

In [4]:
# Keep only the validation rows (validation == 1) and rank them by |observed - predicted|, largest error first
def analyze_validation_predictions(df: pd.DataFrame) -> pd.DataFrame:
    """
    Rank validation-set predictions by how far they are from the observed LOES score.

    Parameters:
    df (pandas.DataFrame): Model results table. Must contain the columns
        'validation', 'anonymized_subject_id', 'anonymized_session_id', 'scan',
        'loes-score' and 'predicted_loes_score'. The input is not modified.

    Returns:
    pandas.DataFrame: Only the rows where validation == 1, restricted to the columns
        anonymized_subject_id, anonymized_session_id, scan, loes-score,
        predicted_loes_score and difference, where
        difference = loes-score - predicted_loes_score (observed minus predicted,
        sign preserved). Rows are ordered by abs(difference), largest first; rows
        whose difference is NaN are placed last. Original index labels are kept.

    Raises:
    KeyError: If any of the required columns is missing from df.
    """
    # Work on a copy so that adding the 'difference' column never touches the caller's frame
    validation_df = df.loc[df['validation'] == 1].copy()

    # Signed error: positive means the model under-predicted the observed score
    validation_df['difference'] = validation_df['loes-score'] - validation_df['predicted_loes_score']

    # Select only the columns of interest
    result_df = validation_df[['anonymized_subject_id', 'anonymized_session_id', 'scan',
                               'loes-score', 'predicted_loes_score', 'difference']]

    # Sort by position rather than by re-looking-up index labels: a label-based
    # reindex raises ValueError when the frame's index contains duplicates (e.g.
    # after concatenating several result files). mergesort is a stable sort, so
    # rows with equal absolute error keep their relative input order.
    return result_df.sort_values(
        by='difference',
        key=lambda col: col.abs(),
        ascending=False,
        kind='mergesort',
    )

In [5]:
# Read each model's results CSV from its own folder (paths are relative to the
# notebook's working directory); one DataFrame per model, in model-number order.
(model17_results, model18_results, model19_results,
 model20_results, model21_results) = [
    pd.read_csv(csv_path)
    for csv_path in (
        './model17/model17.csv',
        './model18/model18.csv',
        './model19/model19.csv',
        './model20/model20.csv',
        './model21/model21.csv',
    )
]

In [6]:
# Pair each model's folder name with its already-loaded results table.
results_by_model = {
    'model17': model17_results,
    'model18': model18_results,
    'model19': model19_results,
    'model20': model20_results,
    'model21': model21_results,
}

# Rank every model's validation errors and write the ranked table into that
# model's own folder (without the DataFrame index).
for model, results in results_by_model.items():
    print(f"Analyzing {model} predictions...")
    result_df = analyze_validation_predictions(results)
    output_csv = f'./{model}/{model}_validation_predictions.csv'
    result_df.to_csv(output_csv, index=False)
    print(f"Saved {model}_validation_predictions.csv to {model} folder\n")

Analyzing model17 predictions...
Saved model17_validation_predictions.csv to model17 folder

Analyzing model18 predictions...
Saved model18_validation_predictions.csv to model18 folder

Analyzing model19 predictions...
Saved model19_validation_predictions.csv to model19 folder

Analyzing model20 predictions...
Saved model20_validation_predictions.csv to model20 folder

Analyzing model21 predictions...
Saved model21_validation_predictions.csv to model21 folder

