In [13]:
import pandas as pd
import numpy as np

In [14]:
# Load the per-scan results table written out for each model run.
model15_results = pd.read_csv(filepath_or_buffer="./model15/model15.csv")
model14_results = pd.read_csv(filepath_or_buffer="./model14/model14_512.csv")

In [15]:
# Keep only validation rows (validation == 1) and rank them by |observed - predicted|, largest first.
def analyze_validation_predictions(df):
    """
    Rank validation-set predictions by how far they miss the observed LOES score.

    Parameters:
    df (pandas.DataFrame): DataFrame for model results .csv. Must contain the
        columns 'validation', 'anonymized_subject_id', 'anonymized_session_id',
        'scan', 'loes-score' and 'predicted_loes_score'. It is not modified.

    Returns:
    pandas.DataFrame: A new DataFrame containing only validation data (validation=1),
                     with columns anonymized_subject_id, anonymized_session_id, scan,
                     loes-score, predicted_loes_score and difference, where
                     difference = loes-score - predicted_loes_score (observed minus
                     predicted, so a positive value means the model under-predicted).
                     Rows are sorted by the absolute difference in descending order;
                     rows with equal absolute difference keep their input order and
                     rows with a missing difference are placed last. The original
                     index labels are preserved.

    Raises:
    KeyError: If any of the required columns is missing from df.
    """
    id_columns = ['anonymized_subject_id', 'anonymized_session_id', 'scan']
    score_columns = ['loes-score', 'predicted_loes_score']

    # Fail early with one message naming every missing column, instead of a
    # KeyError for whichever column happens to be accessed first.
    required_columns = ['validation'] + id_columns + score_columns
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing required column(s): {missing_columns}")

    # Keep validation rows and only the columns of interest; copy so that adding
    # the 'difference' column never touches the caller's DataFrame.
    validation_df = df.loc[df['validation'] == 1, id_columns + score_columns].copy()

    # Observed minus predicted LOES score.
    validation_df['difference'] = (
        validation_df['loes-score'] - validation_df['predicted_loes_score']
    )

    # Sort positionally rather than reindexing by label: label-based reindexing
    # raises when the index contains duplicate labels. A stable sort keeps the
    # order of tied rows deterministic.
    return validation_df.sort_values(
        by='difference',
        key=lambda diff: diff.abs(),
        ascending=False,
        kind='mergesort',
    )

In [16]:
# Build the ranked validation-error table for each model.
model15_diff_df = analyze_validation_predictions(df=model15_results)
model14_diff_df = analyze_validation_predictions(df=model14_results)

In [17]:
# Write each ranked table next to its model's results; the index is omitted from the file.
model15_diff_df.to_csv(path_or_buf='./model15/model15_high_diff.csv', index=False)
model14_diff_df.to_csv(path_or_buf='./model14/model14_512_high_diff.csv', index=False)