## Model results

### Import libraries

In [None]:
import pandas as pd
import os
from IPython.display import display
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import pygwalker as pyg
from matplotlib import pyplot as plt
import warnings

### Load results

In [None]:
# Directory that holds one sub-folder of prediction files per architecture
results_path = '/home/juandres/aml/CheXBias/data/processed/'

# Architecture whose runs are analysed in this notebook
model_group = 'densenet121'

# Prediction files of all trained models.
# os.listdir returns entries in arbitrary order and can also return entries
# that are not result files (e.g. hidden checkpoint folders), so keep only the
# CSV files and sort them to make every downstream table reproducible.
models = sorted(
    entry
    for entry in os.listdir(os.path.join(results_path, model_group))
    if entry.endswith('.csv')
)

### Ground truth

In [None]:
# Reference labels the model predictions are scored against
gt_csv_path = '/home/juandres/aml/CheXBias/data/raw/CheXpert-v1.0/train_VisualCheXbert.csv'
df_gt = pd.read_csv(gt_csv_path)

### Get Results

In [None]:
# One metrics table per model; they are concatenated once after the loop
per_model_frames = []

# Ground truth and predictions are compared row by row, so both tables are put
# in the same order (by image path) first. A sorted copy is used instead of
# `df_gt` itself so this cell gives the same numbers regardless of whether
# `df_gt` has already been re-ordered by another cell.
gt_sorted = df_gt.sort_values(by='Path', kind='mergesort').reset_index(drop=True)

# Iterate over all models
# (file names look like 'epochs_5_subsample_2_sexproportion_6040_noresch.csv')
for model in tqdm(models):

    # Load model results in the same row order as the ground truth
    df_model = pd.read_csv(os.path.join(results_path, model_group, model))
    df_model = df_model.sort_values(by='Path', kind='mergesort').reset_index(drop=True)

    # Every column after the first one holds the predictions for one class
    classes = df_model.columns[1:]

    # Percentage of female images, decoded from the sixth '_'-separated token
    # of the file name. The first two digits are the non-female share; the two
    # fully unbalanced runs ('0100' and '1000') do not fit that two-digit
    # pattern and are therefore mapped explicitly.
    proportion_input = model.split('_')[5]
    if proportion_input == '0100':
        proportion = 100
    elif proportion_input == '1000':
        proportion = 0
    else:
        proportion = 100 - int(proportion_input[:2])

    # Get metrics per class
    metrics = {}
    for class_name in classes:
        true_labels = gt_sorted[class_name]
        predicted_labels = df_model[class_name]

        metrics[class_name] = {
            'Accuracy': accuracy_score(true_labels, predicted_labels),
            'Precision': precision_score(true_labels, predicted_labels),
            'Recall': recall_score(true_labels, predicted_labels),
            'F1': f1_score(true_labels, predicted_labels),
            'Female Percentage': int(proportion),
        }

    # Classes become rows, metrics become columns
    per_model_frames.append(pd.DataFrame(metrics).T)

# Single concatenation instead of growing a DataFrame inside the loop.
# reset_index moves the class names into a regular column called 'index'.
results_per_class = pd.concat(per_model_frames, axis=0).reset_index()

# The transpose above turns every column into floats; restore the integer type
results_per_class['Female Percentage'] = results_per_class['Female Percentage'].astype(int)

In [None]:
# Show the per-class metrics table built in the previous cell
results_per_class

### General Results

In [None]:

# Dark grid theme for the figures
sns.set(style="darkgrid")

# Silence the warnings whose message starts with one of these texts
for ignored_message in ("is_categorical_dtype", "use_inf_as_na"):
    warnings.filterwarnings("ignore", ignored_message)

# 2x2 grid: one box plot per metric, grouped by the female percentage
fig, ax = plt.subplots(2, 2, figsize=(10, 8))

for metric_name, metric_ax in zip(('Accuracy', 'Precision', 'Recall', 'F1'), ax.flat):
    sns.boxplot(
        data=results_per_class,
        x='Female Percentage',
        y=metric_name,
        ax=metric_ax,
        palette="crest",
    )



In [None]:
# Base marker shapes used to tell the classes apart
marker_styles = ['o', 's', 'D', '^', 'v', '<', '>', 'p', '*', 'H', 'X', 'd']

# pointplot needs one marker per hue level, so repeat the base shapes as often
# as necessary to cover exactly the classes present in the results table
n_hue_levels = results_per_class['index'].nunique()
hue_markers = [marker_styles[i % len(marker_styles)] for i in range(n_hue_levels)]

# Create a 2x2 grid of subplots
fig, axes = plt.subplots(2, 2, figsize=(8, 8))

# Metric shown in each subplot
classes = ['Accuracy', 'Precision', 'Recall', 'F1']

# Loop through the metrics and corresponding subplots
for class_name, ax in zip(classes, axes.flat):
    # One line per class ('index' column) across the female percentages
    sns.pointplot(
        data=results_per_class,
        x='Female Percentage',
        y=class_name,
        hue='index',
        dodge=True,
        palette='crest',
        markers=hue_markers,
        linestyles='-',
        ax=ax,
    )

    # Drop the per-subplot legend (if one was drawn); a shared one is added below
    subplot_legend = ax.get_legend()
    if subplot_legend is not None:
        subplot_legend.remove()

# Create a single legend for the whole figure from the last subplot's entries.
# Its upper-left corner is anchored just past the right edge of the figure,
# so the legend sits outside the plots on the right-hand side.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, bbox_to_anchor=(1.05, 1), loc='upper left')

# Adjust layout to prevent overlapping
plt.tight_layout()

# Show the plot
plt.show()


#### Results per Image

In [None]:
def calculate_metrics(y_pred, y):
    """Compute accuracy, precision, recall and F1 for two binary label vectors.

    Args:
        y_pred: Predicted labels (array-like of 0/1 values).
        y: Reference labels (array-like of 0/1 values, same length).

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``. Precision, recall and F1
        are reported as 0 when their denominator is zero (no predicted
        positives, no actual positives, or both precision and recall zero).
    """
    # Convert up front: comparing two plain lists with == yields a single
    # bool instead of an element-wise result, which would corrupt every count.
    y_pred = np.asarray(y_pred)
    y = np.asarray(y)

    # Accuracy: share of positions where prediction and reference agree
    accuracy = np.mean(y_pred == y)

    # Confusion counts needed for precision and recall
    predicted_positive = y_pred == 1
    actual_positive = y == 1
    tp = np.sum(predicted_positive & actual_positive)
    fp = np.sum(predicted_positive & (y == 0))
    fn = np.sum((y_pred == 0) & actual_positive)

    # Precision, Recall, F1 Score (guarded against division by zero)
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0

    return accuracy, precision, recall, f1

In [None]:
# Rows of the most recently loaded model table times the number of models
# (presumably a sanity check for the size of the combined per-image table)
len(df_model)*len(models)

In [None]:
# Names of the per-image metric columns, in the order calculate_metrics
# returns them. NOTE: 'Precission' is spelled this way on purpose, because the
# age-group plotting cell reads the column under exactly that name.
metrics_names = ['Accuracy', 'Precission', 'Recall', 'F1 Score']

# Per-model prediction tables and their per-image metric matrices.
# `results_per_class` is deliberately left untouched so the per-class plots
# can still be re-run after this cell.
model_frames = []
all_metrics = []

# Fix the row order of the ground truth so rows can be matched by position
df_gt = df_gt.sort_values(by='Path')
df_gt = df_gt.reset_index(drop=True)

# Iterate over all models
# (file names look like 'epochs_5_subsample_2_sexproportion_6040_noresch.csv')
for model in tqdm(models):

    # Percentage of female images, decoded from the sixth '_'-separated token
    # of the file name; the two fully unbalanced runs are mapped explicitly
    proportion_input = model.split('_')[5]
    if proportion_input == '0100':
        proportion = 100
    elif proportion_input == '1000':
        proportion = 0
    else:
        proportion = 100 - int(proportion_input[:2])

    # Load model results
    df_model = pd.read_csv(os.path.join(results_path, model_group, model))

    # Class columns must be read before any extra column is appended
    classes = df_model.columns[1:]

    # Tag every row with the female percentage of its model
    df_model['Proportion'] = proportion

    # Same row order as the ground truth
    df_model = df_model.sort_values(by='Path')
    df_model = df_model.reset_index(drop=True)

    # Copy the image/patient attributes over from the ground truth
    # (matched by position, which is valid after the two sorts above)
    df_model['Sex'] = df_gt['Sex']
    df_model['Frontal/Lateral'] = df_gt['Frontal/Lateral']
    df_model['AP/PA'] = df_gt['AP/PA']
    df_model['Age'] = df_gt['Age']

    model_frames.append(df_model)

    # Label matrices: one row per image, one column per class
    gt_matrix = np.array(df_gt[classes].astype(int))
    model_matrix = np.array(df_model[classes].astype(int))

    # Metrics of each image, computed across its class labels
    metrics = np.zeros((gt_matrix.shape[0], len(metrics_names)))
    for i in range(gt_matrix.shape[0]):
        metrics[i] = calculate_metrics(model_matrix[i], gt_matrix[i])

    all_metrics.append(metrics)

# Stack all models once; a fresh index avoids repeated row labels, which some
# plotting and alignment operations cannot handle
all_df = pd.concat(model_frames, axis=0, ignore_index=True)

# Attach the metric columns; rows are in the same order as the stacked tables
concatenated_matrix = np.vstack(all_metrics)
for i, metric in enumerate(metrics_names):
    all_df[metric] = concatenated_matrix[:, i]

In [None]:
# Show the combined per-image table
all_df

In [None]:
fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# Width of every age group in years (e.g. use 10 for 10-year groups)
age_step = 5

# Edges of the half-open groups [start, start + age_step). The stop value is
# one past (max age + step) so an edge strictly above the oldest age always
# exists; without it, rows whose age is an exact multiple of the step and equal
# to the maximum would fall outside the last group.
max_age = int(all_df['Age'].max())
age_ranges = list(range(0, max_age + age_step + 1, age_step))
age_labels = [f"{start}-{start + age_step - 1}" for start in age_ranges[:-1]]

# Create a new column in the DataFrame to store the age groups
all_df['Age Group'] = pd.cut(all_df['Age'], bins=age_ranges, labels=age_labels, right=False)

# Rows of the models whose female percentage is 50
balanced_df = all_df[all_df['Proportion'] == 50]

# (column read from all_df, text shown on the subplot). The precision column
# is stored under the key 'Precission'; only the displayed text is corrected.
panels = [
    ('Accuracy', 'Accuracy'),
    ('Precission', 'Precision'),
    ('Recall', 'Recall'),
    ('F1 Score', 'F1 Score'),
]

for (column, title), panel_ax in zip(panels, ax.flat):
    sns.barplot(data=balanced_df, x='Age Group', y=column, ax=panel_ax, palette='crest')
    panel_ax.set_title(title)
    panel_ax.set_ylabel(title)
    # Rotate the group labels so neighbouring labels do not overlap
    plt.setp(panel_ax.get_xticklabels(), rotation=45, ha='right')

plt.suptitle('Metrics vs Age Group - Proportion 50/50')

# Adjust layout
plt.tight_layout()

# Show the plots
plt.show()



In [None]:
# Number of non-null entries in every column for each distinct age
all_df.groupby('Age').count()

In [None]:
# Histogram of the 'Age' column over all rows of all_df, using 20 bins
sns.histplot(data= all_df, x = 'Age', bins=20).set_title('Age Distribution')

In [None]:
# walker = pyg.walk(df_gt)