In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import textwrap

In [8]:
# Read the stored model predictions from the Feather file, then report the
# frame's dimensions and preview its first rows.
predictions_file = 'model_predictions.feather'
preds = pd.read_feather(predictions_file)
print(preds.shape)
preds.head()

(3737025, 7)


Unnamed: 0,model_name,true_weight,predicted_weight,set_type,additional_params,patient_id,pixel_array_file
0,vit_b_16_scaling_factor_pixel_spacing_slice_th...,61.0,75.226959,Train,,ANONYM-SMUXNZ_159,Scan_3774.npy
1,vit_b_16_scaling_factor_pixel_spacing_slice_th...,60.0,69.510887,Train,,ANONYM-GS4VD_115,Scan_4161.npy
2,vit_b_16_scaling_factor_pixel_spacing_slice_th...,52.0,72.877876,Train,,ANONYM-LWQW5V_143,Scan_343.npy
3,vit_b_16_scaling_factor_pixel_spacing_slice_th...,89.0,81.838478,Train,,ANONYM-XT8UJZ_117,Scan_1163.npy
4,vit_b_16_scaling_factor_pixel_spacing_slice_th...,85.0,84.898613,Train,,ANONYM-SMUXNZ_310,Scan_3925.npy


In [9]:
# Keep only the first occurrence of each fully identical row (all columns
# are compared), then show the remaining dimensions.
is_repeat = preds.duplicated()
preds_clean = preds[~is_repeat]
preds_clean.shape

(146550, 7)

In [10]:
# Restrict the deduplicated predictions to the rows whose set_type is
# 'Validation', then show the resulting dimensions.
in_validation = preds_clean['set_type'].eq('Validation')
preds_clean = preds_clean[in_validation]
preds_clean.shape

(29350, 7)

In [17]:
# Identifiers of the models treated as the worst, middle and best performers.
# NOTE(review): the ranking behind these choices is not computed in the
# visible cells — confirm where it comes from.
worst = "resnet50_scaling_factor_sagittal_scaling"
mid = "vit_b_16_coronal"
best = "resnet50_scale_multiplied_scaling_factor_sagittal_scaling"

# Keep only the validation rows produced by one of those three models.
chosen_models = [worst, mid, best]
from_chosen_model = preds_clean['model_name'].isin(chosen_models)
preds_filtered = preds_clean[from_chosen_model]
preds_filtered.shape

(1761, 7)

In [22]:
# Give the three selected models short display names.
#
# The replacement is restricted to the 'model_name' column: calling
# DataFrame.replace on the whole frame would also rewrite any cell in another
# column that happened to equal one of the model identifiers, and it scans
# every column once per call. A single mapping handles all three names in one
# pass. Re-running the cell is harmless because the long identifiers are no
# longer present after the first run.
display_names = {
    worst: "Worst_2D",
    mid: "Mid_2D",
    best: "Best_2D",
}
preds_filtered = preds_filtered.assign(
    model_name=preds_filtered['model_name'].replace(display_names)
)
preds_filtered

Unnamed: 0,model_name,true_weight,predicted_weight,set_type,additional_params,patient_id,pixel_array_file
81481,Worst_2D,82.0,96.614189,Validation,,ANONYM-W6HAEI_27,Scan_862.npy
81482,Worst_2D,99.0,128.024490,Validation,,ANONYM-SMUXNZ_108,Scan_3722.npy
81483,Worst_2D,70.0,91.631378,Validation,,ANONYM-B4PMUQ_125,Scan_3042.npy
81484,Worst_2D,105.0,133.372101,Validation,,ANONYM-6LL37I_386,Scan_4904.npy
81485,Worst_2D,50.0,81.763969,Validation,,ANONYM-5ZVPRM_131,Scan_1403.npy
...,...,...,...,...,...,...,...
615505,Best_2D,78.0,74.715340,Validation,,ANONYM-OCOPMB_369,Scan_759.npy
615506,Best_2D,70.0,71.003868,Validation,,ANONYM-6LL37I_182,Scan_4699.npy
615507,Best_2D,67.0,72.089722,Validation,,ANONYM-XT8UJZ_152,Scan_1199.npy
615508,Best_2D,93.0,88.111885,Validation,,ANONYM-6LL37I_16,Scan_4531.npy


In [23]:
# Discard the columns that the exported table does not use, keeping the
# model name together with the true and predicted weights.
unused_columns = ['set_type', 'additional_params', 'patient_id', 'pixel_array_file']
preds_filtered = preds_filtered.drop(unused_columns, axis='columns')
preds_filtered

Unnamed: 0,model_name,true_weight,predicted_weight
81481,Worst_2D,82.0,96.614189
81482,Worst_2D,99.0,128.024490
81483,Worst_2D,70.0,91.631378
81484,Worst_2D,105.0,133.372101
81485,Worst_2D,50.0,81.763969
...,...,...,...
615505,Best_2D,78.0,74.715340
615506,Best_2D,70.0,71.003868
615507,Best_2D,67.0,72.089722
615508,Best_2D,93.0,88.111885


In [24]:
# Map the raw column names onto the Model / Actual / Prediction naming scheme.
column_scheme = {
    'model_name': 'Model',
    'true_weight': 'Actual',
    'predicted_weight': 'Prediction',
}
preds_filtered = preds_filtered.rename(columns=column_scheme)
preds_filtered

Unnamed: 0,Model,Actual,Prediction
81481,Worst_2D,82.0,96.614189
81482,Worst_2D,99.0,128.024490
81483,Worst_2D,70.0,91.631378
81484,Worst_2D,105.0,133.372101
81485,Worst_2D,50.0,81.763969
...,...,...,...
615505,Best_2D,78.0,74.715340
615506,Best_2D,70.0,71.003868
615507,Best_2D,67.0,72.089722
615508,Best_2D,93.0,88.111885


In [25]:
# Signed error of each prediction (Prediction minus Actual): positive values
# mean the model over-estimated, negative values mean it under-estimated.
preds_filtered['Deviation'] = preds_filtered['Prediction'].sub(preds_filtered['Actual'])
preds_filtered

Unnamed: 0,Model,Actual,Prediction,Deviation
81481,Worst_2D,82.0,96.614189,14.614189
81482,Worst_2D,99.0,128.024490,29.024490
81483,Worst_2D,70.0,91.631378,21.631378
81484,Worst_2D,105.0,133.372101,28.372101
81485,Worst_2D,50.0,81.763969,31.763969
...,...,...,...,...
615505,Best_2D,78.0,74.715340,-3.284660
615506,Best_2D,70.0,71.003868,1.003868
615507,Best_2D,67.0,72.089722,5.089722
615508,Best_2D,93.0,88.111885,-4.888115


In [26]:
# Build the export table: fix the column order and replace the original row
# labels with a fresh 0..n-1 index (the old labels are discarded).
export_columns = ['Model', 'Prediction', 'Actual', 'Deviation']
final_df = preds_filtered[export_columns].reset_index(drop=True)
final_df

Unnamed: 0,Model,Prediction,Actual,Deviation
0,Worst_2D,96.614189,82.0,14.614189
1,Worst_2D,128.024490,99.0,29.024490
2,Worst_2D,91.631378,70.0,21.631378
3,Worst_2D,133.372101,105.0,28.372101
4,Worst_2D,81.763969,50.0,31.763969
...,...,...,...,...
1756,Best_2D,74.715340,78.0,-3.284660
1757,Best_2D,71.003868,70.0,1.003868
1758,Best_2D,72.089722,67.0,5.089722
1759,Best_2D,88.111885,93.0,-4.888115


In [27]:
# Save the export table as CSV in the parent of the working directory;
# the row index is left out of the file.
csv_target = '../preds_2d.csv'
final_df.to_csv(csv_target, index=False)