In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import textwrap

In [4]:
# Read the stored model predictions, print the frame's (rows, columns)
# dimensions, then preview the first five rows.
preds = pd.read_feather('model_predictions.feather')
print((len(preds.index), len(preds.columns)))
preds.iloc[:5]

(43965, 7)


Unnamed: 0,model_name,true_weight,predicted_weight,set_type,additional_params,patient_id,pixel_array_file
0,resnet_18_best,61.0,73.903252,Train,,ANONYM-SMUXNZ_159,Scan_3774.npy
1,resnet_18_best,60.0,74.511299,Train,,ANONYM-GS4VD_115,Scan_4161.npy
2,resnet_18_best,52.0,67.541008,Train,,ANONYM-LWQW5V_143,Scan_343.npy
3,resnet_18_best,89.0,83.687981,Train,,ANONYM-XT8UJZ_117,Scan_1163.npy
4,resnet_18_best,85.0,93.206604,Train,,ANONYM-SMUXNZ_310,Scan_3925.npy


In [5]:
# De-duplicate: keep only the first occurrence of each fully identical row.
preds_clean = preds.loc[~preds.duplicated()]
preds_clean.shape

(14655, 7)

In [6]:
# Restrict the de-duplicated predictions to rows whose set_type is 'Validation'.
preds_clean = preds_clean.loc[preds_clean['set_type'].eq('Validation')]
preds_clean.shape

(2935, 7)

In [14]:
# Names of the three models to compare: the worst, a middle and the best one.
worst, mid, best = (
    "resnet_50_scratch_best",
    "resnet_18_best",
    "resnet_50_pre_best",
)

# Keep only the rows produced by one of those three models.
preds_filtered = preds_clean.loc[preds_clean['model_name'].isin([worst, mid, best])]
preds_filtered.shape

(1761, 7)

In [15]:
# Give the three selected models their presentation labels.
# The nested mapping limits the replacement to the 'model_name' column, so a
# coincidental match in any other column is never rewritten, and all three
# labels are applied in one pass rather than three order-dependent passes.
preds_filtered = preds_filtered.replace(
    {'model_name': {worst: "Worst_3D", mid: "Mid_3D", best: "Best_3D"}}
)
preds_filtered

Unnamed: 0,model_name,true_weight,predicted_weight,set_type,additional_params,patient_id,pixel_array_file
2344,Mid_3D,82.0,88.795021,Validation,,ANONYM-W6HAEI_27,Scan_862.npy
2345,Mid_3D,99.0,98.479301,Validation,,ANONYM-SMUXNZ_108,Scan_3722.npy
2346,Mid_3D,70.0,65.589607,Validation,,ANONYM-B4PMUQ_125,Scan_3042.npy
2347,Mid_3D,105.0,97.008263,Validation,,ANONYM-6LL37I_386,Scan_4904.npy
2348,Mid_3D,50.0,87.231682,Validation,,ANONYM-5ZVPRM_131,Scan_1403.npy
...,...,...,...,...,...,...,...
43960,Best_3D,78.0,71.526482,Validation,,ANONYM-OCOPMB_369,Scan_759.npy
43961,Best_3D,70.0,80.157043,Validation,,ANONYM-6LL37I_182,Scan_4699.npy
43962,Best_3D,67.0,68.332962,Validation,,ANONYM-XT8UJZ_152,Scan_1199.npy
43963,Best_3D,93.0,85.626701,Validation,,ANONYM-6LL37I_16,Scan_4531.npy


In [16]:
# Discard the columns that are not needed from here on.
preds_filtered = preds_filtered.drop(
    ['set_type', 'additional_params', 'patient_id', 'pixel_array_file'],
    axis='columns',
)
preds_filtered

Unnamed: 0,model_name,true_weight,predicted_weight
2344,Mid_3D,82.0,88.795021
2345,Mid_3D,99.0,98.479301
2346,Mid_3D,70.0,65.589607
2347,Mid_3D,105.0,97.008263
2348,Mid_3D,50.0,87.231682
...,...,...,...
43960,Best_3D,78.0,71.526482
43961,Best_3D,70.0,80.157043
43962,Best_3D,67.0,68.332962
43963,Best_3D,93.0,85.626701


In [17]:
# Switch to the column naming scheme: Model / Actual / Prediction.
preds_filtered = preds_filtered.rename(
    {
        'model_name': 'Model',
        'true_weight': 'Actual',
        'predicted_weight': 'Prediction',
    },
    axis='columns',
)
preds_filtered

Unnamed: 0,Model,Actual,Prediction
2344,Mid_3D,82.0,88.795021
2345,Mid_3D,99.0,98.479301
2346,Mid_3D,70.0,65.589607
2347,Mid_3D,105.0,97.008263
2348,Mid_3D,50.0,87.231682
...,...,...,...
43960,Best_3D,78.0,71.526482
43961,Best_3D,70.0,80.157043
43962,Best_3D,67.0,68.332962
43963,Best_3D,93.0,85.626701


In [18]:
# Deviation = Prediction - Actual, so a positive value means the prediction
# was higher than the actual value.
preds_filtered = preds_filtered.assign(
    Deviation=preds_filtered['Prediction'].sub(preds_filtered['Actual'])
)
preds_filtered

Unnamed: 0,Model,Actual,Prediction,Deviation
2344,Mid_3D,82.0,88.795021,6.795021
2345,Mid_3D,99.0,98.479301,-0.520699
2346,Mid_3D,70.0,65.589607,-4.410393
2347,Mid_3D,105.0,97.008263,-7.991737
2348,Mid_3D,50.0,87.231682,37.231682
...,...,...,...,...
43960,Best_3D,78.0,71.526482,-6.473518
43961,Best_3D,70.0,80.157043,10.157043
43962,Best_3D,67.0,68.332962,1.332962
43963,Best_3D,93.0,85.626701,-7.373299


In [19]:
# Final table: fixed column order and a fresh 0..n-1 index
# (the old index labels are discarded, not kept as a column).
final_df = (
    preds_filtered
    .loc[:, ['Model', 'Prediction', 'Actual', 'Deviation']]
    .reset_index(drop=True)
)
final_df

Unnamed: 0,Model,Prediction,Actual,Deviation
0,Mid_3D,88.795021,82.0,6.795021
1,Mid_3D,98.479301,99.0,-0.520699
2,Mid_3D,65.589607,70.0,-4.410393
3,Mid_3D,97.008263,105.0,-7.991737
4,Mid_3D,87.231682,50.0,37.231682
...,...,...,...,...
1756,Best_3D,71.526482,78.0,-6.473518
1757,Best_3D,80.157043,70.0,10.157043
1758,Best_3D,68.332962,67.0,1.332962
1759,Best_3D,85.626701,93.0,-7.373299


In [20]:
# Save the final table as CSV one directory above the working directory,
# leaving the index out of the file.
final_df.to_csv(path_or_buf='../preds_3d.csv', index=False)