In [36]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

# import libraries

In [37]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from statsmodels.api import add_constant

sns.set()

# read data

In [38]:
df_results = pd.read_csv('results.csv', index_col=0)
df_clinical = pd.read_csv('clinical_results.csv', index_col=0)

In [39]:
df_results.head()

Unnamed: 0,fold,patient,dice,TP,TN,FP,FN,n_pred,n_ref,precision,recall,type,view,dilated_dice
0,fold_0,1525150.nii,0.0,0,8623817,0,183,0,183,0.0,0.0,normal,axial,0.0
1,fold_0,1837909.nii,0.009756,4,9877584,0,812,4,816,1.0,0.004902,normal,axial,0.023171
2,fold_0,1856568.nii,0.580206,2984,5374842,717,3601,3701,6585,0.806269,0.453151,normal,axial,0.715341
3,fold_0,324035.nii,0.0,0,4504974,231,395,231,395,0.0,0.0,normal,axial,0.0
4,fold_0,390232.nii,0.299465,616,5378646,772,2110,1388,2726,0.443804,0.225972,normal,axial,0.402042


# Preprocess data

In [41]:
df_clinical['Rutgeerts'] = df_clinical['Rutgeerts'].astype('str')

def convert(x):
    if x == 'nan':
        return float('nan')
    else:
        return float(x[-1])
        
df_clinical['Rutgeerts'] = df_clinical['Rutgeerts'].apply(lambda x: convert(x))

In [42]:
df_clinical = df_clinical.drop(['StudyDate', 'Data Colon', 'PCR data', 'Calc data'], axis=1)
df_clinical = df_clinical[['PatientID', 'delta_colon', 'Rutgeerts',
                           'SES CD', 'PCR', 'Calc', 'delta_pcr', 'delta_calc'
                        ]]

In [43]:
df_results['patient'] = df_results['patient'].apply(lambda x: int(x.split('.')[0]))
df_results

Unnamed: 0,fold,patient,dice,TP,TN,FP,FN,n_pred,n_ref,precision,recall,type,view,dilated_dice
0,fold_0,1525150,0.000000,0,8623817,0,183,0,183,0.000000,0.000000,normal,axial,0.000000
1,fold_0,1837909,0.009756,4,9877584,0,812,4,816,1.000000,0.004902,normal,axial,0.023171
2,fold_0,1856568,0.580206,2984,5374842,717,3601,3701,6585,0.806269,0.453151,normal,axial,0.715341
3,fold_0,324035,0.000000,0,4504974,231,395,231,395,0.000000,0.000000,normal,axial,0.000000
4,fold_0,390232,0.299465,616,5378646,772,2110,1388,2726,0.443804,0.225972,normal,axial,0.402042
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,fold_4,5283984,0.379966,1927,56991720,2409,3880,4336,5807,0.444419,0.331841,da,sagittal,0.500049
352,fold_4,928807,0.221811,360,7200954,1206,1320,1566,1680,0.229885,0.214286,da,sagittal,0.416513
353,fold_4,9288071,0.000000,0,56991489,0,8447,0,8447,0.000000,0.000000,da,sagittal,0.000000
354,fold_4,94628,0.249403,2296,56983820,394,13426,2690,15722,0.853532,0.146037,da,sagittal,0.382142


In [44]:
df_join = df_clinical.merge(df_results, left_on='PatientID', right_on='patient')
df_join

Unnamed: 0,PatientID,delta_colon,Rutgeerts,SES CD,PCR,Calc,delta_pcr,delta_calc,fold,patient,...,TN,FP,FN,n_pred,n_ref,precision,recall,type,view,dilated_dice
0,9571,-3.0,,,,,,,fold_4,9571,...,5605318,408,674,408,674,0.000000,0.000000,normal,axial,0.000000
1,9571,-3.0,,,,,,,fold_4,9571,...,5912564,255,574,462,781,0.448052,0.265045,normal,coronal,0.768302
2,9571,-3.0,,,,,,,fold_4,9571,...,5166235,0,869,0,869,0.000000,0.000000,normal,sagittal,0.000000
3,9571,-3.0,,,,,,,fold_4,9571,...,5605430,296,674,296,674,0.000000,0.000000,da,axial,0.000000
4,9571,-3.0,,,,,,,fold_4,9571,...,5912377,442,484,739,781,0.401894,0.380282,da,coronal,0.765789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,1412113,2.0,4.0,,,,,,fold_4,1412113,...,7562704,262,714,614,1066,0.573290,0.330206,normal,coronal,0.685119
346,1412113,2.0,4.0,,,,,,fold_4,1412113,...,8559803,181,1230,247,1296,0.267206,0.050926,normal,sagittal,0.238496
347,1412113,2.0,4.0,,,,,,fold_4,1412113,...,5137397,17,1648,155,1786,0.890323,0.077268,da,axial,0.259660
348,1412113,2.0,4.0,,,,,,fold_4,1412113,...,7561009,1957,817,2206,1066,0.112874,0.233583,da,coronal,0.294315


In [45]:
for metric in ['dice','dilated_dice']:
    for train_type in ['normal', 'da']:
        for view in ['axial', 'coronal', 'sagittal']:
                for clinical_metric in ['Rutgeerts', 'SES CD']:

                    temp = df_join[(df_join.view == view) & (df_join.type == train_type)][['patient', metric, 'n_ref', 'PCR', 'Calc', clinical_metric]]
                    temp = temp.dropna()

                    y = temp[[metric]]
                    x = temp[['n_ref', clinical_metric, 'PCR', 'Calc']]


                    scaler = MinMaxScaler()
                    scaler.fit(x)
                    x = pd.DataFrame(scaler.transform(x), columns=x.columns)

                    y = y.reset_index(drop=True)

                    model = sm.OLS(y, x).fit()

                    print(f'metric: {metric}')
                    print(f'view: {view}')
                    print(f'train type: {train_type}')
                    print(f'clinical metric: {clinical_metric}')

                    print()
                    print(model.summary())
                    print()


metric: dice
view: axial
train type: normal
clinical metric: Rutgeerts

                                 OLS Regression Results                                
Dep. Variable:                   dice   R-squared (uncentered):                   0.812
Model:                            OLS   Adj. R-squared (uncentered):              0.737
Method:                 Least Squares   F-statistic:                              10.83
Date:                Sun, 18 Feb 2024   Prob (F-statistic):                     0.00117
Time:                        16:06:55   Log-Likelihood:                          2.5054
No. Observations:                  14   AIC:                                      2.989
Df Residuals:                      10   BIC:                                      5.545
Df Model:                           4                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t     

