In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
original_file='/content/drive/MyDrive/knight_challenegs2022/knight.json'
import json
with open(original_file) as f:
        clinical_data = json.load(f)

In [None]:
!pip install SimpleITK

Collecting SimpleITK
  Downloading SimpleITK-2.1.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (48.4 MB)
[K     |████████████████████████████████| 48.4 MB 2.5 MB/s 
[?25hInstalling collected packages: SimpleITK
Successfully installed SimpleITK-2.1.1


In [None]:
import os
import json
import pandas as pd
import numpy as np

CLINICAL_NAMES = ['SubjectId', 
                  'age', 
                  'bmi', 
                  'gender', 
                  'gender_num', 
                  'comorbidities', 
                  'smoking_history', 
                  'radiographic_size', 
                  'preop_egfr',
                  #'pathology_t_stage', 
                  #'pathology_n_stage', 
                  #'pathology_m_stage',
                  #'age_when_quit_smoking',
                  'alcohol_use',
                  'chewing_tobacco_use',
                  #'pack_years',
                  'x_spacing','y_spacing','z_spacing',
                  'aua_risk_group', 
                  'task_1_label', 
                  'task_2_label' ]

def create_knight_clinical(original_file, processed_file=None):
    with open(original_file) as f:
        clinical_data = json.load(f)
        print(clinical_data)
    t_stage_count = np.zeros((5))
    aua_risk_count = np.zeros((5))
    df = pd.DataFrame(columns=CLINICAL_NAMES)
    for index, patient in enumerate(clinical_data):
        df.loc[index, 'SubjectId'] = patient['case_id']
        df.loc[index, 'age'] = patient['age_at_nephrectomy']
        df.loc[index, 'bmi'] = patient['body_mass_index']

        df.loc[index, 'gender'] = patient['gender']
        if patient['gender'] == 'male':    # 0:'male'  1:'female','transgender_male_to_female'
            df.loc[index, 'gender_num'] = 0
        else:
            df.loc[index, 'gender_num'] = 1

        df.loc[index, 'comorbidities'] = 0    # 0:no_comorbidities 1:comorbidities_exist
        for key, value in patient['comorbidities'].items():
            if value:
                df.loc[index, 'comorbidities'] = 1

        df.loc[index, 'smoking_history'] = patient['smoking_history']
        if patient['smoking_history']=='never_smoked':    # 0:'never_smoked' 1:'previous_smoker'  2:'current_smoker'
            df.loc[index, 'smoking_history'] = 0
        elif patient['smoking_history'] == 'previous_smoker':
            df.loc[index, 'smoking_history'] = 1
        elif patient['smoking_history'] == 'current_smoker':
            df.loc[index, 'smoking_history'] = 2

        df.loc[index, 'radiographic_size'] = patient['radiographic_size']
        
        # if patient['last_preop_egfr']['value'] == '>=90':
        #     df.loc[index, 'preop_egfr'] = 90
        
        # else:
        #     df.loc[index, 'preop_egfr'] = patient['last_preop_egfr']['value']
        
        #df.loc[index, 'preop_egfr'] = patient['last_preop_egfr']['value']

        #df.loc[index, 'pathology_t_stage'] = patient['pathology_t_stage']
        #df.loc[index, 'pathology_n_stage'] = patient['pathology_n_stage']
        #df.loc[index, 'pathology_m_stage'] = patient['pathology_m_stage']
        # df.loc[index, 'aua_risk_group'] = patient['aua_risk_group']
       
        # Task 1 labels:
        if patient['aua_risk_group'] in ['high_risk', 'very_high_risk']:    # 1:'3','4'  0:'0','1a','1b','2a','2b'
            df.loc[index, 'task_1_label'] = 1 # CanAT
        else:
            df.loc[index, 'task_1_label'] = 0 # NoAT

        # Task 2 labels:
        if patient['aua_risk_group']=='benign':
            df.loc[index, 'task_2_label'] = 0 
        elif patient['aua_risk_group']=='low_risk':
            df.loc[index, 'task_2_label'] = 1
        elif  patient['aua_risk_group']=='intermediate_risk':
            df.loc[index, 'task_2_label'] = 2
        elif patient['aua_risk_group']=='high_risk':
            df.loc[index, 'task_2_label'] = 3
        elif patient['aua_risk_group']=='very_high_risk':
            df.loc[index, 'task_2_label'] = 4
        else:
            ValueError('Wrong risk class')

        # former classification - deprecated
        #if patient['pathology_t_stage'] in ['3', '4']:    # 1:'3','4'  0:'0','1a','1b','2a','2b'
        #    df.loc[index, 'pathology_t_stage_classify'] = 1
        #else:
        #    df.loc[index, 'pathology_t_stage_classify'] = 0
        #t_stage = int(patient['pathology_t_stage'][0])
        #t_stage_count[t_stage] += 1
        aua_risk = int(df.loc[index, 'task_2_label'])
        aua_risk_count[aua_risk] += 1
        #df.loc[index, 'grade'] = patient['tumor_isup_grade']
        ##### new parameters
        #df.loc[index,'alcohol_use']=patient['alcohol_use']
        if patient['alcohol_use']=='two_or_less_daily':
            df.loc[index, 'alcohol_use'] = 1
        elif patient['alcohol_use']=='never_or_not_in_last_3mo':
            df.loc[index, 'alcohol_use'] = 2
        elif patient['alcohol_use']=='more_than_two_daily':
            df.loc[index, 'alcohol_use'] = 3
         
        #df.loc[index,'pack_years']=patient['pack_years']
        #df.loc[index,'age_when_quit_smoking']=patient['age_when_quit_smoking']
        if patient['chewing_tobacco_use']=='never_or_not_in_last_3mo':
            df.loc[index, 'chewing_tobacco_use'] = 1
        else:
            df.loc[index, 'chewing_tobacco_use'] = 0

        #df.loc[index,'chewing_tobacco_use']=patient['chewing_tobacco_use']
        #age_when_quit_smoking
        
        if patient["last_preop_egfr"] is None:
            df.loc[index, 'preop_egfr'] = 0
        #print('yes')
        elif patient['last_preop_egfr']['value']== '>=90':
            df.loc[index, 'preop_egfr'] = 90
        #df.loc[index, 'preop_egfr'] = 0
        else :
            df.loc[index, 'preop_egfr'] = patient['last_preop_egfr']['value']
        df.loc[index, 'x_spacing'] = patient['voxel_spacing']['x_spacing']
        df.loc[index, 'y_spacing'] = patient['voxel_spacing']['y_spacing']
        df.loc[index, 'z_spacing'] = patient['voxel_spacing']['z_spacing']
    # else:

    if processed_file is not None:
        # save csv file
        df.to_csv(processed_file, index=False)
        df = df.drop(['gender', 'pathology_t_stage', 'pathology_n_stage', 'pathology_m_stage'], axis=1)
        df.to_csv(os.path.splitext(processed_file)[0] + '_numeric.csv' , index=False)
    print(f'Pathology t-stage count summary: {t_stage_count}')
    print(f'AUA risk count summary: {aua_risk_count}')
    return df


original_file='/content/drive/MyDrive/knight_challenegs2022/knight.json'

df=create_knight_clinical(original_file, processed_file=None)

dd=df['chewing_tobacco_use'].unique()

print(df['chewing_tobacco_use'].unique())

[{'case_id': 'case_00000', 'age_at_nephrectomy': 49, 'gender': 'male', 'body_mass_index': 29.47, 'comorbidities': {'myocardial_infarction': False, 'congestive_heart_failure': False, 'peripheral_vascular_disease': False, 'cerebrovascular_disease': False, 'dementia': False, 'copd': False, 'connective_tissue_disease': False, 'peptic_ulcer_disease': False, 'uncomplicated_diabetes_mellitus': False, 'diabetes_mellitus_with_end_organ_damage': False, 'chronic_kidney_disease': False, 'hemiplegia_from_stroke': False, 'leukemia': False, 'malignant_lymphoma': False, 'localized_solid_tumor': False, 'metastatic_solid_tumor': False, 'mild_liver_disease': False, 'moderate_to_severe_liver_disease': False, 'aids': False}, 'smoking_history': 'never_smoked', 'age_when_quit_smoking': 'not_applicable', 'pack_years': 0, 'chewing_tobacco_use': 'never_or_not_in_last_3mo', 'alcohol_use': 'two_or_less_daily', 'intraoperative_complications': {'blood_transfusion': False, 'injury_to_surrounding_organ': False, 'card

In [None]:
#preop_egfr
#df['preop_egfr'].unique()
df['preop_egfr']=df['preop_egfr'].fillna(77)
df['alcohol_use']=df['alcohol_use'].fillna(0)
#df['pack_years']=df['pack_years'].fillna(0)
#df['age_when_quit_smoking']=df['age_when_quit_smoking'].fillna(0)
print(df['preop_egfr'].unique())
#print(df['age_when_quit_smoking'].unique())
print(df['alcohol_use'].unique())
#print(df['pack_years'].unique())
print(df['radiographic_size'].unique())
print(df['smoking_history'].unique())
print(df['bmi'].unique())
print(df['age'].unique())

In [None]:
print(df['preop_egfr'].unique())
#print(df['age_when_quit_smoking'].unique())
print(df['alcohol_use'].unique())
#print(df['pack_years'].unique())
print(df['radiographic_size'].unique())
print(df['smoking_history'].unique())
print(df['bmi'].unique())
print(df['age'].unique())

[77. 67. 86. 90. 60. 87. 84. 66. 71. 42. 80. 73. 55. 72. 82. 38. 69. 75.
 89. 76. 68. 88. 61. 41. 63. 62. 65. 48. 70. 78. 85. 49. 45. 74. 17. 47.
 54. 64. 56. 53. 50. 59. 46. 81. 57. 58. 83. 44. 79. 52. 51.]
[1 2 3 0]
[2.4 2.2 4.5 3.0 4.9 3.8 10.6 2.7 3.5 4.6 9.59 4.2 2.1 3.4 5.0 2.3 2.5 1.2
 4.0 5.7 10.4 9.2 1.8 9.19 5.4 15.0 2.6 4.8 5.9 10.1 5.5 1.6 14.1 7.2 7.8
 4.3 1.5 4.4 2.0 13.1 6.4 7.07 13.9 3.3 4.7 9.8 11.4 3.2 1.9 13.2 9.4 3.6
 6.1 7.4 4.1 3.7 6.0 9.0 10.0 6.3 16.2 2.8 5.6 7.7 1.7 5.1 11.5 1.66 3.48
 12.7 8.7 3.26 3.9 1.4 6.5 10.9 12.5 3.1 9.1 9.3 12.0 10.3 6.7 5.2 8.8
 14.2 8.1 10.2 15.9 11.8 13.3 8.0 5.86 2.9 6.6 11.0 12.1 6.9 1.3 7.6 7.3]
[0 1 2]
[29.47 33.71 28.13 38.69 28.59 35.96 24.42 37.01 26.82 22.9 33.13 31.47
 30.92 22.86 29.43 24.29 31.61 31.67 35.65 36.4 28.34 36.29 30.69 34.53
 32.21 33.5 37.4 27.1 23.4 28.86 21.87 32.61 29.53 39.68 31.39 29.05 31.7
 23.69 23.26 24.3 26.14 44.78 24.39 28.0 25.87 37.21 30.96 39.28 32.92
 25.32 33.28 40.6 29.7 37.84 48.42 26.19 37

In [None]:
df

Unnamed: 0,SubjectId,age,bmi,gender,gender_num,comorbidities,smoking_history,radiographic_size,preop_egfr,alcohol_use,chewing_tobacco_use,x_spacing,y_spacing,z_spacing,aua_risk_group,task_1_label,task_2_label
0,case_00000,49,29.47,male,0,0,0,2.4,77.0,1,1,0.919922,0.919922,0.5,,0,2
1,case_00001,50,33.71,male,0,0,1,2.2,67.0,1,1,0.798828,0.798828,0.5,,0,1
2,case_00002,74,28.13,male,0,1,2,4.5,77.0,2,1,0.939453,0.939453,1.0,,1,3
3,case_00003,44,38.69,female,1,0,1,3.0,86.0,1,1,0.855469,0.855469,1.0,,0,1
4,case_00004,73,28.59,male,0,1,1,3.0,77.0,2,1,0.976562,0.976562,4.0,,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,case_00295,48,43.9,male,0,0,2,3.3,87.0,1,1,0.919922,0.919922,0.5,,0,1
296,case_00296,51,17.8,female,1,0,1,2.8,90.0,2,1,0.621094,0.621094,5.0,,0,1
297,case_00297,64,23.37,male,0,1,1,4.2,50.0,2,1,0.74,0.74,5.0,,0,2
298,case_00298,40,22.45,male,0,0,1,5.9,77.0,1,1,0.734375,0.734375,0.5,,1,3


In [None]:
df.to_csv('updated_training_features.csv',index=False)

In [None]:
cd /content/drive/MyDrive/knight_challenegs2022

/content/drive/MyDrive/knight_challenegs2022


In [None]:
import os
import json
import pandas as pd
import numpy as np

#CLINICAL_NAMES = ['SubjectId', 'age', 'bmi', 'gender', 'gender_num', 'comorbidities', 'smoking_history', 'radiographic_size', 'preop_egfr',
                  #'pathology_t_stage', 'pathology_n_stage', 'pathology_m_stage','age_when_quit_smoking','pack_years', 'grade', 'aua_risk_group', 'task_1_label', 'task_2_label']

#"age_at_nephrectomy"
#"gender"
#"body_mass_index"
#"comorbidities"
#"smoking_history"
#"age_when_quit_smoking"
#"pack_years"
#"chewing_tobacco_use"
#"alcohol_use"
#"last_preop_egfr"
#"radiographic_size"
#"voxel_spacing"

CLINICAL_NAMES = ['SubjectId', 
                  'age', 
                  'bmi', 
                  'gender', 
                  'gender_num', 
                  'comorbidities', 
                  'smoking_history', 
                  'radiographic_size', 
                  'preop_egfr',
                  #'pathology_t_stage', 
                  #'pathology_n_stage', 
                  #'pathology_m_stage',
                  #'age_when_quit_smoking',
                  'alcohol_use',
                  'chewing_tobacco_use',
                  #'pack_years',
                  'x_spacing','y_spacing','z_spacing']
                  #'aua_risk_group', 
                  #'task_1_label', 
                  #'task_2_label' ]
                  #'grade', 
                  #'aua_risk_group', 'task_1_label', 'task_2_label']

# "age_at_nephrectomy"
# "gender"
# "body_mass_index"
# "comorbidities"
# "smoking_history"
# "age_when_quit_smoking"
# "pack_years"
# "chewing_tobacco_use"
# "alcohol_use"
# "last_preop_egfr"
# "radiographic_size"
# "voxel_spacing"


def create_knight_clinical(original_file, processed_file=None):
    with open(original_file) as f:
        clinical_data = json.load(f)
        print(clinical_data)
    t_stage_count = np.zeros((5))
    aua_risk_count = np.zeros((5))
    df = pd.DataFrame(columns=CLINICAL_NAMES)
    for index, patient in enumerate(clinical_data):
        df.loc[index, 'SubjectId'] = patient['case_id']
        df.loc[index, 'age'] = patient['age_at_nephrectomy']
        df.loc[index, 'bmi'] = patient['body_mass_index']

        df.loc[index, 'gender'] = patient['gender']
        if patient['gender'] == 'male':    # 0:'male'  1:'female','transgender_male_to_female'
            df.loc[index, 'gender_num'] = 0
        else:
            df.loc[index, 'gender_num'] = 1

        df.loc[index, 'comorbidities'] = 0    # 0:no_comorbidities 1:comorbidities_exist
        for key, value in patient['comorbidities'].items():
            if value:
                df.loc[index, 'comorbidities'] = 1

        df.loc[index, 'smoking_history'] = patient['smoking_history']
        if patient['smoking_history']=='never_smoked':    # 0:'never_smoked' 1:'previous_smoker'  2:'current_smoker'
            df.loc[index, 'smoking_history'] = 0
        elif patient['smoking_history'] == 'previous_smoker':
            df.loc[index, 'smoking_history'] = 1
        elif patient['smoking_history'] == 'current_smoker':
            df.loc[index, 'smoking_history'] = 2

        df.loc[index, 'radiographic_size'] = patient['radiographic_size']
        
        # if patient['last_preop_egfr']['value'] == '>=90':
        #     df.loc[index, 'preop_egfr'] = 90
        
        # else:
        #     df.loc[index, 'preop_egfr'] = patient['last_preop_egfr']['value']
        
        #df.loc[index, 'preop_egfr'] = patient['last_preop_egfr']['value']

        #df.loc[index, 'pathology_t_stage'] = patient['pathology_t_stage']
        #df.loc[index, 'pathology_n_stage'] = patient['pathology_n_stage']
        #df.loc[index, 'pathology_m_stage'] = patient['pathology_m_stage']
        # df.loc[index, 'aua_risk_group'] = patient['aua_risk_group']
       
        # # Task 1 labels:
        # if patient['aua_risk_group'] in ['high_risk', 'very_high_risk']:    # 1:'3','4'  0:'0','1a','1b','2a','2b'
        #     df.loc[index, 'task_1_label'] = 1 # CanAT
        # else:
        #     df.loc[index, 'task_1_label'] = 0 # NoAT

        # # Task 2 labels:
        # if patient['aua_risk_group']=='benign':
        #     df.loc[index, 'task_2_label'] = 0 
        # elif patient['aua_risk_group']=='low_risk':
        #     df.loc[index, 'task_2_label'] = 1
        # elif  patient['aua_risk_group']=='intermediate_risk':
        #     df.loc[index, 'task_2_label'] = 2
        # elif patient['aua_risk_group']=='high_risk':
        #     df.loc[index, 'task_2_label'] = 3
        # elif patient['aua_risk_group']=='very_high_risk':
        #     df.loc[index, 'task_2_label'] = 4
        # else:
        #     ValueError('Wrong risk class')

        # former classification - deprecated
        #if patient['pathology_t_stage'] in ['3', '4']:    # 1:'3','4'  0:'0','1a','1b','2a','2b'
        #    df.loc[index, 'pathology_t_stage_classify'] = 1
        #else:
        #    df.loc[index, 'pathology_t_stage_classify'] = 0
        #t_stage = int(patient['pathology_t_stage'][0])
        #t_stage_count[t_stage] += 1
        # aua_risk = int(df.loc[index, 'task_2_label'])
        # aua_risk_count[aua_risk] += 1
        # df.loc[index, 'grade'] = patient['tumor_isup_grade']
        ##### new parameters
        #df.loc[index,'alcohol_use']=patient['alcohol_use']
        if patient['alcohol_use']=='two_or_less_daily':
            df.loc[index, 'alcohol_use'] = 1
        elif patient['alcohol_use']=='never_or_not_in_last_3mo':
            df.loc[index, 'alcohol_use'] = 2
        elif patient['alcohol_use']=='more_than_two_daily':
            df.loc[index, 'alcohol_use'] = 3
         
        #df.loc[index,'pack_years']=patient['pack_years']
        #df.loc[index,'age_when_quit_smoking']=patient['age_when_quit_smoking']
        if patient['chewing_tobacco_use']=='never_or_not_in_last_3mo':
            df.loc[index, 'chewing_tobacco_use'] = 1
        else:
            df.loc[index, 'chewing_tobacco_use'] = 0

        #df.loc[index,'chewing_tobacco_use']=patient['chewing_tobacco_use']
        #age_when_quit_smoking
        
        if patient["last_preop_egfr"] is None:
            df.loc[index, 'preop_egfr'] = 0
        #print('yes')
        elif patient['last_preop_egfr']['value']== '>90':
            df.loc[index, 'preop_egfr'] = 90
        #df.loc[index, 'preop_egfr'] = 0
        else :
            df.loc[index, 'preop_egfr'] = patient['last_preop_egfr']['value']
        df.loc[index, 'x_spacing'] = patient['voxel_spacing']['x_spacing']
        df.loc[index, 'y_spacing'] = patient['voxel_spacing']['y_spacing']
        df.loc[index, 'z_spacing'] = patient['voxel_spacing']['z_spacing']
    # else:

    if processed_file is not None:
        # save csv file
        df.to_csv(processed_file, index=False)
        df = df.drop(['gender', 'pathology_t_stage', 'pathology_n_stage', 'pathology_m_stage'], axis=1)
        df.to_csv(os.path.splitext(processed_file)[0] + '_numeric.csv' , index=False)
    print(f'Pathology t-stage count summary: {t_stage_count}')
    print(f'AUA risk count summary: {aua_risk_count}')
    return df


original_file='/content/drive/MyDrive/knight_challenegs2022/features.json'

dftest=create_knight_clinical(original_file, processed_file=None)

#dd=df['chewing_tobacco_use'].unique()

print(df['chewing_tobacco_use'].unique())

[{'case_id': 'case_00400', 'age_at_nephrectomy': 88, 'body_mass_index': 29.18, 'comorbidities': {'myocardial_infarction': False, 'congestive_heart_failure': False, 'peripheral_vascular_disease': False, 'cerebrovascular_disease': False, 'dementia': False, 'copd': False, 'connective_tissue_disease': False, 'peptic_ulcer_disease': False, 'uncomplicated_diabetes_mellitus': False, 'diabetes_mellitus_with_end_organ_damage': False, 'chronic_kidney_disease': True, 'hemiplegia_from_stroke': False, 'leukemia': False, 'malignant_lymphoma': False, 'localized_solid_tumor': False, 'metastatic_solid_tumor': False, 'mild_liver_disease': False, 'moderate_to_severe_liver_disease': False, 'aids': False}, 'smoking_history': 'current_smoker', 'age_when_quit_smoking': None, 'pack_years': None, 'chewing_tobacco_use': 'never_or_not_in_last_3mo', 'alcohol_use': 'never_or_not_in_last_3mo', 'last_preop_egfr': {'days_before_surgery': 29, 'value': 57.0}, 'radiographic_size': None, 'voxel_spacing': {'x_spacing': 0.

In [None]:
#preop_egfr
#df['preop_egfr'].unique()
df=dftest
df['preop_egfr']=df['preop_egfr'].fillna(77)
df['bmi']=df['bmi'].fillna(44)
df['alcohol_use']=df['alcohol_use'].fillna(0)
df['radiographic_size']=df['radiographic_size'].fillna(4)
#df['pack_years']=df['pack_years'].fillna(0)
#df['age_when_quit_smoking']=df['age_when_quit_smoking'].fillna(0)

In [None]:
print(df['bmi'].unique())
print(df['preop_egfr'].unique())
#print(df['age_when_quit_smoking'].unique())
print(df['alcohol_use'].unique())
#print(df['pack_years'].unique())
print(df['radiographic_size'].unique())
print(df['smoking_history'].unique())
print(df['bmi'].unique())
print(df['age'].unique())
print(df['chewing_tobacco_use'].unique())

[29.18 36.96 25.53 59.86 31.38 49.82 40.67 30.87 27.61 40.06 37.12 22.53
 28.89 28.35 41.05 39.21 26.92 20.86 31.65 45.76 24.69 31.03 25.93 29.08
 31.95 22.05 27.12 28.06 24.7  38.84 26.15 24.59 29.34 29.95 25.88 38.8
 33.55 34.87 34.09 30.21 21.95 33.77 27.69 32.69 25.36 28.08 26.39 29.55
 26.08 21.84 28.52 31.58 33.08 28.62 28.59 25.69 30.93 46.64 29.78 33.99
 33.7   0.   31.31 33.18 23.29 46.22 32.37 38.07 25.2  20.59 28.07 26.26
 28.13 23.11 35.93 32.87 30.29 23.43 18.75 40.68 34.39 27.77 26.41 35.6
 72.23 33.8  24.13 22.52 32.33 34.19 29.32 29.43 45.19 33.84 24.97 20.66
 37.62 30.45 34.7 ]
[57. 75. 67. 39. 53. 76. 87. 86.  0. 84. 90. 83. 66. 64. 52. 59. 77. 65.
 56. 79. 62. 60. 50.  5. 55. 81. 80. 69. 74. 71. 73. 47. 78. 88. 61. 58.
 70. 85. 28. 54.]
[2 1 3]
[ 4.   3.   2.3  1.6  4.7  3.5  6.2 13.  11.   3.2  2.6  1.   2.5  3.1
  9.3 10.1  5.   8.   2.4  1.5 10.5 11.1 17.8  7.5  1.4  2.1  2.7  7.6
  0.5  2.   8.7  6.   1.2  5.1  6.1  4.3 15.   1.3  6.7  9.2  2.2  4.2
  2.8  3.7  9

In [None]:
df.to_csv('test_featurelatest.csv',index=False)

In [None]:
import pandas as pd
#path='C:\\Users\\Administrateur\\Desktop\\testfused_model\\fuse-med-ml-master\\fuse-med-ml-master\\baseline'
splits=pd.read_pickle('/content/drive/MyDrive/knight_challenegs2022/splits_final.pkl')
# For this example, we use split 0 out of the 5 available cross validation splits
split = splits[0]
train=split['train']
pdftrain=pd.DataFrame(train).rename(columns={0:'SubjectId'})

val=split['val']
pdfval=pd.DataFrame(val).rename(columns={0:'SubjectId'})


In [None]:
import pandas as pd
#path='C:\\Users\\Administrateur\\Desktop\\testfused_model\\fuse-med-ml-master\\fuse-med-ml-master\\baseline'
splits=pd.read_pickle('/content/drive/MyDrive/knight_challenegs2022/splits_final.pkl')
# For this example, we use split 0 out of the 5 available cross validation splits
split = splits[4]
train=split['train']
pdftrain=pd.DataFrame(train).rename(columns={0:'SubjectId'})

val=split['val']
pdfval=pd.DataFrame(val).rename(columns={0:'SubjectId'})

In [None]:
cd /content/drive/MyDrive/knight_challenegs2022

/content/drive/MyDrive/knight_challenegs2022


In [None]:
#pathtrin
import pandas as pd

pathd=pd.read_csv('/content/drive/MyDrive/knight_challenegs2022/updated_training_features.csv')

dftrain=pathd
pathfile=dftrain
traindatapd = pd.merge(pathfile, pdftrain, on=['SubjectId'], how='inner')
traindatapd .head()

valdatapd = pd.merge(pathfile, pdfval, on=['SubjectId'], how='inner')
valdatapd.head()

Unnamed: 0,SubjectId,age,bmi,gender,gender_num,comorbidities,smoking_history,radiographic_size,preop_egfr,alcohol_use,chewing_tobacco_use,x_spacing,y_spacing,z_spacing,aua_risk_group,task_1_label,task_2_label
0,case_00005,35,35.96,male,0,1,2,4.9,90.0,2,1,0.976562,0.976562,0.5,,1,3
1,case_00012,62,30.92,female,1,1,0,9.59,42.0,1,1,0.751953,0.751953,5.0,,1,4
2,case_00022,66,30.69,female,1,0,0,2.3,80.0,3,1,0.833984,0.833984,0.5,,0,1
3,case_00023,49,34.53,male,0,0,0,2.3,90.0,2,1,0.782,0.782,3.0,,0,1
4,case_00029,68,23.4,female,1,1,2,5.4,82.0,1,1,0.753906,0.753906,5.0,,0,2


In [None]:
valdatapd

Unnamed: 0,SubjectId,age,bmi,gender,gender_num,comorbidities,smoking_history,radiographic_size,preop_egfr,alcohol_use,chewing_tobacco_use,x_spacing,y_spacing,z_spacing,aua_risk_group,task_1_label,task_2_label
0,case_00005,35,35.96,male,0,1,2,4.9,90.0,2,1,0.976562,0.976562,0.5,,1,3
1,case_00012,62,30.92,female,1,1,0,9.59,42.0,1,1,0.751953,0.751953,5.0,,1,4
2,case_00022,66,30.69,female,1,0,0,2.3,80.0,3,1,0.833984,0.833984,0.5,,0,1
3,case_00023,49,34.53,male,0,0,0,2.3,90.0,2,1,0.782,0.782,3.0,,0,1
4,case_00029,68,23.4,female,1,1,2,5.4,82.0,1,1,0.753906,0.753906,5.0,,0,2
5,case_00034,49,39.68,male,0,0,1,4.8,90.0,1,1,0.835938,0.835938,5.0,,0,1
6,case_00036,51,29.05,male,0,0,2,5.9,87.0,1,0,0.798828,0.798828,3.0,,1,3
7,case_00038,51,23.69,male,0,0,1,5.5,77.0,2,1,0.779297,0.779297,5.0,,0,1
8,case_00041,46,26.14,female,1,0,0,2.5,89.0,2,1,0.677734,0.677734,3.0,,0,1
9,case_00043,62,24.39,male,0,0,0,3.0,77.0,2,1,0.738,0.738,3.0,,0,0


In [None]:
traindatapd.to_csv('trainf4new.csv',index=False)

In [None]:
valdatapd.to_csv('validf4new.csv',index=False)

In [None]:
cd /content/drive/MyDrive/knight_challenegs2022/models

/content/drive/MyDrive/knight_challenegs2022/models


In [None]:
!mkdir fold1model

In [None]:
cd /content/drive/MyDrive/knight_challenegs2022

/content/drive/MyDrive/knight_challenegs2022


In [None]:
import torch
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import SimpleITK as sitk
import numpy as np
import os
import numpy as np
import skimage
#import skimage.io as io
import skimage.transform as transform
import torch

import numpy as np
import pandas as pd
import torch
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1" 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import cv2
import matplotlib.pyplot as plt
import skimage 

#import logging
#import traceback
from typing import Optional, Tuple


def kits_normalization(input_image: np.ndarray):
    # first, clip to [-62, 310] (corresponds to 0.5 and 99.5 percentile in the foreground regions)
    # then, subtract 104.9 and divide by 75.3 (corresponds to mean and std in the foreground regions, respectively)
    clip_min = -62
    clip_max = 301
    mean_val = 104.0
    std_val = 75.3
    input_image = np.minimum(np.maximum(input_image, clip_min), clip_max)
    input_image -= mean_val
    input_image /= std_val
    return input_image

def normalize_to_range(input_image: np.ndarray, range: Tuple = (0.0, 1.0)):
    """
    Scales tensor to range
    @param input_image: image of shape (H x W x C)
    @param range:       bounds for normalization
    @return:            normalized image
    """
    max_val = input_image.max()
    min_val = input_image.min()
    if min_val == max_val == 0:
        return input_image
    input_image = input_image - min_val
    input_image = input_image / (max_val - min_val)
    input_image = input_image * (range[1] - range[0])
    input_image = input_image + range[0]
    return input_image

from typing import Iterable

def center_crop(np_image: np.ndarray,
                new_shape: Iterable[int],
                outside_val: float = 0
                ) -> np.ndarray:
    output_image = np.full(new_shape, outside_val, np_image.dtype)

    slices = tuple()
    offsets = tuple()
    for it, sh in enumerate(new_shape):
        size = sh // 2
        if it == 0:
            center = np_image.shape[it] - size
        else:
            center = (np_image.shape[it] // 2)
        start = center - size
        stop = center + size + (sh % 2)

        # computing what area of the original image will be in the cropped output
        slce = slice(max(0, start), min(np_image.shape[it], stop))
        slices += (slce,)

        # computing offset to pad if the crop is partly outside of the scan
        offset = slice(-min(0, start), 2 * size - max(0, (start + 2 * size) - np_image.shape[it]))
        offsets += (offset,)

    output_image[offsets] = np_image[slices]

    return output_image


def pad_image(image: np.ndarray, outer_height: int, outer_width: int, pad_value: Tuple):
    """
    Pastes input image in the middle of a larger one
    @param image:        image of shape (H x W x C)
    @param outer_height: final outer height
    @param outer_width:  final outer width
    @param pad_value:    value for padding around inner image
    @return:             padded image
    """
    inner_height, inner_width = image.shape[0], image.shape[1]
    h_offset = int((outer_height - inner_height) / 2.0)
    w_offset = int((outer_width - inner_width) / 2.0)
    outer_image = np.ones((outer_height, outer_width, 3), dtype=image.dtype) * pad_value
    outer_image[h_offset:h_offset + inner_height, w_offset:w_offset + inner_width, :] = image

    return outer_image


class deeeeset(Dataset):
    def __init__(self,in_out):
        self.in_out=in_out
        
        #self.new_shape=new_shape
        #self.input_data = input_data
        self.ids=self.in_out['SubjectId']
        self.label=self.in_out['task_1_label']
        
        self.feature=self.in_out.drop(['SubjectId','gender','aua_risk_group','task_1_label','task_2_label'],axis=1, inplace=True)

    
            
        
    def __getitem__(self,idx):
        
        feat=self.in_out.iloc[idx]
        x_feature=pd.DataFrame(feat).T
        
        # norm_img=get_array_img
        
        y_output=self.label[idx]
        x_f_array=np.array(x_feature)
        x_f_array_t=torch.from_numpy(x_f_array).float()
        x_f_array_t=torch.squeeze(x_f_array_t,axis=0)
        return (x_f_array_t,y_output)
    
    def __len__(self):
        return(len(self.label))
    


#input_image = center_crop(input_image, new_shape=new_shape) 
    
    
filet=pd.read_csv('/content/drive/MyDrive/knight_challenegs2022/trainf4new.csv')
#data='/home/imranr/knights/data'

filev=pd.read_csv('/content/drive/MyDrive/knight_challenegs2022/validf4new.csv')




len(filev['task_1_label']==0)
len(filev['task_1_label']==1)

#classes=['NoAT','CanAT']
c1=filet['task_1_label'].value_counts()[0]
#175

#c2=filet['task_1_label'].value_counts()[1]
#65

train_dataset=deeeeset(filet)
valid_dataset=deeeeset(filev)

print(len(train_dataset))
print(len(valid_dataset))

f,o=valid_dataset[2]

#print(i.shape)
#print(i.min())
#print(i.max())
print(o)
print(f)

# i,f, o=train_dataset[2]

# print(i.shape)
# print(i.min())
# print(i.max())
# print(o)
# print(f)

#%
from torch.utils.data import DataLoader

# train_dataloader = DataLoader(ob_d, batch_size=4, shuffle=True)
# #test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
# for i, data in enumerate(train_dataloader):
#     img,f,l=data
#     print(img.shape)
#     print(f.shape)
#     print(l)
#     break
    

batch=4

train_dataloader = DataLoader(train_dataset, batch_size= batch,pin_memory=True,num_workers=6, shuffle=True)

valid_dataloader = DataLoader(valid_dataset, batch_size= batch,pin_memory=True,num_workers=6, shuffle=False)


import sys 
import os
import glob
import time
import random
import os
import glob
import time
import random
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Copyright (c) MONAI Consortium
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
(C) Copyright 2021 IBM Corp.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
   http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Created on June 30, 2021
"""

from typing import Tuple, Any

import torch.nn as nn
from torch import Tensor
from torch.hub import load_state_dict_from_url
from torchvision.models.video.resnet import VideoResNet, BasicBlock, Conv3DSimple, BasicStem, model_urls


class FuseBackboneResnet3D(VideoResNet):
    """
    3D model classifier (ResNet architecture"
    """

    def __init__(self, pretrained: bool = False, in_channels: int = 1, name: str = "r3d_18") -> None:
        """
        Create 3D ResNet model
        :param pretrained: Use pretrained weights
        :param in_channels: number of input channels
        :param name: model name. currently only 'r3d_18' is supported
        """
        # init parameters per required backbone
        init_parameters = {
            'r3d_18': {'block': BasicBlock,
                       'conv_makers': [Conv3DSimple] * 4,
                       'layers': [2, 2, 2, 2],
                       'stem': BasicStem},
        }[name]
        # init original model
        super().__init__(**init_parameters)

        # load pretrained parameters if required
        if pretrained:
            state_dict = load_state_dict_from_url(model_urls[name])
            self.load_state_dict(state_dict)

        # save input parameters
        self.pretrained = pretrained
        self.in_channels = in_channels
        # override the first convolution layer to support any number of input channels
        self.stem = nn.Sequential(
            nn.Conv3d(self.in_channels, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2),
                      padding=(1, 3, 3), bias=False),
            nn.BatchNorm3d(64),
            nn.ReLU(inplace=True)
        )

    def features(self, x: Tensor) -> Any:
        """
        Extract spatial features - given a 3D tensor
        :param x: Input tensor - shape: [batch_size, channels, z, y, x]
        :return: spatial features - shape [batch_size, n_features, z', y', x']
        """
        x = self.stem(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

    def forward(self, x: Tensor) -> Tuple[Tensor, None, None, None]:  # type: ignore
        """
        Forward pass. 3D global classification given a volume
        :param x: Input volume. shape: [batch_size, channels, z, y, x]
        :return: logits for global classification. shape: [batch_size, n_classes].
        """
        x = self.features(x)
        return x
    
backbone=FuseBackboneResnet3D(pretrained=True)   
from typing import Optional, Sequence
import torch.nn as nn

class ClassifierMLP(nn.Module):
    def __init__(self, in_ch: int, num_classes: Optional[int], layers_description: Sequence[int]=(256,128), dropout_rate: float = 0.1):
        super().__init__()
        layer_list = []
        layer_list.append(nn.Linear(in_ch, layers_description[0]))
        layer_list.append(nn.ReLU())
        if dropout_rate is not None and dropout_rate > 0:
            layer_list.append(nn.Dropout(p=dropout_rate))
        last_layer_size = layers_description[0]
        for curr_layer_size in layers_description[1:]:
            layer_list.append(nn.Linear(last_layer_size, curr_layer_size))
            layer_list.append(nn.ReLU())
            if dropout_rate is not None and dropout_rate > 0:
                layer_list.append(nn.Dropout(p=dropout_rate))
            last_layer_size = curr_layer_size
        
        if num_classes is not None:
            layer_list.append(nn.Linear(last_layer_size, num_classes))
        
        self.classifier = nn.Sequential(*layer_list)

    def forward(self, x):
        x = self.classifier(x)
        return x
  
model=ClassifierMLP(12,2)  
import torch    

#inp=torch.rand(1,10)
#out=model(inp)
#print(out.shape)


model=nn.DataParallel(model)
model=model.to(device)
#loss_func=nn.BCEWithLogitsLoss()
from tqdm import tqdm
import torch.optim as optim
optimizer=optim.Adam(model.parameters(),lr=0.00001)

#classes=['NRG','RG']
classes=['NoAT','CanAT']
#c1=data['label'].value_counts()[0]
#c2=data['label'].value_counts()[1]
my_distribution=np.array([175,65])
class_weights = torch.from_numpy(np.divide(1, my_distribution)).float().to(device)
class_weights = class_weights / class_weights.sum()
for i, c in enumerate(classes):
  print('Weight for class %s: %f' % (c, class_weights.cpu().numpy()[i]))
loss_func = nn.CrossEntropyLoss(weight=class_weights)
#label=label.to(torch.int64)
################################ training functions ###################
def train_fn(model,train_loader):
    model.train()
    counter=0
    training_run_loss=0.0
    train_running_correct=0.0
    for i, data in tqdm(enumerate(train_loader),total=int(len(train_dataset)/train_loader.batch_size)):
        counter+=1
        # extract dataset
        feature,label=data
        #imge=imge.float()
        #label=label.float()
        label.to(torch.int64)
        feature=feature.float()
        feature=feature.to(device)
        #imge=imge.to(device)
        label=label.to(device)
        #imge=imge.cuda()
        #label=label.cuda()
        # zero_out the gradient
        optimizer.zero_grad()
        output=model(feature)
        loss=loss_func(output,label)
        training_run_loss+=loss.item()
        _,preds=torch.max(output.data,1)
        train_running_correct+=(preds==label).sum().item()
        loss.backward()
        optimizer.step()
    ###################### state computation ###################
    train_loss=training_run_loss/len(train_loader.dataset)
    train_loss_ep.append(train_loss)
    train_accuracy=100.* train_running_correct/len(train_loader.dataset)
    train_accuracy_ep.append(train_accuracy)
    print(f"Train Loss:{train_loss:.4f}, Train Acc:{train_accuracy:0.2f}")
    return train_loss_ep,train_accuracy_ep

########################## validation function ##################
def validation_fn(model,valid_loader):
  # evluation start
    print("validation start")
    
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    with torch.no_grad():
        for i,data in tqdm(enumerate(valid_loader),total=int(len(valid_loader)/valid_loader.batch_size)):
            feature,label=data
            #imge=imge.float()
            #label=label.float()
            label.to(torch.int64)
            feature=feature.float()
            feature=feature.to(device)
            #imge=imge.to(device)
            label=label.to(device)
            #imge=imge.cuda()
            #label=label.cuda()
            output=model(feature)
            loss=loss_func(output,label)
            val_running_loss+=loss.item()
            _,pred=torch.max(output.data,1)
            val_running_correct+=(pred==label).sum().item()
        val_loss=val_running_loss/len(valid_loader.dataset)
        val_loss_ep.append(val_loss)
        val_accuracy=100.* val_running_correct/(len(valid_loader.dataset))
        val_accuracy_ep.append(val_accuracy)
        print(f"Val Loss:{val_loss:0.4f}, Val_Acc:{val_accuracy:0.2f}")
        return val_loss_ep,val_accuracy_ep

import torch.optim as optim
optimizer=optim.Adam(model.parameters(),lr=0.0001)
train_loss_ep=[]
train_accuracy_ep=[]
val_loss_ep=[]
val_accuracy_ep=[]
lr = 3e-4
log = pd.DataFrame(index=[], columns=['epoch', 'lr', 'loss', 'accu', 'val_loss', 'val_accu'])
early_stop=20
epochs=500
best_acc = 0
name='fold4model'
trigger = 0
for epoch in range(epochs):
    print('Epoch [%d/%d]' %(epoch, epochs))
    # train for one epoch
    train_loss_ep,train_accuracy_ep=train_fn(model,train_dataloader)
    train_loss_ep1=np.mean(train_loss_ep)
    train_accuracy_ep1=np.mean(train_accuracy_ep)
    #y_pred,labels=Prediciton_fn(model,valid_loader)

    val_loss_ep,val_accuracy_ep=validation_fn(model,valid_dataloader)
    val_loss_ep1=np.mean(val_loss_ep)
    val_accuracy_ep1=np.mean(val_accuracy_ep)
    
    print('loss %.4f - accu %.4f - val_loss %.4f - val_accu %.4f'%(train_loss_ep1, train_accuracy_ep1, val_loss_ep1, val_accuracy_ep1))

    tmp = pd.Series([epoch,lr,train_loss_ep1,train_accuracy_ep1,val_loss_ep1,val_accuracy_ep1], index=['epoch', 'lr', 'loss', 'accu', 'val_loss', 'val_accu'])

    log = log.append(tmp, ignore_index=True)
    log.to_csv('models/%s/log.csv' %name, index=False)

    trigger += 1

    if val_accuracy_ep1 > best_acc:
        torch.save(model.state_dict(), 'models/%s/3dmodeltask1fold4.pth' %name)
        best_acc = val_accuracy_ep1
        print("=> saved best model")
        trigger = 0

    # early stopping
    if not early_stop is None:
        if trigger >= early_stop:
            print("=> early stopping")
            break

    torch.cuda.empty_cache() 

240
60
0
tensor([66.0000, 30.6900,  1.0000,  0.0000,  0.0000,  2.3000, 80.0000,  3.0000,
         1.0000,  0.8340,  0.8340,  0.5000])


  cpuset_checked))


Weight for class NoAT: 0.270833
Weight for class CanAT: 0.729167
Epoch [0/500]


100%|██████████| 60/60 [00:00<00:00, 85.82it/s] 

Train Loss:0.3032, Train Acc:52.08
validation start



15it [00:00, 58.02it/s]

Val Loss:0.1745, Val_Acc:76.67
loss 0.3032 - accu 52.0833 - val_loss 0.1745 - val_accu 76.6667
=> saved best model
Epoch [1/500]



100%|██████████| 60/60 [00:00<00:00, 90.46it/s] 

Train Loss:0.2302, Train Acc:52.08
validation start



15it [00:00, 65.54it/s]

Val Loss:0.1866, Val_Acc:76.67
loss 0.2667 - accu 52.0833 - val_loss 0.1806 - val_accu 76.6667
Epoch [2/500]



100%|██████████| 60/60 [00:00<00:00, 88.20it/s] 

Train Loss:0.2425, Train Acc:56.67
validation start



15it [00:00, 63.38it/s]

Val Loss:0.1844, Val_Acc:50.00
loss 0.2586 - accu 53.6111 - val_loss 0.1819 - val_accu 67.7778
Epoch [3/500]



100%|██████████| 60/60 [00:00<00:00, 90.97it/s] 

Train Loss:0.2001, Train Acc:57.08
validation start



15it [00:00, 53.37it/s]

Val Loss:0.1824, Val_Acc:53.33
loss 0.2440 - accu 54.4792 - val_loss 0.1820 - val_accu 64.1667
Epoch [4/500]



100%|██████████| 60/60 [00:00<00:00, 82.80it/s]

Train Loss:0.2067, Train Acc:58.33
validation start



15it [00:00, 48.95it/s]

Val Loss:0.1801, Val_Acc:66.67
loss 0.2366 - accu 55.2500 - val_loss 0.1816 - val_accu 64.6667
Epoch [5/500]



100%|██████████| 60/60 [00:00<00:00, 89.15it/s] 

Train Loss:0.1830, Train Acc:66.25
validation start



15it [00:00, 56.21it/s]

Val Loss:0.1782, Val_Acc:66.67
loss 0.2276 - accu 57.0833 - val_loss 0.1811 - val_accu 65.0000
Epoch [6/500]



100%|██████████| 60/60 [00:00<00:00, 84.25it/s] 

Train Loss:0.1795, Train Acc:67.50
validation start



15it [00:00, 49.22it/s]

Val Loss:0.1652, Val_Acc:65.00
loss 0.2208 - accu 58.5714 - val_loss 0.1788 - val_accu 65.0000
Epoch [7/500]



100%|██████████| 60/60 [00:00<00:00, 84.98it/s] 

Train Loss:0.1882, Train Acc:64.17
validation start



15it [00:00, 52.72it/s]

Val Loss:0.1842, Val_Acc:48.33
loss 0.2167 - accu 59.2708 - val_loss 0.1795 - val_accu 62.9167
Epoch [8/500]



100%|██████████| 60/60 [00:00<00:00, 81.69it/s]

Train Loss:0.1725, Train Acc:64.58
validation start



15it [00:00, 48.95it/s]

Val Loss:0.1643, Val_Acc:75.00
loss 0.2118 - accu 59.8611 - val_loss 0.1778 - val_accu 64.2593
Epoch [9/500]



100%|██████████| 60/60 [00:00<00:00, 89.36it/s] 

Train Loss:0.1836, Train Acc:62.08
validation start



15it [00:00, 58.82it/s]

Val Loss:0.1723, Val_Acc:53.33
loss 0.2090 - accu 60.0833 - val_loss 0.1772 - val_accu 63.1667
Epoch [10/500]



100%|██████████| 60/60 [00:00<00:00, 88.11it/s] 

Train Loss:0.1695, Train Acc:62.92
validation start



15it [00:00, 52.38it/s]

Val Loss:0.1699, Val_Acc:78.33
loss 0.2054 - accu 60.3409 - val_loss 0.1766 - val_accu 64.5455
Epoch [11/500]



100%|██████████| 60/60 [00:00<00:00, 94.31it/s] 

Train Loss:0.1588, Train Acc:67.50
validation start



15it [00:00, 56.59it/s]

Val Loss:0.1960, Val_Acc:78.33
loss 0.2015 - accu 60.9375 - val_loss 0.1782 - val_accu 65.6944
Epoch [12/500]



100%|██████████| 60/60 [00:00<00:00, 78.05it/s] 

Train Loss:0.1683, Train Acc:67.50
validation start



15it [00:00, 55.47it/s]

Val Loss:0.1698, Val_Acc:58.33
loss 0.1989 - accu 61.4423 - val_loss 0.1775 - val_accu 65.1282
Epoch [13/500]



100%|██████████| 60/60 [00:00<00:00, 89.37it/s] 

Train Loss:0.1452, Train Acc:67.92
validation start



15it [00:00, 47.47it/s]

Val Loss:0.1542, Val_Acc:73.33
loss 0.1951 - accu 61.9048 - val_loss 0.1759 - val_accu 65.7143
Epoch [14/500]



100%|██████████| 60/60 [00:00<00:00, 89.27it/s] 

Train Loss:0.1558, Train Acc:70.42
validation start



15it [00:00, 50.00it/s]


Val Loss:0.1556, Val_Acc:76.67
loss 0.1925 - accu 62.4722 - val_loss 0.1745 - val_accu 66.4444
Epoch [15/500]


100%|██████████| 60/60 [00:00<00:00, 85.15it/s] 

Train Loss:0.1605, Train Acc:67.08
validation start



15it [00:00, 60.76it/s]

Val Loss:0.1628, Val_Acc:65.00
loss 0.1905 - accu 62.7604 - val_loss 0.1738 - val_accu 66.3542
Epoch [16/500]



100%|██████████| 60/60 [00:00<00:00, 88.15it/s] 

Train Loss:0.1514, Train Acc:71.25
validation start



15it [00:00, 53.90it/s]

Val Loss:0.1652, Val_Acc:60.00
loss 0.1882 - accu 63.2598 - val_loss 0.1733 - val_accu 65.9804
Epoch [17/500]



100%|██████████| 60/60 [00:00<00:00, 84.68it/s]

Train Loss:0.1550, Train Acc:62.92
validation start



15it [00:00, 51.98it/s]


Val Loss:0.1520, Val_Acc:71.67
loss 0.1863 - accu 63.2407 - val_loss 0.1721 - val_accu 66.2963
Epoch [18/500]


100%|██████████| 60/60 [00:00<00:00, 92.73it/s] 

Train Loss:0.1423, Train Acc:73.33
validation start



15it [00:00, 55.35it/s]

Val Loss:0.1938, Val_Acc:43.33
loss 0.1840 - accu 63.7719 - val_loss 0.1732 - val_accu 65.0877
Epoch [19/500]



100%|██████████| 60/60 [00:00<00:00, 82.79it/s]

Train Loss:0.1395, Train Acc:75.00
validation start



15it [00:00, 54.50it/s]

Val Loss:0.1426, Val_Acc:75.00
loss 0.1818 - accu 64.3333 - val_loss 0.1717 - val_accu 65.5833
Epoch [20/500]



100%|██████████| 60/60 [00:00<00:00, 78.99it/s]

Train Loss:0.1412, Train Acc:72.50
validation start



15it [00:00, 51.96it/s]


Val Loss:0.1509, Val_Acc:68.33
loss 0.1799 - accu 64.7222 - val_loss 0.1707 - val_accu 65.7143
=> early stopping


In [None]:
#cd /content/drive/MyDrive/knight_challenegs2022

/content/drive/MyDrive/knight_challenegs2022


In [None]:
#!mkdir fold0model

In [None]:
### testing the model for validation
import pandas as pd

pathtest=pd.read_csv('/content/drive/MyDrive/knight_challenegs2022/test_featurelatest.csv')
#pathtest.drop(['gender'])

In [None]:
pathtest

Unnamed: 0,SubjectId,age,bmi,gender,gender_num,comorbidities,smoking_history,radiographic_size,preop_egfr,alcohol_use,chewing_tobacco_use,x_spacing,y_spacing,z_spacing
0,case_00400,88,29.18,female,1,1,2,4.0,57.0,2,1,0.810547,0.810547,5.0
1,case_00401,61,36.96,male,0,0,2,3.0,75.0,2,1,0.976562,0.976562,5.0
2,case_00402,62,25.53,female,1,1,0,2.3,67.0,2,1,0.824219,0.824219,5.0
3,case_00403,75,59.86,female,1,1,0,1.6,39.0,1,1,0.976562,0.976562,5.0
4,case_00404,67,31.38,male,0,1,2,4.7,53.0,1,1,0.898438,0.898438,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,case_00498,67,24.97,male,0,1,0,5.7,54.0,2,1,0.750000,0.750000,5.0
99,case_00499,44,20.66,male,0,1,0,2.7,90.0,1,1,0.585938,0.585938,5.0
100,case_00500,48,37.62,female,1,0,0,3.0,0.0,2,1,0.839844,0.839844,5.0
101,case_00501,58,30.45,male,0,1,2,0.6,90.0,2,1,0.789062,0.789062,5.0


In [None]:
import torch
pathmodel='/content/drive/MyDrive/knight_challenegs2022/models/fold0model/3dmodeltask1.pth'
trainedmodel=torch.load(pathmodel)
model.load_state_dict(trainedmodel)
model.eval()
model.to(device)

DataParallel(
  (module): ClassifierMLP(
    (classifier): Sequential(
      (0): Linear(in_features=12, out_features=256, bias=True)
      (1): ReLU()
      (2): Dropout(p=0.1, inplace=False)
      (3): Linear(in_features=256, out_features=128, bias=True)
      (4): ReLU()
      (5): Dropout(p=0.1, inplace=False)
      (6): Linear(in_features=128, out_features=2, bias=True)
    )
  )
)

In [None]:
#NoAT-score
#CanAT-score
#case_id
import torch
pathmodel='/content/drive/MyDrive/knight_challenegs2022/models/fold0model/3dmodeltask1.pth'
trainedmodel=torch.load(pathmodel)
model.load_state_dict(trainedmodel)
model.eval()
model.to(device)
dataframe={'case_id':[],
           'NoAT-score':[],
           'CanAT-score':[]}
CLINICAL_NAMES=['case_id','NoAT-score','CanAT-score']
df = pd.DataFrame(columns=CLINICAL_NAMES)
for i,patinet in enumerate(pathtest['SubjectId']):
  #pat=patinet.split('_')[-1]
  #pat=str(0000)+i
  number_str = str(i)
  pat = number_str.zfill(5)
  #print(i)
  #print(patinet)
  features=pathtest.iloc[i]
  feature=features.drop(['SubjectId'])
  feature1=feature.drop(['gender'])
  #print(feature1.shape)
  x_f_array=np.array(feature1).astype(float)
  #print(x_f_array)
  #.astype(int)
  x_f_array_t=torch.from_numpy(x_f_array).float()
  x_f_array_t=torch.unsqueeze(x_f_array_t,axis=0).to(device)
  prediction=model(x_f_array_t)
  output=torch.softmax(prediction, dim=1)
  output=torch.squeeze(output,axis=0)
  pred=output.detach().cpu().numpy()
  #print(output.detach().cpu().numpy())
  df.loc[i,'case_id']=pat
  df.loc[i,'NoAT-score']='%.1f' % pred[0]
  df.loc[i,'CanAT-score']='%.1f' % pred[1]

  #dataframe['case_id'].append(str(pat))
  #dataframe['NoAT-score'].append(pred[0])
  #dataframe['CanAT-score'].append(pred[1])
  #features.iloc[i]
  #print(x_f_array_t.shape)
  print(pat)




00000
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102


In [None]:
df
df['case_id']=df['case_id'].str.rjust(2, "0")


In [None]:
df

Unnamed: 0,case_id,NoAT-score,CanAT-score
0,00000,0.6,0.4
1,00001,0.9,0.1
2,00002,0.7,0.3
3,00003,1.0,0.0
4,00004,0.6,0.4
...,...,...,...
98,00098,0.4,0.6
99,00099,0.8,0.2
100,00100,1.0,0.0
101,00101,1.0,0.0


In [None]:
#df.apply(lambda x: x.zfill(2))
#df.case_id = df.case_id.astype("str")
df['case_id'] = df['case_id'].apply('="{}"'.format)

In [None]:
df

Unnamed: 0,case_id,NoAT-score,CanAT-score
0,"=""00000""",0.6,0.4
1,"=""00001""",0.9,0.1
2,"=""00002""",0.7,0.3
3,"=""00003""",1.0,0.0
4,"=""00004""",0.6,0.4
...,...,...,...
98,"=""00098""",0.4,0.6
99,"=""00099""",0.8,0.2
100,"=""00100""",1.0,0.0
101,"=""00101""",1.0,0.0


In [None]:
df.to_csv('task1_predictions.csv',index=False)

In [None]:
predict=pd.DataFrame(dataframe)

In [None]:
predict

Unnamed: 0,case_id,NoAT-score,CanAT-score
0,00400,0.571395,0.428605
1,00401,0.867308,0.132692
2,00402,0.703473,0.296527
3,00403,0.990785,0.009215
4,00404,0.586223,0.413777
...,...,...,...
98,00498,0.382007,0.617993
99,00499,0.832450,0.167550
100,00500,0.970588,0.029412
101,00501,0.950895,0.049105


In [None]:
predict.to_csv('task1_prediction_aq.csv')

In [None]:
feature

age                          56
bmi                        34.7
gender                     male
gender_num                    0
comorbidities                 1
smoking_history               2
radiographic_size           5.0
preop_egfr                 90.0
alcohol_use                   2
chewing_tobacco_use           1
x_spacing              0.921875
y_spacing              0.921875
z_spacing                   5.0
Name: 102, dtype: object