In [1]:
import argparse
import torch
import numpy as np
import pandas as pd
from PIL import Image
import glob, os, re
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import random
from tqdm import tqdm
import torch.nn.functional as F
from sklearn.model_selection import StratifiedKFold
import cv2
import matplotlib.pyplot as plt

import  torchvision.models as models
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.autograd import Variable
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix
from efficientnet_pytorch import EfficientNet

import import_ipynb
# from models_3ch import MyEffientnet_b1
from clinical_data_loader import ImageDataset 
from clinical_data_loader_external_test import ImageDataset as t_ImageDataset
from clinical_data_loader_external_validation import ImageDataset as v_ImageDataset

importing Jupyter notebook from clinical_data_loader.ipynb
importing Jupyter notebook from clinical_data_loader_external_test.ipynb
importing Jupyter notebook from clinical_data_loader_external_validation.ipynb


In [2]:
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

In [3]:
# Enumerate GPUs by PCI bus id before any device is selected.
os.environ.update(CUDA_DEVICE_ORDER='PCI_BUS_ID')

In [4]:
# Expose only the GPU with index 1 to this process.
os.environ.update(CUDA_VISIBLE_DEVICES='1')

In [5]:
# One seed shared by every random number generator used in the notebook.
random_seed = 123

# Seed the Python, NumPy and PyTorch generators with the same value.
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Request deterministic cuDNN kernels and turn off cuDNN benchmarking.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [6]:
# Run-level hyper-parameters.
num_epoch, batch_size = 10, 32

# Training-time preprocessing: resize, random horizontal/vertical flips,
# centre crop and tensor conversion, in that order.
train_transforms = dict(
    train_aug=transforms.Compose([
        transforms.Resize(256),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.CenterCrop(240),
        transforms.ToTensor(),
    ]),
)

# Evaluation-time preprocessing: the same resize/crop pipeline without flips.
test_transforms = dict(
    val=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(240),
        transforms.ToTensor(),
    ]),
)


In [7]:
# Image folder and clinical spreadsheet of the internal (min-max normalised, 3-channel) dataset.
img_root = os.path.join(
    '/home/SMC_data_2101xx_EGFR_deeplearning/EGFR_deep_learning/3ch_img_minmax',
    'img',
)
label_pth = os.path.join(
    '/home/SMC_data_2101xx_EGFR_deeplearning/EGFR_deep_learning/3ch_img_minmax',
    'patient_information_clinical.xlsx',
)

In [8]:
# Directory that holds the trained classification checkpoint.
pth_root = (
    '/home/SMC_data_2101xx_EGFR_deeplearning/EGFR_deep_learning'
    '/3ch_img_minmax/result/final_re'
)

In [9]:
# Output directory for the external-validation CSV files and ROC figure.
save_root = (
    '/home/SMC_data_2101xx_EGFR_deeplearning/EGFR_deep_learning'
    '/3ch_img_minmax/result/final/external/val'
)

In [10]:
class MyEffientnet_b1_clinical(nn.Module):
    """EfficientNet image branch fused with a linear clinical-feature branch.

    The image branch produces pooled EfficientNet features; the clinical
    branch maps a 3-value clinical vector through four linear layers. Both
    are concatenated and passed through a batch-normalised fully connected
    head that ends in a 2-way softmax.

    Args:
        out_features1..out_features4: widths of the four hidden layers of the
            fused classification head (fc1..fc4).
        out_features5..out_features8: widths of the four clinical layers
            (clinical_fc1..clinical_fc4).
        model_name: name passed to ``EfficientNet.from_pretrained``.
        class_num: accepted but not used; the output layer always has 2 units.
        initfc_type: initialisation scheme for ``fc1`` weights; one of
            'normal', 'xavier', 'kaiming', 'orthogonal'. Any other value
            leaves the default initialisation in place.
        gain: std (for 'normal') or gain (for 'xavier' / 'orthogonal').

    Note:
        ``dropout``, ``batchnorm4`` and ``sigmoid`` are registered as
        sub-modules but are not applied in ``forward``.
    """

    def __init__(self, out_features1, out_features2, out_features3, out_features4, out_features5, out_features6,
                 out_features7, out_features8, model_name='efficientnet-b1', class_num=45, initfc_type='normal', gain=0.2):
        super().__init__()

        # Clinical branch: 3 input values -> out_features8.
        self.clinical_fc1 = nn.Linear(3, out_features5)
        self.clinical_fc2 = nn.Linear(out_features5, out_features6)
        self.clinical_fc3 = nn.Linear(out_features6, out_features7)
        self.clinical_fc4 = nn.Linear(out_features7, out_features8)

        # Image branch: pretrained EfficientNet used as a feature extractor.
        self.model = EfficientNet.from_pretrained(model_name)

        # Fused head: 1280 pooled image features + clinical embedding.
        fused_width = 1280 + out_features8
        self.fc1 = nn.Linear(fused_width, out_features1)
        self.fc2 = nn.Linear(out_features1, out_features2)
        self.fc3 = nn.Linear(out_features2, out_features3)
        self.fc4 = nn.Linear(out_features3, out_features4)
        self.fc5 = nn.Linear(out_features4, 2)
        self.dropout = nn.Dropout(0.5)

        self.batchnorm = nn.BatchNorm1d(fused_width)
        self.batchnorm1 = nn.BatchNorm1d(out_features1)
        self.batchnorm2 = nn.BatchNorm1d(out_features2)
        self.batchnorm3 = nn.BatchNorm1d(out_features3)
        self.batchnorm4 = nn.BatchNorm1d(out_features4)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        # Re-initialise only the first fused layer: zero bias, then the
        # weight scheme selected by `initfc_type`.
        if getattr(self.fc1, 'bias', None) is not None:
            nn.init.constant_(self.fc1.bias.data, 0.0)

        weight_initialisers = {
            'normal': lambda w: nn.init.normal_(w, 0.0, gain),
            'xavier': lambda w: nn.init.xavier_normal_(w, gain=gain),
            'kaiming': lambda w: nn.init.kaiming_normal_(w, a=0, mode='fan_in'),
            'orthogonal': lambda w: nn.init.orthogonal_(w, gain=gain),
        }
        initialise = weight_initialisers.get(initfc_type)
        if initialise is not None:
            initialise(self.fc1.weight.data)

    def forward(self, x, c):
        """Return softmax class probabilities for images `x` and clinical vectors `c`."""
        # Image branch: backbone features, x * sigmoid(x), global average pool.
        feats = self.model.extract_features(x)
        feats = feats * torch.sigmoid(feats)
        feats = F.adaptive_avg_pool2d(feats, 1).squeeze(-1).squeeze(-1)

        # Clinical branch: four linear layers applied back to back.
        clin = c
        for layer in (self.clinical_fc1, self.clinical_fc2, self.clinical_fc3, self.clinical_fc4):
            clin = layer(clin)

        fused = torch.cat((feats, clin), 1)

        # Each hidden stage is BatchNorm -> ReLU -> Linear.
        fused = self.fc1(self.relu(self.batchnorm(fused)))
        fused = self.fc2(self.relu(self.batchnorm1(fused)))
        fused = self.fc3(self.relu(self.batchnorm2(fused)))
        fused = self.fc4(self.relu(self.batchnorm3(fused)))

        # fc4 feeds fc5 directly, then softmax over the class dimension.
        return F.softmax(self.fc5(fused), dim=1)

In [11]:
# CT image folder and clinical spreadsheet of the external validation cohort.
v_img_root = os.path.join(
    '/home/SMC_data_2101xx_EGFR_deeplearning/EGFR_deep_learning/external_validationset/external_validation',
    'png', 'ct',
)
v_label_pth = os.path.join(
    '/home/SMC_data_2101xx_EGFR_deeplearning/EGFR_deep_learning/external_validationset/external_validation',
    'external_validation_clinical.xlsx',
)

In [12]:
# External validation dataset, preprocessed with the evaluation transforms.
ex_val_dataset = v_ImageDataset(v_img_root, v_label_pth, 'val', transform=test_transforms['val'])

# shuffle=False: evaluation does not benefit from shuffling, and a fixed
# order keeps the rows of the saved prediction/probability CSVs aligned with
# the dataset order from run to run.
ex_val_loader = torch.utils.data.DataLoader(ex_val_dataset, batch_size=batch_size, shuffle=False,
                                            num_workers=0, pin_memory=True)

In [15]:
# Build the fused image + clinical classifier with the layer widths used for
# the saved checkpoint, then move it to the GPU (the cell displays the model).
my_model = MyEffientnet_b1_clinical(
    out_features1=512, out_features2=512, out_features3=256, out_features4=32,
    out_features5=16, out_features6=16, out_features7=16, out_features8=32,
)
my_model.cuda()

Loaded pretrained weights for efficientnet-b1


MyEffientnet_b1_clinical(
  (clinical_fc1): Linear(in_features=3, out_features=16, bias=True)
  (clinical_fc2): Linear(in_features=16, out_features=16, bias=True)
  (clinical_fc3): Linear(in_features=16, out_features=16, bias=True)
  (clinical_fc4): Linear(in_features=16, out_features=32, bias=True)
  (model): EfficientNet(
    (_conv_stem): Conv2dStaticSamePadding(
      3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
      (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
    )
    (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
    (_blocks): ModuleList(
      (0): MBConvBlock(
        (_depthwise_conv): Conv2dStaticSamePadding(
          32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
          (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
        )
        (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
        (_se_reduc

In [16]:
# Restore the trained weights from the checkpoint file under `pth_root`.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
fname = '{}/classification_checkpoint.pt'.format(pth_root)
checkpoint = torch.load(fname)
my_model.load_state_dict(checkpoint)

<All keys matched successfully>

In [17]:
# Class-weighted cross-entropy; the weights live on the GPU next to the model.
class_weight = torch.tensor([0.67, 0.33], dtype=torch.float32).cuda()
criterion = nn.CrossEntropyLoss(weight=class_weight)

# Adam optimizer with an exponentially decaying learning rate (factor 0.95 per epoch).
optimizer = torch.optim.Adam(my_model.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=lambda epoch: 0.95 ** epoch,
)

In [18]:
# Empty float arrays that collect the AUC and accuracy of each evaluation run.
ex_val_auc_check = np.empty(0)
ex_val_acc_check = np.empty(0)

In [19]:
# Single evaluation pass over the external validation loader (no gradients).
with torch.no_grad():
    ex_epoch_loss_val = 0.0
    ex_epoch_acc_val = 0.0

    # Per-batch results are collected in lists and concatenated once after the
    # loop instead of re-allocating growing arrays with np.append every batch.
    batch_predictions, batch_targets, batch_probabilities = [], [], []

    my_model.eval()

    for ex_validation_x_batch, ex_validation_y_batch, ex_validation_clinical_batch, p_number in tqdm(ex_val_loader):
        # Tensors go straight to the GPU; the deprecated Variable wrapper is not needed.
        ex_validation_x = ex_validation_x_batch.cuda()
        ex_validation_y = ex_validation_y_batch.cuda()
        ex_validation_clinical = ex_validation_clinical_batch.cuda()

        ex_validation_output = my_model(ex_validation_x, ex_validation_clinical)

        # Targets are reduced to class indices along dim 1 before the loss.
        # NOTE(review): the model's forward already ends in softmax while
        # nn.CrossEntropyLoss expects raw logits, so this loss is computed on
        # probabilities; kept unchanged here — confirm it matches training.
        ex_validation_epoch_loss = criterion(ex_validation_output, torch.max(ex_validation_y, 1)[1])

        # Weight the batch loss by the batch size so the mean below is per sample.
        ex_epoch_loss_val += ex_validation_epoch_loss.item() * len(ex_validation_x_batch)

        batch_scores = ex_validation_output.cpu().numpy()
        batch_probabilities.append(batch_scores)
        batch_predictions.append(np.argmax(batch_scores, axis=1))
        batch_targets.append(np.argmax(ex_validation_y.cpu().numpy(), axis=1))

    if not batch_probabilities:
        # Previously this surfaced as a NameError from the `del` statement.
        raise ValueError('ex_val_loader yielded no batches; nothing to evaluate')

    del ex_validation_x_batch, ex_validation_y_batch, ex_validation_output

    # Cast to float64 so the arrays (and the CSVs written from them) keep the
    # dtype they had when they were grown from empty float arrays.
    ex_predicted_val_output = np.concatenate(batch_predictions).astype(np.float64)
    ex_val_real = np.concatenate(batch_targets).astype(np.float64)
    ex_val_probability = np.concatenate(batch_probabilities, axis=0).astype(np.float64)

    ex_val_loss = ex_epoch_loss_val / len(ex_val_dataset)
    ex_val_acc = np.count_nonzero(ex_predicted_val_output == ex_val_real) / len(ex_predicted_val_output)
    # AUC uses the probability of class 1 as the score.
    ex_val_auc_score = roc_auc_score(ex_val_real, ex_val_probability[:, 1])
    ex_val_auc_check = np.append(ex_val_auc_check, ex_val_auc_score)
    ex_val_acc_check = np.append(ex_val_acc_check, ex_val_acc)


print('ex_val_auc : {:.2f} '
      'acc : {:.2f}'
      .format(ex_val_auc_score, ex_val_acc))

100%|██████████| 1/1 [00:00<00:00,  2.35it/s]

ex_val_auc : 0.48 acc : 0.60





In [None]:
# Turn the 1-D label/prediction vectors into column vectors. Using -1 lets
# NumPy infer the row count, so this works for any dataset size instead of
# only for exactly 10 samples.
test_real = ex_val_real.reshape(-1, 1)
predicted_test_output = ex_predicted_val_output.reshape(-1, 1)

In [None]:
# Two-column table: true label next to predicted label.
test_label_output = np.concatenate((test_real, predicted_test_output), axis=1)

In [None]:
# Wrap the label table in a DataFrame with named columns for export.
label_columns = ['real label', 'predicted label']
test_label_output_pd = pd.DataFrame(data=test_label_output, columns=label_columns)

In [None]:
# Write the real/predicted label table to CSV without the index column.
test_label_output_pd.to_csv('{}/deep_learning_output_test.csv'.format(save_root), index=False)

In [None]:
# Passing labels=[0, 1] pins the matrix to 2x2 (rows = true class, columns =
# predicted class) even when only one class appears in the labels and
# predictions; otherwise the [1, 1] indexing used later would raise IndexError.
conf_matrix = confusion_matrix(test_real, predicted_test_output, labels=[0, 1])

In [None]:
# Display the confusion matrix of true vs. predicted labels.
conf_matrix

In [None]:
# Per-class recall: diagonal entry divided by the number of true samples of that class.
# NOTE(review): reporting the class-0 recall as "sensitivity" presumes label 0
# is the positive class — confirm against the label encoding.
true_class_totals = conf_matrix.sum(axis=1)
sensitivity = conf_matrix[0, 0] / true_class_totals[0]
specificity = conf_matrix[1, 1] / true_class_totals[1]

In [None]:
# Report both rates, one per line.
for metric_label, metric_value in (('sensitivity : ', sensitivity), ('specificity : ', specificity)):
    print(metric_label, metric_value)

In [None]:
# ROC operating points with class 0 as the positive label, scored by the
# class-0 probability column. ravel() flattens the label column vector for
# any number of samples (the former reshape(10, ) only worked for exactly 10).
fpr, tpr, threshold = roc_curve(test_real.ravel(), ex_val_probability[:, 0], pos_label=0)

In [None]:
# Use the arrays produced by the evaluation cell (`ex_val_real`,
# `ex_val_probability`); the previous names `val_real` / `val_probability`
# are never defined in this notebook and raised NameError on a fresh kernel.
# ravel() replaces the hard-coded reshape(10, ) so any sample count works.
auc_score = roc_auc_score(ex_val_real.ravel(), ex_val_probability[:, 1])

In [None]:
# Display the ROC AUC value.
auc_score

In [None]:
# Per-sample class probabilities from the evaluation cell. The previous name
# `val_probability` is never defined in this notebook (NameError on a fresh kernel).
df_prob = pd.DataFrame(ex_val_probability, columns=['label_0', 'label_1'])

In [None]:
# Write the per-class probability table to CSV without the index column.
df_prob.to_csv('{}/deep_learning_prob_test.csv'.format(save_root), index=False)

In [None]:
# ROC curve with the AUC in the legend, plus the chance diagonal for reference.
roc_label = 'ROC curve (area = %0.2f)' % auc_score
plt.plot(fpr, tpr, color='red', label=roc_label)
plt.plot([0, 1], [0, 1], linestyle='--', color='navy')

# Axis limits and labels; the y-limit leaves a little headroom above 1.0.
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.02)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic')
plt.legend(loc='lower right')

# Save the current figure at its own resolution before showing it.
fig = plt.gcf()
fig.savefig(save_root + '/roc_curve.png', dpi=fig.dpi)
plt.show()