In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as utils
from torchvision import transforms, models
import pretrainedmodels
from torchvision.datasets import ImageFolder
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.model_selection import StratifiedKFold
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torch.nn.functional as F
import time
import os
import copy

In [2]:
# Detect once whether a CUDA device is usable; confmat() and fit() read this
# module-level flag to decide whether tensors are moved to the GPU.
is_cuda = torch.cuda.is_available()
is_cuda

True

In [3]:
def extract_patches_2d(img,patch_shape,step=(1.0,1.0),batch_first=False):
    """Cut a 4-D image batch into (possibly overlapping) 2-D patches.

    Args:
        img: tensor indexed as (batch, channel, height, width). Images smaller
            than the patch are zero-padded (centred) up to the patch size.
        patch_shape: sequence ``(patch_H, patch_W)``.
        step: sequence ``(step_H, step_W)``. A ``float`` entry is a fraction
            of the patch size along that axis (``1.0`` = no overlap); any
            other entry is used directly as a stride in pixels.
        batch_first: if true the result is indexed
            ``(batch, patch, channel, patch_H, patch_W)``; otherwise
            ``(patch, batch, channel, patch_H, patch_W)``.

    Returns:
        Tensor of patches enumerated row-major over the patch grid. When the
        stride does not tile an axis exactly, one extra patch flush with the
        bottom / right border is appended so the whole image is covered.

    Raises:
        ValueError: if a stride resolves to less than one pixel (for example
            a fractional step that truncates to 0).
    """
    patch_H, patch_W = patch_shape[0], patch_shape[1]

    # Zero-pad images that are smaller than one patch; the odd pixel, if any,
    # goes to the bottom / right side.
    pad_H = max(patch_H - img.size(2), 0)
    pad_W = max(patch_W - img.size(3), 0)
    if pad_H or pad_W:
        pad_top, pad_left = pad_H // 2, pad_W // 2
        padding = nn.ConstantPad2d((pad_left, pad_W - pad_left, pad_top, pad_H - pad_top), 0)
        img = padding(img)

    # Resolve fractional steps to pixel strides and reject unusable ones early
    # instead of failing inside unfold()/modulo with an obscure message.
    step_int = [
        int(patch_H * step[0]) if isinstance(step[0], float) else step[0],
        int(patch_W * step[1]) if isinstance(step[1], float) else step[1],
    ]
    if step_int[0] < 1 or step_int[1] < 1:
        raise ValueError(
            'step {} resolves to a stride of {} pixels; each stride must be >= 1'.format(
                list(step[:2]), step_int))

    # Slide along the height: (B, C, nH, W, patch_H).
    patches_fold_H = img.unfold(2, patch_H, step_int[0])
    if (img.size(2) - patch_H) % step_int[0] != 0:
        # Stride does not reach the bottom edge: add one patch anchored there.
        bottom = img[:, :, -patch_H:].permute(0, 1, 3, 2).unsqueeze(2)
        patches_fold_H = torch.cat((patches_fold_H, bottom), dim=2)

    # Slide along the width: (B, C, nH, nW, patch_H, patch_W).
    patches_fold_HW = patches_fold_H.unfold(3, patch_W, step_int[1])
    if (img.size(3) - patch_W) % step_int[1] != 0:
        # Same for the right edge.
        right = patches_fold_H[:, :, :, -patch_W:, :].permute(0, 1, 2, 4, 3).unsqueeze(3)
        patches_fold_HW = torch.cat((patches_fold_HW, right), dim=3)

    # Flatten the (nH, nW) grid into a single leading patch axis.
    patches = patches_fold_HW.permute(2, 3, 0, 1, 4, 5)
    patches = patches.reshape(-1, img.size(0), img.size(1), patch_H, patch_W)
    if batch_first:
        patches = patches.permute(1, 0, 2, 3, 4)
    return patches

# Utilities

In [4]:
def misimshow(inp,name,index):
    """Display a misclassified image and save it as ``<index>.jpg``.

    Args:
        inp: numpy array; axis 0 is moved last before plotting, so it is
            assumed to be channels-first with 3 channels (TODO confirm).
        name: class label shown in the figure title.
        index: integer used as the output file name.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Channels-last layout for imshow, then undo the Normalize() transform
    # and clamp to the displayable [0, 1] range.
    picture = inp.transpose((1, 2, 0))
    picture = np.clip(channel_std * picture + channel_mean, 0, 1)

    plt.imshow(picture)
    plt.title('misclassified as %s' %(name))
    plt.savefig('D:/capstone/split_patient/S1/Output/misclassified/1fold/%d.jpg'%(index))

def num_of_label(loader):
    """Print how many samples of each label a dataset contains.

    Args:
        loader: either a ``DataLoader`` (its ``.dataset`` is used) or a
            dataset itself. The dataset must support ``len()`` and integer
            indexing returning ``(sample, label)``; if it exposes a
            ``targets`` sequence that is used instead, which avoids loading
            every sample just to read its label.

    The printed line is ``'<set name> : {label: count, ...}'``. The set name
    is 'Train set' / 'Valid set' / 'Test set' when the argument matches the
    notebook-level ``trainloader`` / ``validloader`` / ``testloader`` (or
    their datasets), and 'Dataset' otherwise. Labels are reported as floats,
    as before.
    """
    dataset = getattr(loader, 'dataset', loader)

    # Look the loaders up lazily: some of them may not be defined in the
    # current session (e.g. the training loader when only evaluating).
    set_name = 'Dataset'
    known_loaders = (('trainloader', 'Train set'),
                     ('validloader', 'Valid set'),
                     ('testloader', 'Test set'))
    for global_name, label in known_loaders:
        candidate = globals().get(global_name)
        if candidate is None:
            continue
        if loader is candidate or dataset is getattr(candidate, 'dataset', None):
            set_name = label
            break

    targets = getattr(dataset, 'targets', None)
    if targets is None:
        targets = [dataset[i][1] for i in range(len(dataset))]

    labels = np.asarray(targets, dtype=float)
    unique, counts = np.unique(labels, return_counts=True)
    print('{} : {}'.format(set_name, dict(zip(unique.tolist(), counts.tolist()))))
    
def result_graph():
    """Plot accuracy and loss curves (train in blue, valid in red) on figure 1.

    Reads the module-level histories ``train_accuracy``, ``val_accuracy``,
    ``train_losses`` and ``val_losses``; the x axis is the 1-based position
    in each history.
    """
    def draw_panel(slot, train_curve, valid_curve, train_label, valid_label, y_name, heading):
        # One of the two stacked panels of the figure.
        plt.subplot(2,1,slot)
        plt.plot(range(1,len(train_curve)+1),train_curve,'b',label = train_label)
        plt.plot(range(1,len(valid_curve)+1),valid_curve,'r',label = valid_label)
        plt.xlabel('epoch')
        plt.ylabel(y_name)
        plt.title(heading)
        plt.legend()

    plt.figure(1)

    draw_panel(1, train_accuracy, val_accuracy,
               'train accuracy', 'valid accuracy', 'acc', 'Acc Curve')
    # Extra vertical room so the lower title does not collide with the
    # upper x-axis label.
    plt.subplots_adjust(hspace=0.7)
    draw_panel(2, train_losses, val_losses,
               'train loss', 'valid loss', 'loss', 'Loss Curve')

class EarlyStopping():
    """Signal when a monitored loss has stopped improving.

    Args:
        patience: number of consecutive non-improving calls tolerated; the
            call after that returns ``True``.
        verbose: if truthy, print a message when stopping is signalled.
    """

    def __init__(self, patience=0, verbose=0):
        self._step = 0                 # consecutive non-improving calls
        self._loss = float('inf')      # most recent loss that counted as an improvement
        self.patience  = patience
        self.verbose = verbose

    def validate(self, loss):
        """Record ``loss`` and return ``True`` when training should stop.

        A loss that is lower than or equal to the stored reference resets the
        counter and becomes the new reference. Anything else counts as a
        non-improvement, including NaN: a diverged run must not overwrite the
        reference, otherwise every later comparison against NaN would be
        false and stopping would never trigger.
        """
        if loss <= self._loss:
            self._step = 0
            self._loss = loss
            return False

        self._step += 1
        if self._step > self.patience:
            if self.verbose:
                print('Training process is stopped early....')
            return True
        return False

def confmat(loader):
    """Evaluate a two-checkpoint ensemble on ``loader`` and print its metrics.

    For each checkpoint the module-level ``model`` is reloaded and every
    batch is scored on the full image and on the patches produced by
    ``extract_patches_2d`` (299x299 patches, step 0.6, after resizing to
    478x478). The sigmoid scores of the image and of all its patches are
    averaged per sample, and the per-sample scores of the checkpoints are
    averaged again; the arg-max of that average is the prediction.

    Printed, in this order: the confusion matrix (rows = target class,
    columns = predicted class), per-class recall, TP/FP/TN/FN with class 1 as
    the positive class, specificity, sensitivity, F1 and, for the test loader,
    accuracy and Cohen's kappa. ROC data is computed but only kept locally
    (see the commented export lines at the end).

    Args:
        loader: ``DataLoader`` to evaluate. As before, inference and the
            confusion matrix are only produced when this is the module-level
            ``testloader``. The loader must yield samples in the same order
            on every pass (``shuffle=False``), because scores from the two
            checkpoints are matched by position.

    Returns:
        None. Results are printed.
    """
    nb_classes = 2
    # The original comments label these "Model 3" and "Model 2" respectively.
    checkpoints = ('s3_1fold_1reapeated.pt', 's4_1fold_1reapeated.pt')
    device = torch.device('cuda' if is_cuda else 'cpu')
    num_samples = len(loader.dataset)
    is_test = loader == testloader
    up_sample = nn.UpsamplingBilinear2d(size=(478, 478))

    # Per-sample class scores summed over the checkpoints. Sized from the
    # dataset rather than hard-coded so any test-set size works.
    score_sum = torch.zeros(num_samples, nb_classes, device=device)
    target_batches = []
    conf_matrix = torch.zeros(nb_classes, nb_classes)

    for x, checkpoint in enumerate(checkpoints):
        offset = 0  # row of score_sum that the next batch starts at
        model.load_state_dict(torch.load(checkpoint, map_location=device))
        model.eval()
        with torch.no_grad():
            for data, target in loader:
                # Both passes see the same data, so record targets only once.
                if x == 0:
                    target_batches.append(target.cpu())
                if not is_test:
                    continue

                # Score of the full image.
                scores = torch.sigmoid(model(data.to(device)))

                # Scores of the patches: resize and patch extraction run on
                # the CPU, only the resulting patches are moved to the device.
                patches = extract_patches_2d(up_sample(data.cpu()), [299, 299],
                                             step=[0.6, 0.6], batch_first=True).to(device)
                for i in range(len(target)):
                    patch_scores = torch.sigmoid(model(patches[i]))
                    # Mean over the full image and every patch of sample i.
                    scores[i] = (scores[i] + patch_scores.sum(dim=0)) / (patch_scores.size(0) + 1)

                score_sum[offset:offset + len(target)] += scores
                offset += len(target)

    average_output = score_sum / len(checkpoints)
    target = torch.cat(target_batches).long()
    _, preds = torch.max(average_output, 1)
    preds = preds.cpu()

    running_correct = preds.eq(target.view_as(preds)).sum().item()
    accuracy = 100. * running_correct / num_samples

    if is_test:
        for t, p in zip(target.view(-1).tolist(), preds.view(-1).tolist()):
            conf_matrix[t, p] += 1

    # Kept as 0-dim tensors so that an empty class yields nan instead of
    # raising ZeroDivisionError.
    TN, FP, FN, TP = conf_matrix.flatten()
    # Chance agreement for Cohen's kappa:
    # P(target pos) * P(pred pos) + P(target neg) * P(pred neg).
    PE = ((TP+FN)/num_samples)*((TP+FP)/num_samples)+((FP+TN)/num_samples)*((FN+TN)/num_samples)
    print(conf_matrix)
    print(conf_matrix.diag()/conf_matrix.sum(1))
    print('TP = {}, FP = {}, TN = {}, FN = {}'.format(TP,FP,TN,FN))
    print('Specifity = {:.4f}, Sensitivity = {:.4f}'.format(TN/(TN+FP),TP/(TP+FN)))
    print('F1 score = {:.4f}'.format(TP/(TP+(FN+FP)/2)))
    if is_test:
        print('Test Acc = {:.4f}'.format(accuracy))
        Kappa = (0.01*accuracy-PE)/(1.0-PE)
        print('cohens kappa = {:.4f}'.format(Kappa))
        # ROC on the averaged class-1 score (melanoma, per the original comments).
        roc_target = target.numpy()
        roc_max_diff = average_output[:, 1].cpu().numpy()
        fpr, tpr, _ = roc_curve(roc_target,roc_max_diff)
        roc_auc = auc(fpr,tpr)
        # Uncomment to export the ROC data:
        #np.save(output_path + "fpr5.npy", fpr)
        #np.save(output_path + "tpr5.npy", tpr)
        #np.save(output_path + "auc5.npy", roc_auc)
        #np.save(output_path + "roc_target5.npy", roc_target)
        #np.save(output_path + "roc_max_diff5.npy", roc_max_diff)

# Network

In [5]:
# When False, the pretrained weights are frozen below and only layers created
# afterwards (the new head) remain trainable.
fine_tune = False
model = models.resnet50(pretrained=True)

if not fine_tune:
    # Must run before the head is replaced, otherwise the new layer would be
    # frozen as well.
    for parameter in model.parameters():
        parameter.requires_grad = False

n_features = model.fc.in_features
# NOTE(review): this only attaches a Dropout module as an attribute; torchvision's
# ResNet.forward does not appear to reference `self.dropout`, so it is presumably
# never applied. Confirm before relying on it for regularisation.
model.dropout = nn.Dropout(p=0.5)
# Replace the classification head with a 2-output layer (confmat() uses nb_classes = 2).
model.fc = nn.Linear(n_features, 2)
## Equivalent head replacement for a VGG backbone:
#model.classifier[-1] = nn.Linear(in_features=4096, out_features=2)

if torch.cuda.is_available():
    model = model.cuda()

In [6]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [7]:
# Training configuration shared through module-level names: fit() reads
# `criterion`, `optimizer` and `exp_lr_scheduler` directly.
learning_rate = 0.1
criterion = nn.CrossEntropyLoss()
# All model parameters are passed in, including any frozen above
# (requires_grad=False); presumably those are skipped because they never
# receive gradients - TODO confirm.
optimizer = optim.SGD(model.parameters(),lr=learning_rate,momentum=0.9,nesterov=True)
# Decay factor 0.1 every 10 scheduler steps; fit() steps the scheduler once
# per 'train' call.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

In [8]:
def fit(epoch,model,data_loader,phase='train',volatile=False):
    """Run one pass over ``data_loader`` and report loss and accuracy.

    'train' phase: steps the module-level ``exp_lr_scheduler`` once, then
    performs a standard forward / backward / ``optimizer.step()`` per batch.

    'valid' phase: no gradients. Each sample is scored on the full image and
    on the patches from ``extract_patches_2d`` (299x299, step 0.6, after
    resizing to 478x478); the sigmoid scores of the image and all its patches
    are averaged, and loss and accuracy are computed from that average.

    Args:
        epoch: unused; kept for call compatibility.
        model: network to train or evaluate.
        data_loader: ``DataLoader`` yielding ``(images, labels)`` batches.
        phase: ``'train'`` or ``'valid'``.
        volatile: unused; kept for call compatibility (autograd is controlled
            with ``torch.no_grad()`` instead).

    Returns:
        ``(loss, accuracy)`` as Python floats: the sample-weighted mean loss
        over the dataset and the accuracy in percent.

    Raises:
        ValueError: if ``phase`` is neither ``'train'`` nor ``'valid'``
            (previously this silently returned zeros).
    """
    if phase not in ('train', 'valid'):
        raise ValueError("phase must be 'train' or 'valid', got {!r}".format(phase))
    training = phase == 'train'
    device = torch.device('cuda' if is_cuda else 'cpu')

    if training:
        # NOTE(review): stepping the scheduler at the start of the epoch is
        # the pre-1.1 PyTorch convention; newer releases expect it after
        # optimizer.step(). Left in place to keep the existing LR schedule.
        exp_lr_scheduler.step()
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    running_correct = 0
    up_sample = nn.UpsamplingBilinear2d(size=(478, 478))
    for data, target in data_loader:
        target = target.to(device)

        if training:
            optimizer.zero_grad()
            output = model(data.to(device))
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        else:
            with torch.no_grad():
                # Score of the full image.
                output = torch.sigmoid(model(data.to(device)))
                # Resize and patch extraction run on the CPU; only the
                # resulting patches are moved to the device.
                patches = extract_patches_2d(up_sample(data.cpu()), [299, 299],
                                             step=[0.6, 0.6], batch_first=True).to(device)
                for i in range(len(target)):
                    patch_scores = torch.sigmoid(model(patches[i]))
                    # Mean over the full image and every patch of sample i.
                    output[i] = (output[i] + patch_scores.sum(dim=0)) / (patch_scores.size(0) + 1)
                # NOTE(review): the criterion receives averaged sigmoid scores
                # here rather than raw logits, so validation loss is not on
                # the same scale as training loss.
                loss = criterion(output, target)

        # The criterion returns a batch mean; weight it by the batch size so
        # that dividing by the dataset size below yields a per-sample mean.
        running_loss += loss.item() * target.size(0)
        preds = output.detach().max(dim=1)[1]
        running_correct += preds.eq(target.view_as(preds)).sum().item()

    loss = running_loss/len(data_loader.dataset)
    accuracy = 100. * running_correct/len(data_loader.dataset)

    print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, loss, accuracy))
    return loss,accuracy

# Load Data & Train

# Test

In [9]:
batch_size = 16
num_epochs = 400

output_path = 'D:/capstone/split_patient/S5/Output/ROC/'

# 299x299 inputs; used by the (currently disabled) training loader below.
# The mean/std are the values commonly used with torchvision's
# ImageNet-pretrained models.
transform = transforms.Compose([transforms.Resize((299,299))
                                       ,transforms.ToTensor()
                                       ,transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# 478x478 inputs for validation / test, where 299x299 patches are extracted.
# Deliberately NOT named `transforms`: rebinding that name would shadow the
# imported torchvision module and break this cell when it is run again.
transform_478 = transforms.Compose([transforms.Resize((478,478))
                                       ,transforms.ToTensor()
                                       ,transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

#train = ImageFolder('D:/capstone/split_patient/S1/data/4fold/train',transform=transform)
#trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=5, pin_memory=True)

valid = ImageFolder('D:/capstone/split_patient/S1/data/4fold/valid',transform=transform_478)
validloader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True, num_workers=5, pin_memory=True)

test = ImageFolder('D:/capstone/split_patient/raw data/test',transform=transform_478)
# shuffle=False is required: confmat() matches scores from its two passes by position.
testloader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, num_workers=5, pin_memory=True)
#print(test.samples)
#num_of_label(testloader)
confmat(testloader)



tensor([[94.,  6.],
        [10., 90.]])
tensor([0.9400, 0.9000])
TP = 90.0, FP = 6.0, TN = 94.0, FN = 10.0
Specifity = 0.9400, Sensitivity = 0.9000
F1 score = 0.9184
Test Acc = 92.0000
cohens kappa = 0.8400
