In [32]:
import sys
import torch
import torchvision
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import torchvision.transforms as transforms
import os
import cv2
from torch.utils.data import DataLoader, TensorDataset, Dataset

%load_ext autoreload
%autoreload 2
# %matplotlib inline
sys.path.append("/home/shiyi/gpu/")

from gpu_allocation import set_gpu
num_gpu = 1
set_gpu(num_gpu)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Found 8 GPU(s) only 6 gpu below threshold
Using GPU 7


In [48]:
#hyperparameters 

IMG_SIZE = 224
BATCH_SIZE = 128
CLASSES = 8
EPOCH = 10

IMG_MEAN = [0.485, 0.456, 0.406]
IMG_STD = [0.229, 0.224, 0.225]

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

LR = 0.001

In [15]:
#dir and path

csv_path = '/data/AlgProj/tct_yaoms/data/tct_0702_preprocess_iou0.1_img_series/iou0.1dataaumentation.csv'
csv = pd.read_csv(csv_path)
csv = csv.loc[csv['label']!=1].reset_index(drop=True)
len(csv)

3457

In [16]:
#train test dataset split
#train val transforms definition

def train_test_split(df, test_size, shuffle=True):
    if shuffle:
        df = df.sample(frac=1).reset_index(drop=True)
    #split
    length = len(df)
    threspoint = int((1 - test_size)*length)
    train_df = df.loc[:threspoint-1,:]
    test_df = df.loc[threspoint:,:]
    
    d = {}
    d['train'] = train_df
    d['test'] = test_df.reset_index(drop=True)
    return d

train_transformer = transforms.Compose([transforms.Resize((256,256)),
                                 transforms.RandomRotation((-180, 180)),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.RandomResizedCrop((224,224)),  
                                 transforms.ToTensor(),
                                       transforms.Normalize(IMG_MEAN,IMG_STD)]
                                )
test_transformer = transforms.Compose([transforms.Resize((256,256)),
                                  transforms.CenterCrop((224,224)),
                                 transforms.ToTensor(),
                                      transforms.Normalize(IMG_MEAN,IMG_STD)])
train_test_transformer = {'train':train_transformer, 'test':test_transformer}

In [17]:
train_test_dict = train_test_split(csv, 0.2)
len(train_test_dict['test'])

692

In [6]:
class TCT_Dataset(Dataset):
    def __init__(self, phase, transforms=True):
        self.phase = phase
        self.transforms = transforms
        if phase == 'train':
            self.df = train_test_dict['train']
        else:
            self.df = train_test_dict['test']
        
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        img_path = self.df.loc[idx, 'img_path']
        img = Image.open(img_path).convert('RGB')
        label = self.df.loc[idx, 'label']
        
        if self.transforms:
            if self.phase == 'train':
                transformer = train_test_transformer['train']
            else:
                transformer = train_test_transformer['test']

            img = transformer(img)
        
        return (img, label)

In [104]:
train_dataset = TCT_Dataset('train')
test_dataset = TCT_Dataset('test')

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
from torchvision.models import resnet18
model = resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

fc_features = model.fc.in_features
model.fc = torch.nn.Linear(fc_features, CLASSES)

model.to(DEVICE)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [36]:
optimizer = optim.SGD([{'params':model.fc.parameters(), 'lr':0.004},
                      {'params':model.layer4.parameters(), 'lr':0.001}])
#                        {'params':model.layer4.parameters(),'lr':0.00001}
criterion = nn.CrossEntropyLoss()

In [10]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    print ('Train Epoch: {}\t Loss: {:.6f}\n'.format(epoch,loss.item()))

    
def test(model, device, test_loader, optimizer):
    model.eval()
    test_loss = 0
    correct = 0
#     pred_result = []
    with torch.no_grad():
        for i,data in enumerate(test_loader):          
            x,y= data
            x=x.to(device)
            y=y.to(device)
            optimizer.zero_grad()
            y_hat = model(x)
            test_loss = criterion(y_hat, y).item() # sum up batch loss
            pred = y_hat.max(1, keepdim=True)[1] # get the index of the max log-probability
            
            correct += pred.eq(y.view_as(pred)).sum().item()
#     test_loss /= len(test_loader.dataset)
    acc = 100. * correct / len(train_test_dict['test'])
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(train_test_dict['test']),
        acc))
    
#     print('-----------------------')
    return acc

In [53]:
max_acc = 0
max_epoch = 0
save_path = '/data/AlgProj/tct_yaoms/model/resnet-18_exclude_ascus_0716.pth'
for epoch in range(EPOCH):
    train(model, DEVICE, train_loader, optimizer, epoch)
    acc = test(model, DEVICE, test_loader, optimizer)
    if acc >= max_acc:
        max_acc = acc
        max_epoch = epoch
        if os.path.exists(save_path):
            os.remove(save_path)
        torch.save(model, save_path)
    print('current best acc: ', max_acc)
    print('-----------------------')
print('Result: at {} epoch, achieved best acc: {:.0f}'.format(max_epoch, max_acc))

Train Epoch: 0	 Loss: 0.424450


Test set: Average loss: 0.4064, Accuracy: 579/692 (84%)

current best acc:  83.67052023121387
-----------------------
Train Epoch: 1	 Loss: 0.366303


Test set: Average loss: 0.4202, Accuracy: 584/692 (84%)

current best acc:  84.39306358381504
-----------------------
Train Epoch: 2	 Loss: 0.451552


Test set: Average loss: 0.2491, Accuracy: 582/692 (84%)

current best acc:  84.39306358381504
-----------------------
Train Epoch: 3	 Loss: 0.346555


Test set: Average loss: 0.2896, Accuracy: 581/692 (84%)

current best acc:  84.39306358381504
-----------------------
Train Epoch: 4	 Loss: 0.339532


Test set: Average loss: 0.3629, Accuracy: 580/692 (84%)

current best acc:  84.39306358381504
-----------------------
Train Epoch: 5	 Loss: 0.303298


Test set: Average loss: 0.3134, Accuracy: 580/692 (84%)

current best acc:  84.39306358381504
-----------------------
Train Epoch: 6	 Loss: 0.303017


Test set: Average loss: 0.2757, Accuracy: 584/692 (84%)

curr

In [38]:
test_dataset.df.head()

Unnamed: 0,img_name,label,img_path
0,303152_1.jpg,3,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
1,302246_1.jpg,3,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
2,3481aaf5ac7642699d3151ebdb8c8520_59_0.jpg,0,/data/AlgProj/tct_yaoms/data/neg/3481aaf5ac764...
3,304823_36.jpg,6,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
4,285806_0.jpg,2,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...


In [54]:
help(test_loader)

Help on DataLoader in module torch.utils.data.dataloader object:

class DataLoader(builtins.object)
 |  Data loader. Combines a dataset and a sampler, and provides
 |  single- or multi-process iterators over the dataset.
 |  
 |  Arguments:
 |      dataset (Dataset): dataset from which to load the data.
 |      batch_size (int, optional): how many samples per batch to load
 |          (default: ``1``).
 |      shuffle (bool, optional): set to ``True`` to have the data reshuffled
 |          at every epoch (default: ``False``).
 |      sampler (Sampler, optional): defines the strategy to draw samples from
 |          the dataset. If specified, ``shuffle`` must be False.
 |      batch_sampler (Sampler, optional): like sampler, but returns a batch of
 |          indices at a time. Mutually exclusive with :attr:`batch_size`,
 |          :attr:`shuffle`, :attr:`sampler`, and :attr:`drop_last`.
 |      num_workers (int, optional): how many subprocesses to use for data
 |          loading. 0 

In [105]:
y_pred =[]
y_true = []
for i,data in enumerate(test_loader):
    x, y = data
    y = np.array(y.cpu())
    y_true.append(y)
    
    x = x.to(DEVICE)
    y_hat = model(x)
    pred = y_hat.max(1, keepdim=True)[1]
    pred = np.array(pred.cpu())
    y_pred.append(pred)

In [108]:
y_pred_list
# y_true = np.array(y_true)

[3,
 3,
 0,
 6,
 2,
 0,
 6,
 6,
 2,
 2,
 3,
 2,
 3,
 2,
 2,
 2,
 0,
 3,
 7,
 0,
 0,
 3,
 0,
 0,
 0,
 2,
 7,
 7,
 7,
 7,
 0,
 6,
 7,
 6,
 2,
 2,
 0,
 7,
 0,
 0,
 6,
 2,
 0,
 0,
 0,
 6,
 4,
 2,
 0,
 6,
 0,
 0,
 0,
 3,
 0,
 3,
 2,
 4,
 7,
 7,
 7,
 4,
 3,
 3,
 6,
 2,
 2,
 7,
 4,
 2,
 3,
 0,
 4,
 2,
 2,
 3,
 2,
 6,
 0,
 2,
 0,
 2,
 2,
 0,
 0,
 7,
 2,
 6,
 6,
 2,
 7,
 0,
 0,
 7,
 7,
 0,
 0,
 7,
 0,
 2,
 2,
 7,
 6,
 7,
 0,
 2,
 2,
 3,
 2,
 7,
 2,
 7,
 0,
 3,
 3,
 4,
 2,
 2,
 6,
 7,
 7,
 6,
 3,
 7,
 0,
 6,
 0,
 7,
 2,
 3,
 3,
 3,
 3,
 6,
 7,
 3,
 7,
 2,
 6,
 2,
 0,
 0,
 2,
 3,
 7,
 3,
 6,
 2,
 2,
 4,
 3,
 2,
 2,
 0,
 4,
 6,
 0,
 4,
 6,
 0,
 0,
 6,
 6,
 0,
 2,
 6,
 0,
 0,
 2,
 2,
 2,
 3,
 0,
 6,
 6,
 2,
 0,
 0,
 6,
 2,
 2,
 0,
 7,
 6,
 0,
 2,
 0,
 4,
 4,
 0,
 2,
 0,
 0,
 3,
 3,
 6,
 6,
 6,
 6,
 7,
 0,
 6,
 0,
 0,
 3,
 3,
 0,
 0,
 0,
 0,
 2,
 0,
 3,
 2,
 7,
 2,
 7,
 0,
 0,
 2,
 2,
 6,
 0,
 2,
 3,
 2,
 0,
 0,
 7,
 2,
 0,
 6,
 0,
 7,
 7,
 6,
 3,
 3,
 0,
 0,
 4,
 3,
 7,
 2,
 0,
 2,
 0,
 0,
 3,
 6,


In [106]:
y_pred_list = []
y_true_list = []
for i in range(len(y_pred)):
    y_pred_list += list(y_pred[i].ravel())
for i in range(len(y_true)):
    y_true_list += list(y_true[i].ravel())

In [95]:

import numpy as np
# import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels


def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

#     fig, ax = plt.subplots()
#     im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
#     ax.figure.colorbar(im, ax=ax)
#     # We want to show all ticks...
#     ax.set(xticks=np.arange(cm.shape[1]),
#            yticks=np.arange(cm.shape[0]),
#            # ... and label them with the respective list entries
#            xticklabels=classes, yticklabels=classes,
#            title=title,
#            ylabel='True label',
#            xlabel='Predicted label')

#     # Rotate the tick labels and set their alignment.
#     plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
#              rotation_mode="anchor")

#     # Loop over data dimensions and create text annotations.
#     fmt = '.2f' if normalize else 'd'
#     thresh = cm.max() / 2.
#     for i in range(cm.shape[0]):
#         for j in range(cm.shape[1]):
#             ax.text(j, i, format(cm[i, j], fmt),
#                     ha="center", va="center",
#                     color="white" if cm[i, j] > thresh else "black")
#     fig.tight_layout()
#     return ax


In [96]:
plot_confusion_matrix(y_true_list, y_pred_list, classes=np.array(['0','1','2', '3', '4', '5', '6', '7']))

Confusion matrix, without normalization
[[246   0   2   0   0   0]
 [  0  81   9  11   3   0]
 [  0   7  68   1   6   0]
 [  0  61   1  25   5   0]
 [  1   0   2   2  76   1]
 [  0   1   0   0   2  81]]


In [125]:
import random
def show_mis_cls_picture(y_true, y_pred, test_dataset, cls, mis_cls):
    mis_list = []
    right_list = []
    correct_img_path =[]
    mis_cls_img_path = []
    
    for idx, y in enumerate(zip(y_true, y_pred)):
        i, j = y
        if (i == j) and (j == cls):
            right_list.append(idx)
        if (i == cls) and (j == mis_cls):
            mis_list.append(idx)
    
    df = test_dataset.df
#     figure1, ax1 = plt.subplots(2,5)
#     plt.title('cls correct')
    for i in range(10):
        idx = right_list[i]
        img_path = df.loc[idx, 'img_path']
        correct_img_path.append(img_path)
#         img = Image.open(img_path)
#         img.show()
#         ax1[i].imshow(img)
#     plt.show()
    print('-----------------')
    
    choice = random.sample(mis_list, 20)
#     figure2, ax2 = plt.subplots(4,5)
#     plt.title('mis cls')
    for i in range(20):
        idx = choice[i]
        img_path = df.loc[idx, 'img_path']
        mis_cls_img_path.append(img_path)
#         img = Image.open(img_path)
#         img.show()
#         ax2[i].imshow(img)
#     plt.show()
    return correct_img_path, mis_cls_img_path

In [102]:
test_dataset.df

Unnamed: 0,img_name,label,img_path
0,303152_1.jpg,3,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
1,302246_1.jpg,3,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
2,3481aaf5ac7642699d3151ebdb8c8520_59_0.jpg,0,/data/AlgProj/tct_yaoms/data/neg/3481aaf5ac764...
3,304823_36.jpg,6,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
4,285806_0.jpg,2,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
5,c51d01dbe09b45ca868f18d10f15e8a0_226_0.jpg,0,/data/AlgProj/tct_yaoms/data/neg/c51d01dbe09b4...
6,286454_2.jpg,4,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
7,302231_32.jpg,6,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
8,304918_0.jpg,2,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...
9,305325_0.jpg,4,/data/AlgProj/tct_yaoms/data/tct_0702_preproce...


In [126]:
path1, path2 = show_mis_cls_picture(y_true_list, y_pred_list, test_dataset, 4, 2)

-----------------


In [138]:
path = path1 + path2
df = pd.DataFrame()
df['img_path'] = path
df.loc[:9, 'cls'] = 1
df.loc[10:, 'cls'] = 0

In [140]:
df.to_csv('/data/AlgProj/tct_yaoms/model/resnet18_0716_error_analysis.csv')