In [50]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.misc
import os
import openslide
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import time
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models

In [51]:
device = torch.device('cpu')

In [52]:
image_transform = transforms.Compose([
        transforms.Resize((1024,1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])


class dataset(Dataset):
    def __init__(self, df_path, train = False):
        self.df = pd.read_csv(df_path)
        self.train = train
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        
        imgage_folder = '/scratch/cz2064/myjupyter/BDML/Project/data/'
        file_id = self.df.iloc[idx]['File ID']
        file_name = self.df.iloc[idx]['File Name']
        image_path = imgage_folder+file_id+'/'+file_name
        image = openslide.open_slide(image_path)
        image = image.get_thumbnail((2048,2048))
        image_tensor = image_transform(image)
        image.close()
        label = self.df.iloc[idx]['label']
        
        sample = {'x': image_tensor, 'y': label}
        
        return sample

In [53]:
train_df_path = './train-Copy1.csv'
val_df_path = './val-Copy1.csv'
test_df_path = './test.csv'
transformed_dataset = {'train': dataset(train_df_path, train = True),
                       'validate':dataset(val_df_path),
                       'test':dataset(test_df_path),}
bs = 8
dataloader = {x: DataLoader(transformed_dataset[x], batch_size=bs,
                        shuffle=True, num_workers=0) for x in ['train', 'validate','test']}
data_sizes ={x: len(transformed_dataset[x]) for x in ['train', 'validate','test']}

In [54]:
train_samples = iter(dataset(train_df_path, train = True))

In [55]:
image = next(train_samples)

In [56]:
image['x'].size()

torch.Size([3, 1024, 1024])

In [57]:
image['y']

2

In [58]:
def train_model(model, dataloader, optimizer, loss_fn, num_epochs = 10, verbose = True, scheduler=None,\
                best_model_wts=None,best_acc=None):
    acc_dict = {'train':[],'validate':[]}
    loss_dict = {'train':[],'validate':[]}
    if not best_acc:
        best_acc = 0
    phases = ['train','validate']
    since = time.time()
    for i in range(num_epochs):
        print('Epoch: {}/{}'.format(i, num_epochs-1))
        print('-'*10)
        for p in phases:
            running_correct = 0
            running_loss = 0
            running_total = 0
            if p == 'train':
                model.train()
            else:
                model.eval()
                
            for data in dataloader[p]:
                optimizer.zero_grad()
                image = data['x'].to(device,dtype=torch.float)
                label = data['y'].to(device,dtype=torch.long)
                output = model(image)
                loss = loss_fn(output, label)
                print(loss)
                _, preds = torch.max(output, dim = 1)
                num_imgs = image.size()[0]
                running_correct += torch.sum(preds ==label).item()
                running_loss += loss.item()*num_imgs
                running_total += num_imgs
                if p== 'train':
                    loss.backward()
                    optimizer.step()
            epoch_acc = float(running_correct/running_total)
            epoch_loss = float(running_loss/running_total)
            if verbose or (i%10 == 0):
                print('Phase:{}, epoch loss: {:.4f} Acc: {:.4f}'.format(p, epoch_loss, epoch_acc))
            
            acc_dict[p].append(epoch_acc)
            loss_dict[p].append(epoch_loss)
            if p == 'validate':
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = model.state_dict()
            else:
                if scheduler:
                    scheduler.step()
            torch.save({'epoch': i + 1,'state_dict': model.state_dict(),\
                        'best_model': best_model_wts,'best_acc':best_acc,\
                        'acc_dict':acc_dict,'loss_dict':loss_dict}, 'checkpoint.tar' )
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val acc: {:4f}'.format(best_acc))
    
    model.load_state_dict(best_model_wts)
    
    return model, acc_dict, loss_dict

In [59]:
model = models.vgg16(pretrained=True)
loss_fn = nn.CrossEntropyLoss()
#optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.Adam(model.parameters(),lr=0.0001)

In [None]:
model, acc_dict, loss_dict = train_model(model, dataloader, optimizer, loss_fn, num_epochs = 50)


Epoch: 0/49
----------
tensor(8.4821, grad_fn=<NllLossBackward>)
tensor(4.2142, grad_fn=<NllLossBackward>)
tensor(3.3328, grad_fn=<NllLossBackward>)
tensor(1.7961, grad_fn=<NllLossBackward>)
tensor(2.0554, grad_fn=<NllLossBackward>)
tensor(1.1949, grad_fn=<NllLossBackward>)
tensor(4.8278, grad_fn=<NllLossBackward>)
Phase:train, epoch loss: 3.5652 Acc: 0.2800
tensor(1.5884, grad_fn=<NllLossBackward>)
tensor(1.4796, grad_fn=<NllLossBackward>)
tensor(1.9824, grad_fn=<NllLossBackward>)
Phase:validate, epoch loss: 1.6048 Acc: 0.2105
Epoch: 1/49
----------
tensor(1.1727, grad_fn=<NllLossBackward>)
