# Training

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils import data
import torchvision
from torchvision import transforms
from skimage.io import imread
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from tqdm import tqdm
import random

In [2]:
train_path = "../data/train/"

### Tansformation

In [3]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

### Load Data

In [4]:
train_labels = pd.read_csv("../data/train_labels.csv")

In [5]:
train_labels['img_path'] = train_path + train_labels[['id']] + '.tif'

In [6]:
train_labels.head()

Unnamed: 0,id,label,img_path
0,f38a6374c348f90b587e046aac6079959adf3835,0,../data/train/f38a6374c348f90b587e046aac607995...
1,c18f2d887b7ae4f6742ee445113fa1aef383ed77,1,../data/train/c18f2d887b7ae4f6742ee445113fa1ae...
2,755db6279dae599ebb4d39a9123cce439965282d,0,../data/train/755db6279dae599ebb4d39a9123cce43...
3,bc3f0c64fb968ff4a8bd33af6971ecae77c75e08,0,../data/train/bc3f0c64fb968ff4a8bd33af6971ecae...
4,068aba587a4950175d04c680d38943fd488d6a9d,0,../data/train/068aba587a4950175d04c680d38943fd...


In [7]:
total_images = train_labels.shape[0]

#### Train and Validation Data

In [8]:
train_index, validation_index = train_test_split(train_labels.index, test_size=0.2)

In [9]:
print(train_index.shape)
print(validation_index.shape)

(176020,)
(44005,)


### Pytorch Data Generator

In [10]:
class DataGenerator(data.Dataset):
    def __init__(self, dataset, transform):
        self.dataset = dataset
        self.transform = transform
        
    def __len__(self):
        return self.dataset.shape[0]
    
    def __getitem__(self, index):
        label = self.dataset['label'][index]
        
        img_raw = imread(self.dataset['img_path'][index])
        img = self.transform(img_raw)
        
        return [img, label]

## Data Loaders

In [11]:
train_data = DataGenerator(train_labels, transform=transform)

In [12]:
train_loader = data.DataLoader(train_data, batch_size=32,
                              sampler= data.SubsetRandomSampler(train_index))

In [13]:
valid_loader = data.DataLoader(train_data, batch_size=32,
                              sampler= data.SubsetRandomSampler(validation_index))

## Model

In [14]:
model = torchvision.models.resnet34(pretrained=False)

## Custom Fully connected Layer

In [15]:
model.fc = torch.nn.Sequential(
    torch.nn.Linear(in_features=512, out_features=1),
    torch.nn.Sigmoid()
)

In [16]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

#### Hyperparameters

In [17]:
epochs = 5

In [18]:
rand(0,1)

### Train Model

In [19]:
## USE GPU ##
model = model.cuda()

optimizer = torch.optim.Adam(model.parameters())
criterion = torch.nn.BCELoss()

print_every = 10

loss_sum = 0
correct_sum = 0
samples = 0
sum_precision = 0
sum_recall = 0
valid_count = 0
total_train_batch = 0

val_loss_sum = 0
val_precision_sum = 0
val_recall_sum = 0
val_samples = 0
total_val_batch = 0

train_metrics = []
valid_metrics = []

for epoch in range(epochs):
    ## Training
    batch = 1
    model.train()
    for x, y in train_loader:
        total_train_batch += 1
        x = x.cuda()
        y = y.cuda()
        y = y.view(-1, 1).float()
        
        optimizer.zero_grad()
        output = model(x)

        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        
        samples += x.shape[0]
        loss_sum += loss.item() * x.shape[0] 
        y_pred = (output >= 0.5).float()
        num_corrects = torch.sum(y_pred == y)
        correct_sum += num_corrects
  
        precision = precision_score(y.to('cpu'), y_pred.to('cpu'))
        recall = recall_score(y.to('cpu'), y_pred.to('cpu'))
        sum_precision += precision
        sum_recall += recall
        
        if batch % print_every == 0:
            train_loss = float(loss_sum)/float(samples)
            train_acc = float(correct_sum)/float(samples)
            train_prec = float(sum_precision)/float(total_train_batch)
            train_rec = float(sum_recall)/float(total_train_batch)
            train_metrics.append([epoch+1, total_train_batch, train_acc, train_prec, train_rec])
                            
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Batch {batch}.. "
                  f"Train loss: {train_loss:.3f}.. "
                  f"Train acc: {train_acc:.3f}.. "
                  f"Train avg precision: {train_prec:.3f}.. "
                  f"Train avg recall: {train_rec:.3f}.. ")
            
        batch +=1
    

            
    model.eval()
    with torch.no_grad():
        print("== Validation step")
        for x_val, y_val in valid_loader:
            total_val_batch +=1
            x_val = x_val.cuda()
            y_val = y_val.cuda()
            y_val = y_val.view(-1, 1).float()

            output_val = model(x_val)
            loss_val = criterion(output_val, y_val)
            y_val_pred = (output_val >= 0.5).float()

            val_samples += x_val.shape[0]
            val_loss_sum = loss_val.item() * x_val.shape[0]

            val_precision = precision_score(y_val.to('cpu'), y_val_pred.to('cpu'))
            val_recall = recall_score(y_val.to('cpu'), y_val_pred.to('cpu'))
            val_precision_sum += val_precision
            val_recall_sum += val_recall
            
            if total_val_batch % print_every == 0:
                val = float(val_loss_sum)/float(val_samples)
                prec = float(val_precision_sum)/float(total_val_batch)
                rec = float(val_recall_sum)/float(total_val_batch)
                
                valid_metrics.append([epoch+1, total_val_batch, val, prec, rec])
                
                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Valid loss: {val:.3f}.. "
                      f"Valid avg presision: {prec:.3f}.. "
                      f"Valid avg recall: {rec:.3f}.. ")


    

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/5.. Batch 10.. Train loss: 0.889.. Train acc: 0.578.. Train avg precision: 0.393.. Train avg recall: 0.478.. 
Epoch 1/5.. Batch 20.. Train loss: 0.723.. Train acc: 0.650.. Train avg precision: 0.503.. Train avg recall: 0.587.. 
Epoch 1/5.. Batch 30.. Train loss: 0.654.. Train acc: 0.685.. Train avg precision: 0.592.. Train avg recall: 0.570.. 
Epoch 1/5.. Batch 40.. Train loss: 0.632.. Train acc: 0.694.. Train avg precision: 0.605.. Train avg recall: 0.612.. 
Epoch 1/5.. Batch 50.. Train loss: 0.621.. Train acc: 0.692.. Train avg precision: 0.610.. Train avg recall: 0.618.. 
Epoch 1/5.. Batch 60.. Train loss: 0.595.. Train acc: 0.710.. Train avg precision: 0.648.. Train avg recall: 0.637.. 
Epoch 1/5.. Batch 70.. Train loss: 0.577.. Train acc: 0.726.. Train avg precision: 0.667.. Train avg recall: 0.662.. 
Epoch 1/5.. Batch 80.. Train loss: 0.571.. Train acc: 0.730.. Train avg precision: 0.675.. Train avg recall: 0.658.. 
Epoch 1/5.. Batch 90.. Train loss: 0.564.. Train acc: 0.

KeyboardInterrupt: 