### In this notebook we investigate a designed Resnet network on augmented image data

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

### Importing the libraries

In [2]:
import torch 

import torch.nn as nn
import torch.utils.data as Data
from torch.autograd import Function, Variable
from torch.optim import lr_scheduler

import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

from pathlib import Path
import os
import copy
import math
import matplotlib.pyplot as plt
import numpy as np

from datetime import datetime
import time as time

import warnings

#### Checking whether the GPU is active

In [3]:
torch.backends.cudnn.enabled

True

In [4]:
torch.cuda.is_available()

True

In [5]:
torch.cuda.init()

#### Dataset paths

In [6]:
PATH = Path("/home/saman/Saman/data/Image_Data01/")
train_path = PATH / 'train' / 'Total'
valid_path = PATH / 'valid' / 'Total'
test_path = PATH / 'test' / 'Total'

### Model parameters

In [7]:
Num_Filter1=16
Num_Filter2=64 
Ker_Sz1=5 
Ker_Sz2=5

learning_rate= 0.0001

Dropout= 0.2
BchSz= 1
EPOCH= 10

In [8]:
# Loss calculator
criterion = nn.CrossEntropyLoss()   # cross entropy loss

### Defining resnet classes

In [9]:
class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        
        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        
        self.conv4 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn4 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        
        out = self.conv2(x)
        out = self.bn2(out)
        out = self.relu(out)
        
        out = self.conv3(x)
        out = self.bn3(out)
        out = self.relu(out)
        
        out = self.conv4(out)
        out = self.bn4(out)
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [10]:
class ResNet(nn.Module):

    def __init__(self, block, layers, Num_Filter1 , Num_Filter2, Ker_Sz1, Ker_Sz2, num_classes=2):
        self.in_channels = Num_Filter2
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, out_channels=Num_Filter1, kernel_size=Ker_Sz1, stride=1,
                               padding=int((Ker_Sz1-1)/2),bias=False)
        self.bn1 = nn.BatchNorm2d(Num_Filter1)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        
        self.conv2 = nn.Conv2d(Num_Filter1, Num_Filter2, kernel_size=Ker_Sz2, stride=1,
                               padding=int((Ker_Sz2-1)/2),bias=False)
        self.bn2 = nn.BatchNorm2d(Num_Filter2)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        
        self.layer1 = self._make_layer(block, Num_Filter2, layers[0])
        self.layer2 = self._make_layer(block, Num_Filter2, layers[1], stride=1)
        
        self.maxpool = nn.MaxPool2d(7, stride=1, padding=1)
        self.fc = nn.Linear(12616704* block.expansion, num_classes)
        
        # Self initiation weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)

        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

### Freading number of parameter in our model

In [11]:
def print_num_params(model):
    TotalParam=0
    for param in list(model.parameters()):
        print("Individual parameters are:")
        nn=1
        for size in list(param.size()):
            print(size)
            nn = nn*size
        print("Total parameters: {}" .format(param.numel()))
        TotalParam += nn
    print('-' * 10)
    print("Sum of all Parameters is: {}" .format(TotalParam))

In [12]:
def get_num_params(model):
    TotalParam=0
    for param in list(model.parameters()):
        nn=1
        for size in list(param.size()):
            nn = nn*size
        TotalParam += nn
    return TotalParam

### Training and Validating

#### Training and validation function

In [13]:
def train_model(model, criterion, optimizer,  Dropout, learning_rate,  BATCHSIZE, num_epochs):
        print(str(datetime.now()).split('.')[0], "Starting training and validation...\n")
        print("====================Data and Hyperparameter Overview====================\n")
        print("Number of training examples: {} , Number of validation examples: {} \n".format(len(train_data), len(valid_data)))
              
        print("Dropout:{:,.2f}, Learning rate: {:,.5f} " 
              .format( Dropout, learning_rate ))        
        print("Batch size: {}, Number of epochs: {} " 
              .format(BATCHSIZE, num_epochs)) 
        
        print("Number of parameter in the model: {}". format(get_num_params(model)))
              
        print("================================Results...==============================\n")

        since = time.time()  #record the beginning time

        best_model = model
        best_acc = 0.0
        acc_vect =[]   

        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(train_loader):   
                images = Variable(images).cuda()
                labels = Variable(labels).cuda()

                # Forward pass
                outputs = model(images)            # model output
                loss = criterion(outputs, labels)  # cross entropy loss

                # Trying binary cross entropy
                #loss = criterion(torch.max(outputs.data, 1), labels)
                #loss = torch.nn.functional.binary_cross_entropy(outputs, labels)
                
                

                # Backward and optimize
                optimizer.zero_grad()             # clear gradients for this training step
                loss.backward()                   # backpropagation, compute gradients
                optimizer.step()                  # apply gradients

                if (i+1) % 1000 == 0:               # Reporting the loss and progress every 50 step
                    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                               .format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))

            model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)

            with torch.no_grad():
                correct = 0
                total = 0
                for images, labels in valid_loader:
                    images = Variable(images).cuda()
                    labels = Variable(labels).cuda()
                    
                    outputs = model(images)
                    _, predicted = torch.max(outputs.data, 1)

                    loss = criterion(outputs, labels)
                    loss += loss.item()

                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                epoch_loss= loss / total
                epoch_acc = 100 * correct / total
                acc_vect.append(epoch_acc)

                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model = copy.deepcopy(model)

                print('Validation accuracy and loss of the model on  {} images: {} %, {:.5f}'
                      .format(len(valid_data), 100 * correct / total, loss))

            correct = 0
            total = 0
            for images, labels in train_loader:
                images = Variable(images).cuda()
                labels = Variable(labels).cuda()
                
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)

                loss = criterion(outputs, labels)
                loss += loss.item()

                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            epoch_loss= loss / total
            epoch_acc = 100 * correct / total

            print('Train  accuracy and loss of the model on  {} images: {} %, {:.5f}'
                  .format(len(train_data), epoch_acc, loss))
            print('-' * 10)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best validation Acc: {:4f}'.format(best_acc)) 
        
        mean_acc = np.mean(acc_vect)
        print('Average accuracy on the validation {} images: {}'
              .format(len(train_data),mean_acc))
        print('-' * 10)
        return best_model, mean_acc

### Testing function

In [14]:
def test_model(model, test_loader):
    print("Starting testing...\n")
    model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)

    with torch.no_grad():
        correct = 0
        total = 0
        test_loss_vect=[]
        test_acc_vect=[]
        
        since = time.time()  #record the beginning time
        
        for i in range(10):
            
            Indx = torch.randperm(len(test_data))
            Cut=int(len(Indx)/10) # Here 10% showing the proportion of data is chosen for pooling
            indices=Indx[:Cut]            
            Sampler = Data.SubsetRandomSampler(indices)
            pooled_data =  torch.utils.data.DataLoader(test_data , batch_size=BchSz,sampler=Sampler)

            for images, labels in pooled_data:
                images = Variable(images).cuda()
                labels = Variable(labels).cuda()
                
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                
            test_loss= loss / total
            test_accuracy= 100 * correct / total
            
            test_loss_vect.append(test_loss)
            test_acc_vect.append(test_accuracy)

            
#             print('Test accuracy and loss for the {}th pool: {:.2f} %, {:.5f}'
#                   .format(i+1, test_accuracy, test_loss))
            
        
        mean_test_loss = torch.mean(torch.tensor(test_loss_vect))
        mean_test_acc = torch.mean(torch.tensor(test_acc_vect))
        std_test_acc = torch.std(torch.tensor(test_acc_vect))
        
        print('-' * 10)
        print('Average test accuracy on test data: {:.2f} %, loss: {:.5f}, Standard deviion of accuracy: {:.4f}'
              .format(mean_test_acc, mean_test_loss, std_test_acc))
        
        print('-' * 10)
        time_elapsed = time.time() - since
        print('Testing complete in {:.1f}m {:.4f}s'.format(time_elapsed // 60, time_elapsed % 60))
        
        print('-' * 10)
        
        return mean_test_acc, mean_test_loss, std_test_acc

### Augmentation

In [15]:
# Mode of transformation
transformation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(),
    transforms.ToTensor(),
]) 
transformation2 = transforms.Compose([
    transforms.ToTensor(),  
]) 


train_data = torchvision.datasets.ImageFolder(train_path,transform=transformation)
train_loader =torch.utils.data.DataLoader(train_data, batch_size=BchSz, shuffle=True,
                                          num_workers=8)

valid_data = torchvision.datasets.ImageFolder(valid_path,transform=transformation)
valid_loader =torch.utils.data.DataLoader(valid_data, batch_size=BchSz, shuffle=True,
                                          num_workers=8)

test_data = torchvision.datasets.ImageFolder(test_path,transform=transformation2)
test_loader =torch.utils.data.DataLoader(test_data, batch_size=BchSz, shuffle=True,
                                          num_workers=8)

In [16]:
## Defining model with different variables, namely:
model = ResNet(BasicBlock, [1, 2] , Num_Filter1 , Num_Filter2, Ker_Sz1, Ker_Sz2)      
model = model.cuda()
print(model)

# Defining optimizer with variable learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
optimizer.scheduler=lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=7, stride=1, padding=1, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), paddi

In [17]:
get_num_params(model)

25704274

In [None]:
seed= [1, 3, 7, 19, 22]
#seed= [22]

val_acc_vect=[]
test_acc_vect=[]


for ii in seed: 
    torch.cuda.manual_seed(ii)
    torch.manual_seed(ii)
    
    model, val_acc= train_model(model, criterion, optimizer,  Dropout, learning_rate,  BchSz, EPOCH)
    testing = test_model (model, test_loader)
    test_acc= testing[0]
    
    
    val_acc_vect.append( val_acc )
    test_acc_vect.append(test_acc)
    
    mean_val_acc = torch.mean(torch.tensor(val_acc_vect))
    mean_test_acc = torch.mean(torch.tensor(test_acc_vect))
    
    
print('-' * 10)
print('-' * 10)
print('Average of validation accuracies on 5 different random seed: {:.2f} %, Average of testing accuracies on 5 different random seed: {:.2f} %'
      .format(mean_val_acc, mean_test_acc)) 


2020-01-08 17:58:06 Starting training and validation...


Number of training examples: 12000 , Number of validation examples: 4000 

Dropout:0.20, Learning rate: 0.00010 
Batch size: 1, Number of epochs: 10 
Number of parameter in the model: 25704274

Epoch [1/10], Step [1000/12000], Loss: 0.0000
Epoch [1/10], Step [2000/12000], Loss: 300.5869
Epoch [1/10], Step [3000/12000], Loss: 80.5591
Epoch [1/10], Step [4000/12000], Loss: 61.7886
Epoch [1/10], Step [5000/12000], Loss: 183.3763
Epoch [1/10], Step [6000/12000], Loss: 0.0000
Epoch [1/10], Step [7000/12000], Loss: 7.6211
Epoch [1/10], Step [8000/12000], Loss: 37.4562
Epoch [1/10], Step [9000/12000], Loss: 77.3143
Epoch [1/10], Step [10000/12000], Loss: 0.0000
Epoch [1/10], Step [11000/12000], Loss: 0.0000
Epoch [1/10], Step [12000/12000], Loss: 0.0000
Validation accuracy and loss of the model on  4000 images: 58.75 %, 0.00000
Train  accuracy and loss of the model on  12000 images: 61.858333333333334 %, 0.00000
----------
Epoch [2/10]

Epoch [1/10], Step [1000/12000], Loss: 0.5723
Epoch [1/10], Step [2000/12000], Loss: 0.3111
Epoch [1/10], Step [3000/12000], Loss: 0.8324
Epoch [1/10], Step [4000/12000], Loss: 1.0623
Epoch [1/10], Step [5000/12000], Loss: 0.0215
Epoch [1/10], Step [6000/12000], Loss: 0.9125
Epoch [1/10], Step [7000/12000], Loss: 0.4944
Epoch [1/10], Step [8000/12000], Loss: 0.2904
Epoch [1/10], Step [9000/12000], Loss: 0.4409
Epoch [1/10], Step [10000/12000], Loss: 0.7368
Epoch [1/10], Step [11000/12000], Loss: 0.7483
Epoch [1/10], Step [12000/12000], Loss: 0.7491
Validation accuracy and loss of the model on  4000 images: 62.775 %, 1.16847
Train  accuracy and loss of the model on  12000 images: 65.00833333333334 %, 1.03797
----------
Epoch [2/10], Step [1000/12000], Loss: 0.8921
Epoch [2/10], Step [2000/12000], Loss: 0.7256
Epoch [2/10], Step [3000/12000], Loss: 0.4678
Epoch [2/10], Step [4000/12000], Loss: 0.5304
Epoch [2/10], Step [5000/12000], Loss: 0.5473
Epoch [2/10], Step [6000/12000], Loss: 0.5