# ResNet, Regularization and DataAugmentation


These technique we will use 
* Data Normalization
* Data Augmentation
* Residual Connections
* Batch Normalization
* Learning rate scheduling
* Weight Decay
* Gradient Clipping
* Adam Optimizer

In [128]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

**1.Data Augmentation and Normalaization**

In [129]:
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding = 4),
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

batch_size = 128

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers = 3, pin_memory = True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers = 3, pin_memory = True )

Files already downloaded and verified
Files already downloaded and verified


In [130]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


**2. CNN model with Residual Block and  batch Normalization**

In [131]:
class CNNModelwithResidualandBatchnorm(nn.Module):
    def __init__(self):
        super().__init__()
        self.convlayer1 = nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 3, padding = 1)
        self.batchNormLayer1 = nn.BatchNorm2d(32)
        self.convlayer2 = nn.Conv2d(in_channels = 32,  out_channels = 64, kernel_size = 3, padding = 1)
        self.batchNormLayer2 = nn.BatchNorm2d(64)
        self.Maxpoollayer = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        # 1X1 Conv for matching dimenions
        self.residual_conv = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 1)
        
        self.fullyconnectedlayer = nn.Linear(64 * 16 * 16, 128)
        self.outputlayer = nn.Linear(128, 10)
        
        
    def forward(self, xb):
        out = F.relu(self.batchNormLayer1(self.convlayer1(xb)))
        #residual connection
        residual = self.residual_conv(out)
        
        out = F.relu(self.batchNormLayer2(self.convlayer2(out)))
        
        out = out + residual # adding residual conncetion
        
        out = self.Maxpoollayer(out)
        out = out.view(out.size(0), -1)
        
        out = F.relu(self.fullyconnectedlayer(out))
        out = self.outputlayer(out)
        
        return out
    # training step for each batch
    def training_step(self, batch):
        images, labels = batch
        images, labels = images.to(device), labels.to(device)
        # forward passing the images batch
        out = self.forward(images)
        # Loss claculation
        loss = F.cross_entropy(out, labels)
        # this function torch.max will take out the maximum probability index = label
        _, pred = torch.max(out, dim =1)
        
        acc = torch.tensor(torch.sum(pred == labels).item()/len(pred))
        return {'loss': loss, 'acc':acc}
    
    #function to calculate epoch matrics
    def training_epoch_end(self , outputs):
        batch_losses = [x['loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        
        batch_acc = [x['acc'] for x in outputs]
        epoch_acc = torch.stack(batch_acc).mean()
        
        return {'loss': epoch_loss.item(), 'acc': epoch_acc.item()}
    
    # print result after each epoch

    def epoch_end(self, epoch, result):
        print(f"Epoch [{epoch}], train_loss: {result['loss']:.4f}, train_acc: {result['acc']:.4f}")

In [132]:
# Evaluate the model
import time
def evaluate(model, data_loader):
    outputs = []
    with torch.no_grad():
        for batch in data_loader:
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            output = model.training_step((images, labels))
            outputs.append(output)
    return model.training_epoch_end(outputs)


# training function
def fit(epochs, lr, model, train_loader):
    history = []
    # adam optimizer with weight dcay
    optimizer = optim.Adam(model.parameters(), lr, weight_decay = 1e-4)
    # Learning Rate Scgheduling
    #schedular = optim.lr_scheduler.StepLR(optimizer, step_size= 10, gamma = 0.1)
    
    total_training_time = 0  # Initialize total training time
    for epoch in range(epochs):
        start_time = time.time()
        for batch in train_loader:
            images, labels = batch
            images , labels = images.to(device), labels.to(device)
            
            loss_dict = model.training_step((images,labels))
            loss = loss_dict['loss']
            loss.backward()
            
            #Gadient Clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = 2.0)
            
            optimizer.step()
            optimizer.zero_grad()
            
        #schedular.step()
        result = evaluate(model, train_loader)
        model.epoch_end(epoch, result)
        history.append(result)
        
        end_time = time.time()
        epoch_time = end_time - start_time

        total_training_time += epoch_time 
    print(f"\nTotal training time: {total_training_time:.2f} seconds")
        
    return history

In [133]:
model = CNNModelwithResidualandBatchnorm().to(device)

history = fit(80, 0.001, model, train_loader)

Epoch [0], train_loss: 1.2746, train_acc: 0.5357
Epoch [1], train_loss: 1.0986, train_acc: 0.6108
Epoch [2], train_loss: 1.0259, train_acc: 0.6357
Epoch [3], train_loss: 0.9796, train_acc: 0.6540
Epoch [4], train_loss: 0.9162, train_acc: 0.6765
Epoch [5], train_loss: 0.8931, train_acc: 0.6862
Epoch [6], train_loss: 0.8607, train_acc: 0.6948
Epoch [7], train_loss: 0.8750, train_acc: 0.6918
Epoch [8], train_loss: 0.8312, train_acc: 0.7095
Epoch [9], train_loss: 0.8380, train_acc: 0.7055
Epoch [10], train_loss: 0.8072, train_acc: 0.7194
Epoch [11], train_loss: 0.7813, train_acc: 0.7256
Epoch [12], train_loss: 0.7807, train_acc: 0.7258
Epoch [13], train_loss: 0.7667, train_acc: 0.7344
Epoch [14], train_loss: 0.7478, train_acc: 0.7393
Epoch [15], train_loss: 0.7281, train_acc: 0.7456
Epoch [16], train_loss: 0.7672, train_acc: 0.7342
Epoch [17], train_loss: 0.7362, train_acc: 0.7436
Epoch [18], train_loss: 0.7345, train_acc: 0.7417
Epoch [19], train_loss: 0.7058, train_acc: 0.7547
Epoch [20]