## Lab Course: Distributed Data Analytics
## Exercise Sheet 7


In [31]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import torch.utils.data as data_utils

In [32]:
def fiftyprcnt(train_dataset):
    """Return a ``Subset`` holding the first half of ``train_dataset``.

    The subset covers indices ``0 .. int(0.5 * len(train_dataset)) - 1`` in
    their original order; nothing is shuffled or sampled here.
    """
    half_count = int(0.5*len(train_dataset))
    leading_indices = torch.arange(half_count)
    return data_utils.Subset(train_dataset, leading_indices)

def dim_calc(width,kernel_size):
    """Return the feature-map width after conv -> pool -> conv -> conv -> pool.

    Each convolution is modelled with padding 0 and stride 1, i.e.
    ``out = (in - kernel_size + 2*0)/1 + 1``; each pooling step halves the
    width. The result is truncated to an int only at the very end.
    """
    pad, stride = 0, 1

    def after_conv(size):
        # Standard convolution output-size formula for one layer.
        return (size-kernel_size+2*pad)/stride+1

    size = after_conv(width)/2                 # first conv, then first pool
    size = after_conv(after_conv(size))/2      # two convs, then second pool
    return int(size)


## Network Analysis: Image Classification

**Approach**:
1) **Model creation**: Created the model "base" as per the specifications given in the exercise. The network outputs raw scores without applying softmax, because the cross-entropy loss function already contains it; softmax is applied only while calculating accuracy. The kernel size is chosen as 3. The number of input and output channels in the convolution layers and the number of neurons are chosen arbitrarily, keeping the complexity of the model in mind. The number of neurons after flattening is calculated using a predefined function named "dim_calc". This function uses the standard formulas to calculate the width of the output features at each convolution layer and returns the width of the feature output of the pool2 layer. 

2) To use only **50 percent of the training dataset**, "data_utils.Subset(train_dataset, indices)" is used with "indices" as the range of numbers from 0 up to 50 percent of the total length of the actual train data.

3) Baseline image classification without any data augmentation or normalization is performed along with other configurations below

In [3]:
class base(nn.Module):
    """Baseline CNN: conv -> pool -> conv -> conv -> pool -> three linear layers.

    Args:
        in_ch: number of channels of the input images.
        width: input side length handed to ``dim_calc``; ``fc1`` expects
            ``128 * dim_calc(width, kernel) ** 2`` features, so square
            feature maps are assumed.
        kernel: kernel size shared by the three convolution layers.
    """

    def __init__(self,in_ch,width,kernel):
        super(base, self).__init__()

        self.conv1 = nn.Conv2d(in_ch, out_channels=32, kernel_size=kernel)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32,64,kernel)
        self.conv3 = nn.Conv2d(64,128,kernel)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(128*dim_calc(width,kernel)**2,100)
        self.fc2 = nn.Linear(100,50)
        self.fc3 = nn.Linear(50, 10)
        self.relu=nn.ReLU()

    def forward(self, x):
        """Return the raw 10-way class scores (logits) for a batch ``x``."""
        x = self.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.pool2(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0),-1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # No activation on the output layer: CrossEntropyLoss expects
        # unbounded logits, and a ReLU here would clamp every class score to
        # be non-negative and zero the gradient of any class whose score
        # goes negative.
        return self.fc3(x)

## Exercise 1: Normalization Effect (CNN)

**Approach for data loading**

1)**Data Augmentation**: the images are flipped using "RandomHorizontalFlip" & "RandomVerticalFlip", and translated and scaled using "RandomAffine(degrees=0,translate=(0.1, 0.3),scale=(1.1,1.2))". 

2)**Normalization**: Each channel of the image is normalized by subtracting the mean (µ) of that channel and dividing by its standard deviation (σ). Referring to 
https://towardsdatascience.com/how-to-calculate-the-mean-and-standard-deviation-normalizing-datasets-in-pytorch-704bd7d05f4c on 21st June,2022.

3) All combinations of configurations are predefined for the training data using transforms.Compose(). For the test data, only normalization is added for the configurations "with normalization" and "with augmentation and normalization". For the configurations "baseline" and "with augmentations", only the basic transformation to tensor is used. To load these configurations when required, a function named "data_loader" is created.

In [4]:
# RandomAffine usage adapted from:
# https://www.programcreek.com/python/example/117699/torchvision.transforms.RandomAffine

# Train-time augmentation only: random flips, random translate/zoom, then tensor conversion.
augmetnations = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.3), scale=(1.1, 1.2)),
    transforms.ToTensor(),
])

# Same augmentation pipeline followed by per-channel normalization.
augmentations_with_norm = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.3), scale=(1.1, 1.2)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]),
])

# Train-time normalization without augmentation.
norms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]),
])

# Baseline: tensor conversion only.
basic_transforms = transforms.Compose([
    transforms.ToTensor(),
])

# Test-time pipeline for the two normalized configurations.
test_trfms_with_norm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261]),
])



In [5]:
config_l=["baseline","with augmetnations","with noramlization","augmentations_with_noramlization"]
def data_loader(conig):
    """Build the (train, test) CIFAR-10 datasets for one configuration.

    Args:
        conig: one of the names in ``config_l``. Any value that is not one of
            the first three entries falls through to the
            augmentation-plus-normalization setup.

    Returns:
        ``(train_50pcnt, test_dataset)``: the first half of the CIFAR-10
        training split (via ``fiftyprcnt``) and the full test split, each
        with the transform pipeline belonging to the configuration.
    """
    # The parameter keeps its original spelling so existing callers are
    # unaffected; binding it locally makes the comparisons below use the
    # argument instead of whatever module-level ``config`` happens to exist.
    config = conig
    if config==config_l[0]:
        train_tf, test_tf = basic_transforms, basic_transforms
    elif config==config_l[1]:
        train_tf, test_tf = augmetnations, basic_transforms
    elif config==config_l[2]:
        train_tf, test_tf = norms, test_trfms_with_norm
    else:
        train_tf, test_tf = augmentations_with_norm, test_trfms_with_norm
    train_50pcnt = fiftyprcnt(datasets.CIFAR10(root='data', train=True,download=True, transform=train_tf))
    test_dataset = datasets.CIFAR10(root='data', train=False,download=True, transform=test_tf)
    return train_50pcnt,test_dataset

**Learning with different configurations**. 
1) In learning, we iterate through the list of configurations **including baseline**; for each one, 50 percent of the train data and the full test data are loaded, and "torch.utils.data.DataLoader" is used to load the data in minibatches. 

2) **Batch size** is chosen as **128**. **Adam optimizer** with learning rate **0.001** is chosen. The "cross entropy loss" is backpropagated to update the weights. 

3) **Softmax** is applied on the output of the model and the predicted class is compared with the actual labels to get accuracies. Iterating through minibatches of train and test data, train and test losses and accuracies are computed at each epoch and written to TensorBoard.

In [6]:
def learning(config,optim,lr,kernel):
    """Train and evaluate a fresh ``base`` model for one data configuration.

    Runs 40 epochs with Adam and cross-entropy loss. After every epoch the
    sample-weighted mean train/test loss and the train/test accuracy (all
    rounded to 4 decimals) are written to TensorBoard under
    ``conf/coniguration=<config>`` and printed.

    Args:
        config: configuration name forwarded to ``data_loader``.
        optim: accepted for interface compatibility; it is not read here,
            Adam is always used.
        lr: learning rate passed to Adam.
        kernel: convolution kernel size passed to ``base``.
    """
    train_dataset,test_dataset=data_loader(config)
    trainloader_CIF = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True,num_workers = 2)
    testloader_CIF = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True)
    width,in_ch=32,3
    model = base(in_ch,width,kernel)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    writer = SummaryWriter(f"conf/coniguration={config}")
    print(f"for coniguration={config}")
    try:
        for j in range(40):
            # training
            model.train()
            train_loss_h,train_correct=0,0
            for images, labels in trainloader_CIF:
                optimizer.zero_grad()
                y_hat_train = model(images)
                train_loss = criterion(y_hat_train, labels)
                # Weight by batch size so the epoch value is a per-sample mean
                # even when the last batch is smaller.
                train_loss_h+=train_loss.item()*len(images)
                # Count hits per batch instead of concatenating per-sample
                # tensors into ever-growing Python lists.
                pred=F.softmax(y_hat_train.detach(), dim=1).argmax(dim=1)
                train_correct+=(pred==labels).sum().item()
                train_loss.backward()
                optimizer.step()
            train_loss_epoch=np.round((train_loss_h/len(train_dataset)),4)
            train_acc=np.round(train_correct/len(train_dataset),4)
            # testing
            model.eval()
            test_loss_h,test_correct=0,0
            with torch.no_grad():
                for images, labels in testloader_CIF:
                    y_hat_test = model(images)
                    test_loss = criterion(y_hat_test, labels)
                    test_loss_h+=test_loss.item()*len(images)
                    pred=F.softmax(y_hat_test, dim=1).argmax(dim=1)
                    test_correct+=(pred==labels).sum().item()
            # Round the accuracy itself (the mean), matching train_acc; the
            # rounding used to be applied to the element-wise comparison.
            test_acc=np.round(test_correct/len(test_dataset),4)
            test_loss_epoch=np.round((test_loss_h/len(test_dataset)),4)
            writer.add_scalar('Loss_CIFAR10/train', train_loss_epoch, j)
            writer.add_scalar('Loss_CIFAR10/test', test_loss_epoch, j)
            writer.add_scalar('Accuracy_CIFAR10/train', train_acc, j)
            writer.add_scalar('Accuracy_CIFAR10/test', test_acc, j)
            print(f"Epoch {j} - train_loss : {train_loss_epoch},test loss : {test_loss_epoch},train_acc : {train_acc},test acc : {test_acc}")
    finally:
        # Flush pending events and release the log file even if training fails.
        writer.close()
    print("                                                                                 ")

In [None]:
# Baseline hyperparameters shared by every data configuration.
optim = "Adam"
kernel = 3
lr = 0.001

# Seed torch's RNG once before the sweep over all configurations.
torch.manual_seed(4)
for config in config_l:
    learning(config, optim, lr, kernel)

### Exercise 2: Network Regularization (CNN)

**Approach for regularization**

**Data**
50 percent of data is taken with only baseline transformations i.e "toTensor()" as to have comparison with baseline.

**Models**
A new model named "base_drop" is created. Dropout with p=0.25 is added to the fully connected part of the original model "base". The remaining network is kept the same. For L1 and L2 regularization, the original model without dropout, i.e. "base", is used. 

**Learning**
For learning with different regularization techniques, a new function called "learning_regu" is created. This function checks the name of the regularization. If it is "dropout", it takes the model "base_drop". In order to handle dropout correctly, model.train() is called before training and model.eval() before testing. If it is l1 or l2, it takes the original model "base". Then while calculating the losses, 

for **l1 regularization**, loss is calculated as below,

lamda=0.0001<br>
l1_abs = sum(p.abs().sum() for p in model.parameters())<br>
train_loss = train_loss + lamda * l1_abs<br>

for **l2 regularization**, loss is calculated as below 

lamda=0.001<br>
l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())<br>
train_loss = train_loss + lamda * l2_norm<br>

In [8]:
#https://arxiv.org/pdf/1207.0580.pdf
# https://wandb.ai/authors/ayusht/reports/Implementing-Dropout-in-PyTorch-With-Example--VmlldzoxNTgwOTE
class base_drop(nn.Module):
    """Baseline CNN with dropout (p=0.25) in the fully connected part.

    The convolutional stack is conv -> pool -> conv -> conv -> pool; dropout
    is applied to the input of each of the three linear layers.

    Args:
        in_ch: number of channels of the input images.
        width: input side length handed to ``dim_calc``; ``fc1`` expects
            ``128 * dim_calc(width, kernel) ** 2`` features, so square
            feature maps are assumed.
        kernel: kernel size shared by the three convolution layers.
    """

    def __init__(self,in_ch,width,kernel):
        super(base_drop, self).__init__()

        self.conv1 = nn.Conv2d(in_ch, out_channels=32, kernel_size=kernel)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32,64,kernel)
        self.conv3 = nn.Conv2d(64,128,kernel)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(128*dim_calc(width,kernel)**2,100)
        self.fc2 = nn.Linear(100,50)
        self.fc3 = nn.Linear(50, 10)
        self.relu=nn.ReLU()
        self.drop_fc=nn.Dropout(p=0.25)

    def forward(self, x):
        """Return the raw 10-way class scores (logits) for a batch ``x``."""
        x = self.relu(self.conv1(x))
        x = self.pool1(x)
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.pool2(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0),-1)
        x = self.drop_fc(x)
        x = self.relu(self.fc1(x))
        x = self.drop_fc(x)
        x = self.relu(self.fc2(x))
        x = self.drop_fc(x)
        # No activation on the output layer: CrossEntropyLoss expects
        # unbounded logits, and a ReLU here would clamp every class score to
        # be non-negative and zero the gradient of any class whose score
        # goes negative.
        return self.fc3(x)

In [9]:
# Data for the exercises below: first half of the CIFAR-10 training split and
# the full test split, both with tensor conversion only.
train_dataset = fiftyprcnt(
    datasets.CIFAR10(root='data', train=True, download=True, transform=basic_transforms)
)
test_dataset = datasets.CIFAR10(
    root='data', train=False, download=True, transform=basic_transforms
)

# Mini-batch loaders (batch size 128) shared by the training functions below.
trainloader_CIF = torch.utils.data.DataLoader(
    train_dataset, batch_size=128, shuffle=True, num_workers=2
)
testloader_CIF = torch.utils.data.DataLoader(
    test_dataset, batch_size=128, shuffle=True
)

Files already downloaded and verified
Files already downloaded and verified


In [28]:
# https://androidkt.com/how-to-add-l1-l2-regularization-in-pytorch-loss-function/
def learning_regu(reg,optim,lr,kernel,lamda):
    """Train and evaluate the baseline CNN with one regularization technique.

    Uses the module-level ``trainloader_CIF`` / ``testloader_CIF`` and
    ``train_dataset`` / ``test_dataset``. Runs 40 epochs with Adam and logs
    per-epoch losses and accuracies (rounded to 4 decimals) to TensorBoard
    under ``conf/regularization=<reg>``. The logged train loss includes the
    L1/L2 penalty term; the test loss does not.

    Args:
        reg: ``"dropout"`` selects ``base_drop``; ``"l2"`` / ``"l1"`` add the
            respective weight penalty to the loss of ``base``; any other
            value trains ``base`` without a penalty.
        optim: accepted for interface compatibility; it is not read here,
            Adam is always used.
        lr: learning rate passed to Adam.
        kernel: convolution kernel size passed to the model.
        lamda: NOTE(review): this argument is overridden by fixed strengths
            (0.001 for l2, 0.0001 for l1), exactly as before this rewrite;
            confirm whether the caller's value should take precedence.
    """
    width,in_ch=32,3
    if reg=="dropout":
        model = base_drop(in_ch,width,kernel)
    else:
        model = base(in_ch,width,kernel)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    writer = SummaryWriter(f"conf/regularization={reg}")
    print(f"for regularization={reg}")
    try:
        for j in range(40):
            # training
            model.train()
            train_loss_h,train_correct=0,0
            for images, labels in trainloader_CIF:
                optimizer.zero_grad()
                y_hat_train = model(images)
                train_loss = criterion(y_hat_train, labels)
                if reg=="l2":
                    lamda=0.001
                    # Sum of squared entries over every parameter tensor.
                    l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
                    train_loss = train_loss + lamda * l2_norm
                elif reg=="l1":
                    lamda=0.0001
                    # Sum of absolute entries over every parameter tensor.
                    l1_abs = sum(p.abs().sum() for p in model.parameters())
                    train_loss = train_loss + lamda * l1_abs
                # Weight by batch size so the epoch value is a per-sample mean.
                train_loss_h+=train_loss.item()*len(images)
                # Count hits per batch instead of concatenating per-sample
                # tensors into ever-growing Python lists.
                pred=F.softmax(y_hat_train.detach(), dim=1).argmax(dim=1)
                train_correct+=(pred==labels).sum().item()
                train_loss.backward()
                optimizer.step()
            train_loss_epoch=np.round((train_loss_h/len(train_dataset)),4)
            train_acc=np.round(train_correct/len(train_dataset),4)
            # testing
            model.eval()
            test_loss_h,test_correct=0,0
            with torch.no_grad():
                for images, labels in testloader_CIF:
                    y_hat_test = model(images)
                    test_loss = criterion(y_hat_test, labels)
                    test_loss_h+=test_loss.item()*len(images)
                    pred=F.softmax(y_hat_test, dim=1).argmax(dim=1)
                    test_correct+=(pred==labels).sum().item()
            # Round the accuracy itself (the mean), matching train_acc; the
            # rounding used to be applied to the element-wise comparison.
            test_acc=np.round(test_correct/len(test_dataset),4)
            test_loss_epoch=np.round((test_loss_h/len(test_dataset)),4)
            writer.add_scalar('Loss_CIFAR10/train', train_loss_epoch, j)
            writer.add_scalar('Loss_CIFAR10/test', test_loss_epoch, j)
            writer.add_scalar('Accuracy_CIFAR10/train', train_acc, j)
            writer.add_scalar('Accuracy_CIFAR10/test', test_acc, j)
            print(f"Epoch {j} - train_loss : {train_loss_epoch},test loss : {test_loss_epoch},train_acc : {train_acc},test acc : {test_acc}")
    finally:
        # Flush pending events and release the log file even if training fails.
        writer.close()
    print("                                                                                 ")

In [None]:
# Regularization techniques to compare, in run order.
regu_l = ["l2", "l1", "dropout"]

# Shared hyperparameters for the regularization runs.
optim = "Adam"
kernel = 3
lr = 0.001
lamda = 0.001

# Seed torch's RNG once before the sweep.
torch.manual_seed(4)
for reg in regu_l:
    learning_regu(reg, optim, lr, kernel, lamda)

### Exercise 3: Optimizers (CNN)

**Approach**
**Data** 50 percent of the data is taken with only the baseline transformation, i.e. "ToTensor()", so as to have a comparison with the baseline. Here the baseline optimizer is also Adam with learning rate 0.001. It is also a part of the following exercise.

**Models**: the original model used for baseline i.e "base" is used.

**Learning** For learning with different optimizers, i.e. SGD and Adam, a new function called "learning_lr" is created. This function takes the name of the optimizer as an argument and learns with different learning rates, i.e. [0.01,0.001,0.00001]. The rest of the learning methodology is the same as that of the baseline model. The respective train/test losses and accuracies are recorded in TensorBoard for each combination of optimizer and learning rate. The same are analyzed below.

In [15]:
def optim_sel(model,opt,lr):
    """Build the optimizer named by ``opt`` over ``model``'s parameters.

    ``"Adam"`` selects ``torch.optim.Adam``; every other value selects
    ``torch.optim.SGD``. ``lr`` is passed through as the learning rate.
    """
    optimizer_cls = torch.optim.Adam if opt=="Adam" else torch.optim.SGD
    return optimizer_cls(model.parameters(), lr=lr)

In [33]:
def learning_lr(optim,kernel):
    """Train the baseline CNN with one optimizer at several learning rates.

    For each learning rate in ``[0.01, 0.001, 0.00001]`` a fresh ``base``
    model is trained for 40 epochs on the module-level ``trainloader_CIF``
    and evaluated on ``testloader_CIF``. Per-epoch losses and accuracies
    (rounded to 4 decimals) are logged to TensorBoard under
    ``conf/optimizer=<optim>_lr=<lr>`` and printed.

    Args:
        optim: optimizer name forwarded to ``optim_sel``.
        kernel: convolution kernel size passed to ``base``.
    """
    for lr in [0.01,0.001,0.00001]:
        width,in_ch=32,3
        model = base(in_ch,width,kernel)
        optimizer = optim_sel(model,optim,lr)
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter(f"conf/optimizer={optim}_lr={lr}")
        print(f"for optimizer={optim}_lr={lr}")
        try:
            for j in range(40):
                # training
                model.train()
                train_loss_h,train_correct=0,0
                for images, labels in trainloader_CIF:
                    optimizer.zero_grad()
                    y_hat_train = model(images)
                    train_loss = criterion(y_hat_train, labels)
                    # Weight by batch size so the epoch value is a per-sample mean.
                    train_loss_h+=train_loss.item()*len(images)
                    # Count hits per batch instead of concatenating per-sample
                    # tensors into ever-growing Python lists.
                    pred=F.softmax(y_hat_train.detach(), dim=1).argmax(dim=1)
                    train_correct+=(pred==labels).sum().item()
                    train_loss.backward()
                    optimizer.step()
                train_loss_epoch=np.round((train_loss_h/len(train_dataset)),4)
                train_acc=np.round(train_correct/len(train_dataset),4)
                # testing
                model.eval()
                test_loss_h,test_correct=0,0
                with torch.no_grad():
                    for images, labels in testloader_CIF:
                        y_hat_test = model(images)
                        test_loss = criterion(y_hat_test, labels)
                        test_loss_h+=test_loss.item()*len(images)
                        pred=F.softmax(y_hat_test, dim=1).argmax(dim=1)
                        test_correct+=(pred==labels).sum().item()
                # Round the accuracy itself (the mean), matching train_acc; the
                # rounding used to be applied to the element-wise comparison.
                test_acc=np.round(test_correct/len(test_dataset),4)
                test_loss_epoch=np.round((test_loss_h/len(test_dataset)),4)
                writer.add_scalar('Loss_CIFAR10/train', train_loss_epoch, j)
                writer.add_scalar('Loss_CIFAR10/test', test_loss_epoch, j)
                writer.add_scalar('Accuracy_CIFAR10/train', train_acc, j)
                writer.add_scalar('Accuracy_CIFAR10/test', test_acc, j)
                print(f"Epoch {j} - train_loss : {train_loss_epoch},test loss : {test_loss_epoch},train_acc : {train_acc},test acc : {test_acc}")
        finally:
            # Flush pending events and release the log file for this run,
            # even if training fails part-way.
            writer.close()
        print("                                                                                 ")

In [None]:
# Seed torch's RNG once, then sweep both optimizers; `kernel` is the value
# assigned in an earlier cell.
torch.manual_seed(4)
for optim in ("SGD", "Adam"):
    learning_lr(optim, kernel)