## Define dataset

In [6]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader

batch_size=100

transform=transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

cifar_path="../../Datasets/cifar10"
weight_path="../../Weights/Vgg19"

train=datasets.CIFAR10(root=cifar_path, download=True, train=True, transform=transform)


test=datasets.CIFAR10(root=cifar_path, download=True, train=False, transform=transform)
test_dl=DataLoader(test, batch_size=batch_size)
 
class_=('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


### Train, Validation split

In [7]:
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

indices=list(range(len(train)))
values=[y for _,y in train]

s=StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_idx, val_idx in s.split(indices, values):
    print(f"#train:{len(train_idx)}")
    print(f"#val:{len(val_idx)}")

#train:40000
#val:10000


In [8]:
import collections
from torch.utils.data import Subset

val=Subset(train, val_idx)
train=Subset(train, train_idx)

val_count=collections.Counter([y for _, y in val])
train_count=collections.Counter([y for _, y in train])

print(f"train distribution:{train_count}")
print(f"validation distribution:{val_count}")

train distribution:Counter({6: 4000, 7: 4000, 8: 4000, 3: 4000, 2: 4000, 1: 4000, 4: 4000, 9: 4000, 0: 4000, 5: 4000})
validation distribution:Counter({2: 1000, 7: 1000, 1: 1000, 9: 1000, 6: 1000, 8: 1000, 0: 1000, 3: 1000, 5: 1000, 4: 1000})


In [9]:
train_dl=DataLoader(train, batch_size=batch_size)
val_dl=DataLoader(val, batch_size=batch_size)

### Define Model

In [10]:
import torch.nn as nn
# On thesis paper, initializion is done by weights with pretrained 11 layer net
# But they also mentioned  it is possible to initialize the weights without pre training by using "glorot et al 2010"

class VGG19(nn.Module):
  def __init__(self, initialization, num_classes=10):
    super(VGG19, self).__init__()

    self.initialization=initialization
    
    self.conv=nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(), #64, 32, 32
        nn.Conv2d(64, 64, kernel_size=3, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(2,2), #64, 16, 16

        nn.Conv2d(64, 128, kernel_size=3, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(), #128, 16, 16
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(2, 2), #128, 8, 8

        nn.Conv2d(128, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(2, 2), #256, 4, 4

        nn.Conv2d(256, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),      
        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),    
        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),    
        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(2, 2), #512, 2, 2 

        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),      
        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),    
        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),    
        nn.Conv2d(512, 512, kernel_size=3, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(2, 2), #512, 1, 1 
    )
    self.classifier=nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(512, 1000),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(1000, 500),
        nn.ReLU(),
        nn.Linear(500, num_classes)
    )
    """
    for layer in self.modules(): #Use 4 normalization
          if isinstance(layer, (nn.Conv2d or nn.BatchNorm2d or nn.Linear)):
                  if initialization == "Xavier uniform":
                      nn.init.xavier_uniform_(layer.weight.data)
                  if initialization == "Xavier normal":
                      nn.init.xavier_normal_(layer.weight.data)
                  if initialization == "He uniform":
                      nn.init.kaiming_uniform_(layer.weight.data, mode='fan_in', nonlinearity='relu')
                  if initialization == "He normal":
                      nn.init.kaiming_normal_(layer.weight.data, mode='fan_in', nonlinearity='relu')
                  if initialization == "Normal":
                      nn.init.normal_(layer.weight.data, mean=0, std=0.01)  
    """                 
  def init_name(self):
    return self.initialization
                                    
  def forward(self, x):
    output=self.conv(x)
    output=output.view(-1, 512)
    output=self.classifier(output)
    return output


In [11]:
import torch
from torchsummary import summary

device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

model_xavier_uniform=VGG19("Xavier uniform").to(device)
model_xavier_normal=VGG19("Xavier normal").to(device)
model_he_uniform=VGG19("He uniform").to(device)
model_he_normal=VGG19("He normal").to(device)
model_normal=VGG19("Normal").to(device)

models=[model_xavier_uniform, model_xavier_normal, model_he_uniform, model_he_normal, model_normal]

print(summary(model_he_normal, input_size=(3,32, 32)))

cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,928
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
         MaxPool2d-7           [-1, 64, 16, 16]               0
            Conv2d-8          [-1, 128, 16, 16]          73,856
       BatchNorm2d-9          [-1, 128, 16, 16]             256
             ReLU-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 128, 16, 16]         147,584
      BatchNorm2d-12          [-1, 128, 16, 16]             256
             ReLU-13          [-1, 128, 16, 16]               0
        MaxPool2d-14            [-

In [12]:
model_best_acc=VGG19("He normal").to(device) 
model_state_dict=torch.load("..\..\Weights\Vgg19\He normal_27_0.0_vgg_acc.pt", map_location=device)
model_best_acc.load_state_dict(model_state_dict)

model_best_loss=VGG19("He normal").to(device)
model_state_dict=torch.load("..\..\Weights\Vgg19\He normal_27_0.0_vgg_loss.pt", map_location=device)
model_best_loss.load_state_dict(model_state_dict)

<All keys matched successfully>

### Best Accuracy test

In [22]:
running_loss=0
running_acc=0
loss_fn=nn.CrossEntropyLoss()

model_best_loss.eval()
with torch.no_grad():
    for i, data in enumerate(test_dl): 
        test_x, test_y= data
        test_x, test_y= test_x.to(device), test_y.to(device)
        
        test_pred=model_best_loss(test_x)
        loss=loss_fn(test_pred, test_y)
        _, predicted = torch.max(test_pred, 1)
        acc = (predicted == test_y).squeeze().int()
        acc = torch.sum(acc)
        
        running_loss+=loss.item()
        running_acc+=acc
        
    print(f"test loss: {running_loss/i}, test accuracy: {running_acc/len(test)*100}%")

test loss: 0.7995142750065736, test accuracy: 83.0%


### Best Loss test

In [25]:
running_loss=0
running_acc=0

model_best_acc.eval()
with torch.no_grad():
    for i, data in enumerate(test_dl): 
        test_x, test_y= data
        test_x, test_y= test_x.to(device), test_y.to(device)
        
        test_pred=model_best_acc(test_x)
        loss=loss_fn(test_pred, test_y)
        _, predicted = torch.max(test_pred, 1)
        acc = (predicted == test_y).squeeze().int()
        acc = torch.sum(acc)
        
        running_loss+=loss.item()
        running_acc+=acc
        
    print(f"test loss: {running_loss/i}, test accuracy: {running_acc/len(test)*100}%")

test loss: 0.8031363710008487, test accuracy: 82.5%
