In [1]:
import pickle

LeNet5, AlexNet, VGG, ResNet18, SENet18 and GoogleNet Architrctures are defined

**LeNet5 ARCHITECTURE**

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class LeNet5(nn.Module):
    def __init__(self, num_classes=10):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

**AlexNet ARCHITECTURE**

In [None]:
import torch.nn as nn

class AlexNet(nn.Module):
    def __init__(self, num_classes=10, input_channels=3):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

**VGG ARCHITECTURE**

In [2]:
import torch.nn as nn

class VGG(nn.Module):
    def __init__(self, num_classes=10, input_channels=3):
        super(VGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


**ResNet18 ARCHITECTURE**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet18(nn.Module):
    def __init__(self, num_classes=10, input_channels=3):
        super(ResNet18, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(64, 2, stride=1)
        self.layer2 = self._make_layer(128, 2, stride=2)
        self.layer3 = self._make_layer(256, 2, stride=2)
        self.layer4 = self._make_layer(512, 2, stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, planes, blocks, stride):
        strides = [stride] + [1]*(blocks-1)
        layers = []
        for stride in strides:
            layers.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.max_pool2d(out, kernel_size=3, stride=2, padding=1)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, (1,1))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

**SENet18 ARCHITECTURE**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SEBlock(nn.Module):
    def __init__(self, in_channels, reduction=16):
        super(SEBlock, self).__init__()
        self.fc1 = nn.Linear(in_channels, in_channels // reduction, bias=False)
        self.fc2 = nn.Linear(in_channels // reduction, in_channels, bias=False)

    def forward(self, x):
        b, c, _, _ = x.size()
        y = F.adaptive_avg_pool2d(x, 1).view(b, c)
        y = F.relu(self.fc1(y))
        y = torch.sigmoid(self.fc2(y)).view(b, c, 1, 1)
        return x * y.expand_as(x)

class SEBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, reduction=16):
        super(SEBasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.se = SEBlock(planes, reduction)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class SENet18(nn.Module):
    def __init__(self, num_classes=10, input_channels=3):
        super(SENet18, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(SEBasicBlock, 64, 2, stride=1)
        self.layer2 = self._make_layer(SEBasicBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(SEBasicBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(SEBasicBlock, 512, 2, stride=2)
        self.linear = nn.Linear(512 * SEBasicBlock.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, stride):
        strides = [stride] + [1] * (blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.max_pool2d(out, kernel_size=3, stride=2, padding=1)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

**GoogleNet ARCHITECTURE**

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Inception(nn.Module):
    def __init__(self, in_channels, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj):
        super(Inception, self).__init__()
        self.branch1 = nn.Conv2d(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            nn.Conv2d(in_channels, ch3x3red, kernel_size=1),
            nn.Conv2d(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            nn.Conv2d(in_channels, ch5x5red, kernel_size=1),
            nn.Conv2d(ch5x5red, ch5x5, kernel_size=5, padding=2)
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1)
        )

    def forward(self, x):
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)
        outputs = branch1, branch2, branch3, branch4
        return torch.cat(outputs, dim=1)

class GoogleNet(nn.Module):
    def __init__(self, num_classes=10, input_channels=3, aux_logits=True):
        super(GoogleNet, self).__init__()
        self.aux_logits = aux_logits
        self.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=1)
        self.conv3 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)

        if aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)

        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.maxpool3(x)

        x = self.inception4a(x)
        if self.aux_logits and self.training:
            aux1 = self.aux1(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        if self.aux_logits and self.training:
            aux2 = self.aux2(x)
        x = self.inception4e(x)
        x = self.maxpool4(x)

        x = self.inception5a(x)
        x = self.inception5b(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        if self.aux_logits and self.training:
            return aux1, aux2, x
        return x

class InceptionAux(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        self.conv = nn.Conv2d(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(128*4*4, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(0.7)

    def forward(self, x):
        x = F.adaptive_avg_pool2d(x, (4, 4))
        x = self.conv(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x), inplace=True)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

**DOWNLOADING DATA**

In [3]:
import torch
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms

def get_datasets():
    transform_mnist = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    transform_cifar = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    mnist_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform_mnist)
    mnist_test = datasets.MNIST(root='./data', train=False, download=True, transform=transform_mnist)

    fmnist_train = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform_mnist)
    fmnist_test = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform_mnist)

    cifar10_train = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cifar)
    cifar10_test = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cifar)

    return {
        'MNIST': (mnist_train, mnist_test),
        'FMNIST': (fmnist_train, fmnist_test),
        'CIFAR-10': (cifar10_train, cifar10_test)
    }


In [4]:
#Function call for getting the data
data = get_datasets()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4525360.77it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 56913.30it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1076482.08it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4632910.69it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 10449857.49it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 166197.30it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:02<00:00, 1726050.85it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 6224351.97it/s]


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:18<00:00, 9098514.18it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [5]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


**USER DEFINED FUNCTION FOR TRAINING AND EVALUATING THE MODEL**

In [11]:
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import precision_score, recall_score, f1_score

def training_and_evaluating(model, train_loader, test_loader, num_epochs):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001)
  scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

  train_loss_list = []

  for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
      images, labels = images.to(device), labels.to(device)
      optimizer.zero_grad()
      outputs = model(images)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
      running_loss += loss.item()

    scheduler.step()

    #getting the loss curve
    train_loss = running_loss / len(train_loader)
    train_loss_list.append(train_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')

  # Test the model
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for images, labels in test_loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  #calculating accuracy
  accuracy = 100 * correct / total

  #other evaluation metrics
  precision = precision_score(labels.cpu(), predicted.cpu(),average='macro')
  recall = recall_score(labels.cpu(), predicted.cpu(),average='macro')
  f1 = f1_score(labels.cpu(), predicted.cpu(),average='macro')

  print(f'\nAccuracy: {accuracy:.2f}%')
  print(f'Precision: {precision:.2f}')
  print(f'Recall: {recall:.2f}')
  print(f'F1-score: {f1:.2f}')

  metrics = {'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1-score':f1 }

  return train_loss_list, metrics

In [6]:
#training and evaluating GoogleNet
#weighted loss is calculated for original and auxiliary outputs
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import precision_score, recall_score, f1_score

def training_and_evaluating_GoogleNet(model, train_loader, test_loader, num_epochs):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001)
  scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

  train_loss_list = []

  for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
      images, labels = images.to(device), labels.to(device)
      optimizer.zero_grad()
      aux1, aux2, outputs = model(images)
      loss = criterion(outputs, labels)
      aux_loss_1 = criterion(aux1, labels)
      aux_loss_2 = criterion(aux2, labels)
      total_loss = loss + 0.3 * aux_loss_1 + 0.3 * aux_loss_2
      total_loss.backward()
      optimizer.step()
      running_loss += total_loss.item()

    scheduler.step()

    #getting the loss curve
    train_loss = running_loss / len(train_loader)
    train_loss_list.append(train_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}')

  # Test the model
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for images, labels in test_loader:
      images, labels = images.to(device), labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  #calculating accuracy
  accuracy = 100 * correct / total

  #other evaluation metrics
  precision = precision_score(labels.cpu(), predicted.cpu(),average='macro')
  recall = recall_score(labels.cpu(), predicted.cpu(),average='macro')
  f1 = f1_score(labels.cpu(), predicted.cpu(),average='macro')

  print(f'\nAccuracy: {accuracy:.2f}%')
  print(f'Precision: {precision:.2f}')
  print(f'Recall: {recall:.2f}')
  print(f'F1-score: {f1:.2f}')

  metrics = {'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1-score':f1 }

  return train_loss_list, metrics

**MNIST DATA**

In [7]:
#Data_loader for MNIST
from torch.utils.data import DataLoader
train_loader = DataLoader(data['MNIST'][0], batch_size=64, shuffle=True)
test_loader = DataLoader(data['MNIST'][1], batch_size=1000, shuffle=False)

**Function call for each model**

In [None]:
#parameters of training_and_evaluating fumction --> model, train_loader, test_loader, num_epochs

model_1 = LeNet5(num_classes=10).to(device)
m_train_loss_list_1, m_metrics_1 = training_and_evaluating(model_1, train_loader, test_loader, 20)
m_metrics_1['Dataset']='MNIST'
m_metrics_1['Model name']='LeNet5'
pickle.dump(m_train_loss_list_1, open("m_train_loss_list_1.p", "wb"))
pickle.dump(m_metrics_1, open("m_metrics_1.p", "wb"))

Epoch [1/20], Loss: 0.1955
Epoch [2/20], Loss: 0.0504
Epoch [3/20], Loss: 0.0337
Epoch [4/20], Loss: 0.0248
Epoch [5/20], Loss: 0.0186
Epoch [6/20], Loss: 0.0146
Epoch [7/20], Loss: 0.0118
Epoch [8/20], Loss: 0.0098
Epoch [9/20], Loss: 0.0086
Epoch [10/20], Loss: 0.0077
Epoch [11/20], Loss: 0.0070
Epoch [12/20], Loss: 0.0066
Epoch [13/20], Loss: 0.0063
Epoch [14/20], Loss: 0.0062
Epoch [15/20], Loss: 0.0060
Epoch [16/20], Loss: 0.0059
Epoch [17/20], Loss: 0.0058
Epoch [18/20], Loss: 0.0058
Epoch [19/20], Loss: 0.0057
Epoch [20/20], Loss: 0.0057

Accuracy: 99.25%
Precision: 1.00
Recall: 1.00
F1-score: 1.00


In [None]:
model_2 = AlexNet(num_classes=10).to(device)
m_train_loss_list_2, m_metrics_2 = training_and_evaluating(model_2, train_loader, test_loader, 20)
m_metrics_2['Dataset']='MNIST'
m_metrics_2['Model name']='AlexNet'
pickle.dump(m_train_loss_list_2, open("m_train_loss_list_2.p", "wb"))
pickle.dump(m_metrics_2, open("m_metrics_2.p", "wb"))

Epoch [1/20], Loss: 0.4093
Epoch [2/20], Loss: 0.0884
Epoch [3/20], Loss: 0.0474
Epoch [4/20], Loss: 0.0295
Epoch [5/20], Loss: 0.0156
Epoch [6/20], Loss: 0.0095
Epoch [7/20], Loss: 0.0048
Epoch [8/20], Loss: 0.0033
Epoch [9/20], Loss: 0.0017
Epoch [10/20], Loss: 0.0008
Epoch [11/20], Loss: 0.0005
Epoch [12/20], Loss: 0.0004
Epoch [13/20], Loss: 0.0003
Epoch [14/20], Loss: 0.0002
Epoch [15/20], Loss: 0.0002
Epoch [16/20], Loss: 0.0003
Epoch [17/20], Loss: 0.0001
Epoch [18/20], Loss: 0.0001
Epoch [19/20], Loss: 0.0001
Epoch [20/20], Loss: 0.0001

Accuracy: 99.29%
Precision: 1.00
Recall: 1.00
F1-score: 1.00


In [None]:
model_3 = VGG(num_classes=10).to(device)
m_train_loss_list_3, m_metrics_3 = training_and_evaluating(model_3, train_loader, test_loader, 20)
m_metrics_3['Dataset']='MNIST'
m_metrics_3['Model name']='VGG'
pickle.dump(m_train_loss_list_3, open("m_train_loss_list_3.p", "wb"))
pickle.dump(m_metrics_3, open("m_metrics_3.p", "wb"))

Epoch [1/20], Loss: 2.3021
Epoch [2/20], Loss: 2.3015
Epoch [3/20], Loss: 2.3013
Epoch [4/20], Loss: 2.3013
Epoch [5/20], Loss: 2.3013
Epoch [6/20], Loss: 2.3012
Epoch [7/20], Loss: 2.3012
Epoch [8/20], Loss: 2.3012
Epoch [9/20], Loss: 2.3012
Epoch [10/20], Loss: 2.3012
Epoch [11/20], Loss: 2.3012
Epoch [12/20], Loss: 2.3012
Epoch [13/20], Loss: 2.3012
Epoch [14/20], Loss: 2.3012
Epoch [15/20], Loss: 2.3012
Epoch [16/20], Loss: 2.3012
Epoch [17/20], Loss: 2.3012
Epoch [18/20], Loss: 2.3012
Epoch [19/20], Loss: 2.3012
Epoch [20/20], Loss: 2.3012

Accuracy: 11.35%
Precision: 0.01
Recall: 0.10
F1-score: 0.02


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
model_4 = ResNet18(num_classes=10).to(device)
m_train_loss_list_4, m_metrics_4 = training_and_evaluating(model_4, train_loader, test_loader, 20)
m_metrics_4['Dataset']='MNIST'
m_metrics_4['Model name']='ResNet18'
pickle.dump(m_train_loss_list_4, open("m_train_loss_list_4.p", "wb"))
pickle.dump(m_metrics_4, open("m_metrics_4.p", "wb"))

Epoch [1/20], Loss: 0.1344
Epoch [2/20], Loss: 0.0513
Epoch [3/20], Loss: 0.0333
Epoch [4/20], Loss: 0.0222
Epoch [5/20], Loss: 0.0162
Epoch [6/20], Loss: 0.0110
Epoch [7/20], Loss: 0.0072
Epoch [8/20], Loss: 0.0050
Epoch [9/20], Loss: 0.0034
Epoch [10/20], Loss: 0.0020
Epoch [11/20], Loss: 0.0014
Epoch [12/20], Loss: 0.0011
Epoch [13/20], Loss: 0.0008
Epoch [14/20], Loss: 0.0008
Epoch [15/20], Loss: 0.0005
Epoch [16/20], Loss: 0.0004
Epoch [17/20], Loss: 0.0004
Epoch [18/20], Loss: 0.0003
Epoch [19/20], Loss: 0.0003
Epoch [20/20], Loss: 0.0003

Accuracy: 99.50%
Precision: 1.00
Recall: 1.00
F1-score: 1.00


In [None]:
model_5 = SENet18(num_classes=10).to(device)
m_train_loss_list_5, m_metrics_5 = training_and_evaluating(model_5, train_loader, test_loader, 20)
m_metrics_5['Dataset']='MNIST'
m_metrics_5['Model name']='SENet18'
pickle.dump(m_train_loss_list_5, open("m_train_loss_list_5.p", "wb"))
pickle.dump(m_metrics_5, open("m_metrics_5.p", "wb"))

Epoch [1/20], Loss: 0.1150
Epoch [2/20], Loss: 0.0440
Epoch [3/20], Loss: 0.0294
Epoch [4/20], Loss: 0.0206
Epoch [5/20], Loss: 0.0138
Epoch [6/20], Loss: 0.0095
Epoch [7/20], Loss: 0.0062
Epoch [8/20], Loss: 0.0039
Epoch [9/20], Loss: 0.0029
Epoch [10/20], Loss: 0.0018
Epoch [11/20], Loss: 0.0012
Epoch [12/20], Loss: 0.0008
Epoch [13/20], Loss: 0.0007
Epoch [14/20], Loss: 0.0006
Epoch [15/20], Loss: 0.0005
Epoch [16/20], Loss: 0.0004
Epoch [17/20], Loss: 0.0004
Epoch [18/20], Loss: 0.0003
Epoch [19/20], Loss: 0.0003
Epoch [20/20], Loss: 0.0003

Accuracy: 99.45%
Precision: 1.00
Recall: 0.99
F1-score: 1.00


In [None]:
model_6 = GoogleNet(num_classes=10).to(device)
m_train_loss_list_6, m_metrics_6 = training_and_evaluating_GoogleNet(model_6, train_loader, test_loader, 20)
m_metrics_6['Dataset']='MNIST'
m_metrics_6['Model name']='GoogleNet'
pickle.dump(m_train_loss_list_6, open("m_train_loss_list_6.p", "wb"))
pickle.dump(m_metrics_6, open("m_metrics_6.p", "wb"))

Epoch [1/20], Loss: 0.4807
Epoch [2/20], Loss: 0.1030
Epoch [3/20], Loss: 0.0628
Epoch [4/20], Loss: 0.0376
Epoch [5/20], Loss: 0.0223
Epoch [6/20], Loss: 0.0143
Epoch [7/20], Loss: 0.0058
Epoch [8/20], Loss: 0.0031
Epoch [9/20], Loss: 0.0008
Epoch [10/20], Loss: 0.0001
Epoch [11/20], Loss: 0.0001
Epoch [12/20], Loss: 0.0000
Epoch [13/20], Loss: 0.0000
Epoch [14/20], Loss: 0.0000
Epoch [15/20], Loss: 0.0000
Epoch [16/20], Loss: 0.0000
Epoch [17/20], Loss: 0.0000
Epoch [18/20], Loss: 0.0000
Epoch [19/20], Loss: 0.0000
Epoch [20/20], Loss: 0.0000

Accuracy: 99.55%
Precision: 1.00
Recall: 1.00
F1-score: 1.00


**FASHION MNIST DATA**

In [None]:
#Data_loader for FASHION MNIST
from torch.utils.data import DataLoader
train_loader = DataLoader(data['FMNIST'][0], batch_size=64, shuffle=True)
test_loader = DataLoader(data['FMNIST'][1], batch_size=1000, shuffle=False)

**Function call for each model**

In [None]:
#parameters of training_and_evaluating fumction --> model, train_loader, test_loader, num_epochs

model_1 = LeNet5(num_classes=10).to(device)
f_train_loss_list_1, f_metrics_1 = training_and_evaluating(model_1, train_loader, test_loader, 20)
f_metrics_1['Dataset']='FASHION MNIST'
f_metrics_1['Model name']='LeNet5'
pickle.dump(f_train_loss_list_1, open("f_train_loss_list_1.p", "wb"))
pickle.dump(f_metrics_1, open("f_metrics_1.p", "wb"))

Epoch [1/20], Loss: 0.5674
Epoch [2/20], Loss: 0.3654
Epoch [3/20], Loss: 0.3168
Epoch [4/20], Loss: 0.2900
Epoch [5/20], Loss: 0.2699
Epoch [6/20], Loss: 0.2578
Epoch [7/20], Loss: 0.2470
Epoch [8/20], Loss: 0.2403
Epoch [9/20], Loss: 0.2350
Epoch [10/20], Loss: 0.2314
Epoch [11/20], Loss: 0.2287
Epoch [12/20], Loss: 0.2272
Epoch [13/20], Loss: 0.2258
Epoch [14/20], Loss: 0.2249
Epoch [15/20], Loss: 0.2242
Epoch [16/20], Loss: 0.2238
Epoch [17/20], Loss: 0.2235
Epoch [18/20], Loss: 0.2232
Epoch [19/20], Loss: 0.2230
Epoch [20/20], Loss: 0.2230

Accuracy: 89.72%
Precision: 0.89
Recall: 0.89
F1-score: 0.89


In [None]:
model_2 = AlexNet(num_classes=10).to(device)
f_train_loss_list_2, f_metrics_2 = training_and_evaluating(model_2, train_loader, test_loader, 20)
f_metrics_2['Dataset']='FASHION MNIST'
f_metrics_2['Model name']='AlexNet'
pickle.dump(f_train_loss_list_2, open("f_train_loss_list_2.p", "wb"))
pickle.dump(f_metrics_2, open("f_metrics_2.p", "wb"))

  return F.conv2d(input, weight, bias, self.stride,


Epoch [1/20], Loss: 0.7550
Epoch [2/20], Loss: 0.4043
Epoch [3/20], Loss: 0.3343
Epoch [4/20], Loss: 0.2874
Epoch [5/20], Loss: 0.2516
Epoch [6/20], Loss: 0.2206
Epoch [7/20], Loss: 0.1936
Epoch [8/20], Loss: 0.1700
Epoch [9/20], Loss: 0.1537
Epoch [10/20], Loss: 0.1384
Epoch [11/20], Loss: 0.1280
Epoch [12/20], Loss: 0.1206
Epoch [13/20], Loss: 0.1141
Epoch [14/20], Loss: 0.1103
Epoch [15/20], Loss: 0.1078
Epoch [16/20], Loss: 0.1055
Epoch [17/20], Loss: 0.1040
Epoch [18/20], Loss: 0.1040
Epoch [19/20], Loss: 0.1019
Epoch [20/20], Loss: 0.1021

Accuracy: 89.88%
Precision: 0.90
Recall: 0.91
F1-score: 0.90


In [None]:
model_3 = VGG(num_classes=10).to(device)
f_train_loss_list_3, f_metrics_3 = training_and_evaluating(model_3, train_loader, test_loader, 20)
f_metrics_3['Dataset']='FASHION MNIST'
f_metrics_3['Model name']='VGG'
pickle.dump(f_train_loss_list_3, open("f_train_loss_list_3.p", "wb"))
pickle.dump(f_metrics_3, open("f_metrics_3.p", "wb"))

Epoch [1/20], Loss: 2.3084
Epoch [2/20], Loss: 2.3028
Epoch [3/20], Loss: 2.3027
Epoch [4/20], Loss: 2.3027
Epoch [5/20], Loss: 2.3027
Epoch [6/20], Loss: 2.3026
Epoch [7/20], Loss: 2.3026
Epoch [8/20], Loss: 2.3026
Epoch [9/20], Loss: 2.3026
Epoch [10/20], Loss: 2.3026
Epoch [11/20], Loss: 2.3026
Epoch [12/20], Loss: 2.3026
Epoch [13/20], Loss: 2.3026
Epoch [14/20], Loss: 2.3026
Epoch [15/20], Loss: 2.3026
Epoch [16/20], Loss: 2.3026
Epoch [17/20], Loss: 2.3026
Epoch [18/20], Loss: 2.3026
Epoch [19/20], Loss: 2.3026
Epoch [20/20], Loss: 2.3026

Accuracy: 10.00%
Precision: 0.01
Recall: 0.10
F1-score: 0.02


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
model_4 = ResNet18(num_classes=10).to(device)
f_train_loss_list_4, f_metrics_4 = training_and_evaluating(model_4, train_loader, test_loader, 20)
f_metrics_4['Dataset']='FASHION MNIST'
f_metrics_4['Model name']='ResNet18'
pickle.dump(f_train_loss_list_4, open("f_train_loss_list_4.p", "wb"))
pickle.dump(f_metrics_4, open("f_metrics_4.p", "wb"))

Epoch [1/20], Loss: 0.4332
Epoch [2/20], Loss: 0.2959
Epoch [3/20], Loss: 0.2452
Epoch [4/20], Loss: 0.2089
Epoch [5/20], Loss: 0.1776
Epoch [6/20], Loss: 0.1478
Epoch [7/20], Loss: 0.1178
Epoch [8/20], Loss: 0.0937
Epoch [9/20], Loss: 0.0710
Epoch [10/20], Loss: 0.0550
Epoch [11/20], Loss: 0.0416
Epoch [12/20], Loss: 0.0324
Epoch [13/20], Loss: 0.0255
Epoch [14/20], Loss: 0.0202
Epoch [15/20], Loss: 0.0182
Epoch [16/20], Loss: 0.0155
Epoch [17/20], Loss: 0.0137
Epoch [18/20], Loss: 0.0127
Epoch [19/20], Loss: 0.0125
Epoch [20/20], Loss: 0.0122

Accuracy: 92.60%
Precision: 0.92
Recall: 0.93
F1-score: 0.92


In [None]:
model_5 = SENet18(num_classes=10).to(device)
f_train_loss_list_5, f_metrics_5 = training_and_evaluating(model_5, train_loader, test_loader, 20)
f_metrics_5['Dataset']='FASHION MNIST'
f_metrics_5['Model name']='SENet18'
pickle.dump(f_train_loss_list_5, open("f_train_loss_list_5.p", "wb"))
pickle.dump(f_metrics_5, open("f_metrics_5.p", "wb"))

Epoch [1/20], Loss: 0.4144
Epoch [2/20], Loss: 0.2777
Epoch [3/20], Loss: 0.2303
Epoch [4/20], Loss: 0.1927
Epoch [5/20], Loss: 0.1588
Epoch [6/20], Loss: 0.1279
Epoch [7/20], Loss: 0.0999
Epoch [8/20], Loss: 0.0772
Epoch [9/20], Loss: 0.0576
Epoch [10/20], Loss: 0.0448
Epoch [11/20], Loss: 0.0351
Epoch [12/20], Loss: 0.0276
Epoch [13/20], Loss: 0.0214
Epoch [14/20], Loss: 0.0187
Epoch [15/20], Loss: 0.0159
Epoch [16/20], Loss: 0.0147
Epoch [17/20], Loss: 0.0135
Epoch [18/20], Loss: 0.0126
Epoch [19/20], Loss: 0.0120
Epoch [20/20], Loss: 0.0117

Accuracy: 92.15%
Precision: 0.93
Recall: 0.93
F1-score: 0.93


In [None]:
model_6 = GoogleNet(num_classes=10).to(device)
f_train_loss_list_6, f_metrics_6 = training_and_evaluating_GoogleNet(model_6, train_loader, test_loader, 20)
f_metrics_6['Dataset']='FASHION MNIST'
f_metrics_6['Model name']='GoogleNet'
pickle.dump(f_train_loss_list_6, open("f_train_loss_list_6.p", "wb"))
pickle.dump(f_metrics_6, open("f_metrics_6.p", "wb"))

  return F.conv2d(input, weight, bias, self.stride,


Epoch [1/20], Loss: 1.0181
Epoch [2/20], Loss: 0.5377
Epoch [3/20], Loss: 0.4240
Epoch [4/20], Loss: 0.3400
Epoch [5/20], Loss: 0.2625
Epoch [6/20], Loss: 0.1923
Epoch [7/20], Loss: 0.1295
Epoch [8/20], Loss: 0.0788
Epoch [9/20], Loss: 0.0415
Epoch [10/20], Loss: 0.0183
Epoch [11/20], Loss: 0.0086
Epoch [12/20], Loss: 0.0033
Epoch [13/20], Loss: 0.0015
Epoch [14/20], Loss: 0.0009
Epoch [15/20], Loss: 0.0008
Epoch [16/20], Loss: 0.0006
Epoch [17/20], Loss: 0.0005
Epoch [18/20], Loss: 0.0004
Epoch [19/20], Loss: 0.0003
Epoch [20/20], Loss: 0.0003

Accuracy: 91.24%
Precision: 0.92
Recall: 0.92
F1-score: 0.92


**CIFAR-10 DATA**

In [7]:
#Data_loader for CIFAR-10 DATA
from torch.utils.data import DataLoader
train_loader = DataLoader(data['CIFAR-10'][0], batch_size=64, shuffle=True)
test_loader = DataLoader(data['CIFAR-10'][1], batch_size=1000, shuffle=False)

**Function call for each model**

In [None]:
#parameters of training_and_evaluating fumction --> model, train_loader, test_loader, num_epochs

model_1 = LeNet5(num_classes=10).to(device)
c_train_loss_list_1, c_metrics_1 = training_and_evaluating(model_1, train_loader, test_loader, 100)
c_metrics_1['Dataset']='CIFAR-10'
c_metrics_1['Model name']='LeNet5'
pickle.dump(c_train_loss_list_1, open("c_train_loss_list_1.p", "wb"))
pickle.dump(c_metrics_1, open("c_metrics_1.p", "wb"))

Epoch [1/100], Loss: 1.6264
Epoch [2/100], Loss: 1.3515
Epoch [3/100], Loss: 1.2498
Epoch [4/100], Loss: 1.1845
Epoch [5/100], Loss: 1.1404
Epoch [6/100], Loss: 1.1083
Epoch [7/100], Loss: 1.0871
Epoch [8/100], Loss: 1.0720
Epoch [9/100], Loss: 1.0605
Epoch [10/100], Loss: 1.0525
Epoch [11/100], Loss: 1.0477
Epoch [12/100], Loss: 1.0438
Epoch [13/100], Loss: 1.0412
Epoch [14/100], Loss: 1.0390
Epoch [15/100], Loss: 1.0375
Epoch [16/100], Loss: 1.0364
Epoch [17/100], Loss: 1.0360
Epoch [18/100], Loss: 1.0352
Epoch [19/100], Loss: 1.0348
Epoch [20/100], Loss: 1.0354
Epoch [21/100], Loss: 1.0348
Epoch [22/100], Loss: 1.0345
Epoch [23/100], Loss: 1.0347
Epoch [24/100], Loss: 1.0346
Epoch [25/100], Loss: 1.0344
Epoch [26/100], Loss: 1.0343
Epoch [27/100], Loss: 1.0342
Epoch [28/100], Loss: 1.0341
Epoch [29/100], Loss: 1.0346
Epoch [30/100], Loss: 1.0345
Epoch [31/100], Loss: 1.0341
Epoch [32/100], Loss: 1.0348
Epoch [33/100], Loss: 1.0344
Epoch [34/100], Loss: 1.0346
Epoch [35/100], Loss: 1

In [None]:
model_2 = AlexNet(num_classes=10).to(device)
c_train_loss_list_2, c_metrics_2= training_and_evaluating(model_2, train_loader, test_loader, 100)
c_metrics_2['Dataset']='CIFAR-10'
c_metrics_2['Model name']='AlexNet'
pickle.dump(c_train_loss_list_2, open("c_train_loss_list_2.p", "wb"))
pickle.dump(c_metrics_2, open("c_metrics_2.p", "wb"))

  return F.conv2d(input, weight, bias, self.stride,


Epoch [1/100], Loss: 1.8890
Epoch [2/100], Loss: 1.5239
Epoch [3/100], Loss: 1.3117
Epoch [4/100], Loss: 1.1658
Epoch [5/100], Loss: 1.0356
Epoch [6/100], Loss: 0.9273
Epoch [7/100], Loss: 0.8302
Epoch [8/100], Loss: 0.7452
Epoch [9/100], Loss: 0.6752
Epoch [10/100], Loss: 0.6229
Epoch [11/100], Loss: 0.5862
Epoch [12/100], Loss: 0.5580
Epoch [13/100], Loss: 0.5381
Epoch [14/100], Loss: 0.5248
Epoch [15/100], Loss: 0.5142
Epoch [16/100], Loss: 0.5083
Epoch [17/100], Loss: 0.5033
Epoch [18/100], Loss: 0.5004
Epoch [19/100], Loss: 0.4988
Epoch [20/100], Loss: 0.4951
Epoch [21/100], Loss: 0.4935
Epoch [22/100], Loss: 0.4937
Epoch [23/100], Loss: 0.4944
Epoch [24/100], Loss: 0.4935
Epoch [25/100], Loss: 0.4918
Epoch [26/100], Loss: 0.4928
Epoch [27/100], Loss: 0.4930
Epoch [28/100], Loss: 0.4937
Epoch [29/100], Loss: 0.4914
Epoch [30/100], Loss: 0.4915
Epoch [31/100], Loss: 0.4925
Epoch [32/100], Loss: 0.4929
Epoch [33/100], Loss: 0.4920
Epoch [34/100], Loss: 0.4920
Epoch [35/100], Loss: 0

In [None]:
model_3 = VGG(num_classes=10).to(device)
c_train_loss_list_3, c_metrics_3 = training_and_evaluating(model_3, train_loader, test_loader, 20)
c_metrics_3['Dataset']='CIFAR-10'
c_metrics_3['Model name']='VGG'
pickle.dump(c_train_loss_list_3, open("c_train_loss_list_3.p", "wb"))
pickle.dump(c_metrics_3, open("c_metrics_3.p", "wb"))

Epoch [1/20], Loss: 2.3032
Epoch [2/20], Loss: 2.3028
Epoch [3/20], Loss: 2.3028
Epoch [4/20], Loss: 2.3027
Epoch [5/20], Loss: 2.3027
Epoch [6/20], Loss: 2.3026
Epoch [7/20], Loss: 2.3026
Epoch [8/20], Loss: 2.3026
Epoch [9/20], Loss: 2.3026
Epoch [10/20], Loss: 2.3026
Epoch [11/20], Loss: 2.3026
Epoch [12/20], Loss: 2.3026
Epoch [13/20], Loss: 2.3026
Epoch [14/20], Loss: 2.3026
Epoch [15/20], Loss: 2.3026
Epoch [16/20], Loss: 2.3026
Epoch [17/20], Loss: 2.3026
Epoch [18/20], Loss: 2.3026
Epoch [19/20], Loss: 2.3026
Epoch [20/20], Loss: 2.3026

Accuracy: 10.00%
Precision: 0.01
Recall: 0.10
F1-score: 0.02


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
model_4 = ResNet18(num_classes=10).to(device)
c_train_loss_list_4, c_metrics_4= training_and_evaluating(model_4, train_loader, test_loader, 100)
c_metrics_4['Dataset']='CIFAR-10'
c_metrics_4['Model name']='ResNet18'
pickle.dump(c_train_loss_list_4, open("c_train_loss_list_4.p", "wb"))
pickle.dump(c_metrics_4, open("c_metrics_4.p", "wb"))

Epoch [1/100], Loss: 1.3605
Epoch [2/100], Loss: 0.9384
Epoch [3/100], Loss: 0.7409
Epoch [4/100], Loss: 0.5750
Epoch [5/100], Loss: 0.4349
Epoch [6/100], Loss: 0.2928
Epoch [7/100], Loss: 0.1809
Epoch [8/100], Loss: 0.1015
Epoch [9/100], Loss: 0.0542
Epoch [10/100], Loss: 0.0316
Epoch [11/100], Loss: 0.0196
Epoch [12/100], Loss: 0.0130
Epoch [13/100], Loss: 0.0096
Epoch [14/100], Loss: 0.0089
Epoch [15/100], Loss: 0.0070
Epoch [16/100], Loss: 0.0074
Epoch [17/100], Loss: 0.0063
Epoch [18/100], Loss: 0.0053
Epoch [19/100], Loss: 0.0055
Epoch [20/100], Loss: 0.0044
Epoch [21/100], Loss: 0.0050
Epoch [22/100], Loss: 0.0040
Epoch [23/100], Loss: 0.0043
Epoch [24/100], Loss: 0.0047
Epoch [25/100], Loss: 0.0041
Epoch [26/100], Loss: 0.0045
Epoch [27/100], Loss: 0.0046
Epoch [28/100], Loss: 0.0041
Epoch [29/100], Loss: 0.0039
Epoch [30/100], Loss: 0.0046
Epoch [31/100], Loss: 0.0041
Epoch [32/100], Loss: 0.0047
Epoch [33/100], Loss: 0.0047
Epoch [34/100], Loss: 0.0045
Epoch [35/100], Loss: 0

In [None]:
model_5 = SENet18(num_classes=10).to(device)
c_train_loss_list_5, c_metrics_5 = training_and_evaluating(model_5, train_loader, test_loader, 100)
c_metrics_5['Dataset']='CIFAR-10'
c_metrics_5['Model name']='SENet18'
pickle.dump(c_train_loss_list_5, open("c_train_loss_list_5.p", "wb"))
pickle.dump(c_metrics_5, open("c_metrics_5.p", "wb"))

Epoch [1/100], Loss: 1.3065
Epoch [2/100], Loss: 0.8839
Epoch [3/100], Loss: 0.6735
Epoch [4/100], Loss: 0.5081
Epoch [5/100], Loss: 0.3483
Epoch [6/100], Loss: 0.2128
Epoch [7/100], Loss: 0.1157
Epoch [8/100], Loss: 0.0637
Epoch [9/100], Loss: 0.0353
Epoch [10/100], Loss: 0.0196
Epoch [11/100], Loss: 0.0137
Epoch [12/100], Loss: 0.0104
Epoch [13/100], Loss: 0.0079
Epoch [14/100], Loss: 0.0069
Epoch [15/100], Loss: 0.0062
Epoch [16/100], Loss: 0.0050
Epoch [17/100], Loss: 0.0051
Epoch [18/100], Loss: 0.0044
Epoch [19/100], Loss: 0.0046
Epoch [20/100], Loss: 0.0041
Epoch [21/100], Loss: 0.0040
Epoch [22/100], Loss: 0.0043
Epoch [23/100], Loss: 0.0036
Epoch [24/100], Loss: 0.0042
Epoch [25/100], Loss: 0.0040
Epoch [26/100], Loss: 0.0044
Epoch [27/100], Loss: 0.0039
Epoch [28/100], Loss: 0.0040
Epoch [29/100], Loss: 0.0042
Epoch [30/100], Loss: 0.0039
Epoch [31/100], Loss: 0.0041
Epoch [32/100], Loss: 0.0042
Epoch [33/100], Loss: 0.0041
Epoch [34/100], Loss: 0.0036
Epoch [35/100], Loss: 0

In [8]:
model_6 = GoogleNet(num_classes=10).to(device)
c_train_loss_list_6, c_metrics_6 = training_and_evaluating_GoogleNet(model_6, train_loader, test_loader, 100)
c_metrics_6['Dataset']='CIFAR-10'
c_metrics_6['Model name']='GoogleNet'
pickle.dump(c_train_loss_list_6, open("c_train_loss_list_6.p", "wb"))
pickle.dump(c_metrics_6, open("c_metrics_6.p", "wb"))

Epoch [1/100], Loss: 2.6362
Epoch [2/100], Loss: 1.7372
Epoch [3/100], Loss: 1.3056
Epoch [4/100], Loss: 0.9902
Epoch [5/100], Loss: 0.7114
Epoch [6/100], Loss: 0.4633
Epoch [7/100], Loss: 0.2603
Epoch [8/100], Loss: 0.1287
Epoch [9/100], Loss: 0.0530
Epoch [10/100], Loss: 0.0211
Epoch [11/100], Loss: 0.0097
Epoch [12/100], Loss: 0.0057
Epoch [13/100], Loss: 0.0041
Epoch [14/100], Loss: 0.0033
Epoch [15/100], Loss: 0.0029
Epoch [16/100], Loss: 0.0024
Epoch [17/100], Loss: 0.0021
Epoch [18/100], Loss: 0.0019
Epoch [19/100], Loss: 0.0018
Epoch [20/100], Loss: 0.0018
Epoch [21/100], Loss: 0.0018
Epoch [22/100], Loss: 0.0017
Epoch [23/100], Loss: 0.0016
Epoch [24/100], Loss: 0.0017
Epoch [25/100], Loss: 0.0015
Epoch [26/100], Loss: 0.0015
Epoch [27/100], Loss: 0.0015
Epoch [28/100], Loss: 0.0015
Epoch [29/100], Loss: 0.0015
Epoch [30/100], Loss: 0.0015
Epoch [31/100], Loss: 0.0015
Epoch [32/100], Loss: 0.0015
Epoch [33/100], Loss: 0.0015
Epoch [34/100], Loss: 0.0015
Epoch [35/100], Loss: 0

**Modified VGGNet Architecture**

In [9]:
import torch.nn as nn

class VGGNet(nn.Module):
    def __init__(self, num_classes=10):
        super(VGGNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.3),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.4),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(4096),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(4096),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [None]:
#Data_loader for MNIST
from torch.utils.data import DataLoader
train_loader = DataLoader(data['MNIST'][0], batch_size=64, shuffle=True)
test_loader = DataLoader(data['MNIST'][1], batch_size=1000, shuffle=False)

In [None]:
#MNIST
model_3 = VGGNet(num_classes=10).to(device)
m_train_loss_list_3_modified, m_metrics_3_modified = training_and_evaluating(model_3, train_loader, test_loader, 20)
pickle.dump(m_train_loss_list_3_modified, open("m_train_loss_list_3_modified.p", "wb"))
pickle.dump(m_metrics_3_modified, open("m_metrics_3_modified.p", "wb"))

Epoch [1/20], Loss: 0.6717
Epoch [2/20], Loss: 0.1240
Epoch [3/20], Loss: 0.0668
Epoch [4/20], Loss: 0.0547
Epoch [5/20], Loss: 0.0404
Epoch [6/20], Loss: 0.0321
Epoch [7/20], Loss: 0.0266
Epoch [8/20], Loss: 0.0218
Epoch [9/20], Loss: 0.0165
Epoch [10/20], Loss: 0.0153
Epoch [11/20], Loss: 0.0126
Epoch [12/20], Loss: 0.0120
Epoch [13/20], Loss: 0.0102
Epoch [14/20], Loss: 0.0104
Epoch [15/20], Loss: 0.0083
Epoch [16/20], Loss: 0.0090
Epoch [17/20], Loss: 0.0083
Epoch [18/20], Loss: 0.0085
Epoch [19/20], Loss: 0.0085
Epoch [20/20], Loss: 0.0081

Accuracy: 99.64%
Precision: 1.00
Recall: 1.00
F1-score: 1.00


In [10]:
#Data_loader for FASHION MNIST
from torch.utils.data import DataLoader
train_loader = DataLoader(data['FMNIST'][0], batch_size=64, shuffle=True)
test_loader = DataLoader(data['FMNIST'][1], batch_size=1000, shuffle=False)

In [13]:
#Fashion MNIST
model_3 = VGGNet(num_classes=10).to(device)
f_train_loss_list_3_modified, f_metrics_3_modified = training_and_evaluating(model_3, train_loader, test_loader, 20)
pickle.dump(f_train_loss_list_3_modified, open("f_train_loss_list_3_modified.p", "wb"))
pickle.dump(f_metrics_3_modified, open("f_metrics_3_modified.p", "wb"))

Epoch [1/20], Loss: 1.2399
Epoch [2/20], Loss: 0.5594
Epoch [3/20], Loss: 0.4091
Epoch [4/20], Loss: 0.3442
Epoch [5/20], Loss: 0.3039
Epoch [6/20], Loss: 0.2749
Epoch [7/20], Loss: 0.2469
Epoch [8/20], Loss: 0.2281
Epoch [9/20], Loss: 0.2118
Epoch [10/20], Loss: 0.2007
Epoch [11/20], Loss: 0.1926
Epoch [12/20], Loss: 0.1864
Epoch [13/20], Loss: 0.1810
Epoch [14/20], Loss: 0.1783
Epoch [15/20], Loss: 0.1744
Epoch [16/20], Loss: 0.1710
Epoch [17/20], Loss: 0.1725
Epoch [18/20], Loss: 0.1720
Epoch [19/20], Loss: 0.1723
Epoch [20/20], Loss: 0.1696

Accuracy: 92.42%
Precision: 0.93
Recall: 0.93
F1-score: 0.93


In [None]:
#Data_loader for CIFAR-10 DATA
from torch.utils.data import DataLoader
train_loader = DataLoader(data['CIFAR-10'][0], batch_size=64, shuffle=True)
test_loader = DataLoader(data['CIFAR-10'][1], batch_size=1000, shuffle=False)

In [None]:
#CIFAR-10
model_3 = VGGNet(num_classes=10).to(device)
c_train_loss_list_3_modified, c_metrics_3_modified = training_and_evaluating(model_3, train_loader, test_loader, 100)
pickle.dump(c_train_loss_list_3_modified, open("c_train_loss_list_3_modified.p", "wb"))
pickle.dump(c_metrics_3_modified, open("c_metrics_3_modified.p", "wb"))

Epoch [1/100], Loss: 2.2908
Epoch [2/100], Loss: 1.9514
Epoch [3/100], Loss: 1.6058
Epoch [4/100], Loss: 1.3205
Epoch [5/100], Loss: 1.1517
Epoch [6/100], Loss: 1.0170
Epoch [7/100], Loss: 0.9121
Epoch [8/100], Loss: 0.8396
Epoch [9/100], Loss: 0.7862
Epoch [10/100], Loss: 0.7476
Epoch [11/100], Loss: 0.7231
Epoch [12/100], Loss: 0.7018
Epoch [13/100], Loss: 0.6980
Epoch [14/100], Loss: 0.6803
Epoch [15/100], Loss: 0.6765
Epoch [16/100], Loss: 0.6745
Epoch [17/100], Loss: 0.6719
Epoch [18/100], Loss: 0.6635
Epoch [19/100], Loss: 0.6682
Epoch [20/100], Loss: 0.6667
Epoch [21/100], Loss: 0.6621
Epoch [22/100], Loss: 0.6673
Epoch [23/100], Loss: 0.6628
Epoch [24/100], Loss: 0.6598
Epoch [25/100], Loss: 0.6593
Epoch [26/100], Loss: 0.6630
Epoch [27/100], Loss: 0.6665
Epoch [28/100], Loss: 0.6602
Epoch [29/100], Loss: 0.6586
Epoch [30/100], Loss: 0.6610
Epoch [31/100], Loss: 0.6574
Epoch [32/100], Loss: 0.6650
Epoch [33/100], Loss: 0.6571
Epoch [34/100], Loss: 0.6612
Epoch [35/100], Loss: 0