In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import precision_score, recall_score, f1_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [None]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,))])

In [None]:
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=128, shuffle=True, num_workers= 2)
test_loader  = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=128, shuffle=False, num_workers= 2)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:08<00:00, 3.30MB/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 305kB/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:01<00:00, 2.31MB/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<00:00, 23.2MB/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [None]:
# LeNet-5 Model
class LeNet5(nn.Module):
    def __init__(self,num_classes=10):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [None]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [None]:
def train_and_test(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs, device):
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
  scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

  train_loss_list =[]
  for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()
# Generating Loss Curve:
    train_loss = running_loss / len(train_loader)
    train_loss_list.append(train_loss)
    print(f'[Epoch {epoch + 1}/{(num_epochs)}], Loss:{train_loss:.4f}')

# Model Testing
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for images,labels in test_loader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
  # Calculating Accuracy:
  accuracy = 100 * correct / total
  #Evaluation Metrics
  precision = precision_score(labels.cpu().numpy(), predicted.cpu().numpy(), average='macro')
  recall = recall_score(labels.cpu().numpy(), predicted.cpu().numpy(), average='macro')
  f1 = f1_score(labels.cpu().numpy(), predicted.cpu().numpy(), average='macro')

  print(f'\nAccuracy of the network : {accuracy:.2f}%')
  print(f'Precision: {precision:.2f}')
  print(f'Recall: {recall:.2f}')
  print(f'F1-score: {f1:.2f}')

  metrics = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1': f1}
  return metrics, train_loss_list

In [None]:
class ResidualBlock(nn.Module):
  def __init__(self, in_channels, out_channels, stride=1):
    super(ResidualBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias =False)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias =False)
    self.bn2 = nn.BatchNorm2d(out_channels)

    self.shortcut = nn.Sequential()
    if stride != 1 or in_channels != out_channels:
      self.shortcut = nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride,bias =False),
                                    nn.BatchNorm2d(out_channels))
  def forward(self,x):
    out = torch.relu(self.bn1(self.conv1(x)))
    out = self.bn2(self.conv2(out))
    out += self.shortcut(x)
    out = torch.relu(out)
    return out

class ResNet(nn.Module):
  def __init__(self, block, num_blocks, num_classes=10):
    super(ResNet, self).__init__()
    self.in_channels = 64
    self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias =False)
    self.bn1 = nn.BatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.fc = nn.Linear(512, num_classes)

  def  _make_layer(self,block,out_channels, num_blocks, stride):
    strides = [stride] + [1]*(num_blocks-1)
    layers = []
    for stride in strides:
      layers.append(block(self.in_channels, out_channels, stride))
      self.in_channels = out_channels
    return nn.Sequential(*layers)

  def forward(self,x):
    out = torch.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = torch.mean(out, dim=[2,3])
    out = self.fc(out)
    return out
def ResNet18():
  return ResNet(ResidualBlock, [2,2,2,2])
net = ResNet18()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [None]:
class Alexnet(nn.Module):
  def __init__(self, num_classes=10):
    super(Alexnet, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Conv2d(64, 192, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )
    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Linear(256*3*3, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),
    )
     def forward(self,x):
    x = self.features(x)
    x = x.view(x.size(0), -1)
    x = self.classifier(x)
    return x

In [None]:
class SeparableConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0):
        super(SeparableConv2d, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels, bias=False)
        self.pointwise = nn.Conv2d(in_channels, out_channels, 1, bias=False)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class Xception(nn.Module):
    def __init__(self, num_classes=10):
        super(Xception, self).__init__()
        self.entry_flow = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )

        self.middle_flow = nn.Sequential(
            SeparableConv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            SeparableConv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            SeparableConv2d(256, 728, kernel_size=3, padding=1),
            nn.BatchNorm2d(728),
            nn.ReLU(),
        )

        self.exit_flow = nn.Sequential(
            SeparableConv2d(728, 1024, kernel_size=3, padding=1),
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1))
        )

        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.entry_flow(x)
        x = self.middle_flow(x)
        x = self.exit_flow(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

xception = Xception(num_classes=10).to(device)

In [None]:
import pickle
from torch.optim.lr_scheduler import StepLR
criterion = nn.CrossEntropyLoss()
model_1 = LeNet5(num_classes=10).to(device)
optimizer = optim.Adam(model_1.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
train_loss_list_1, metrics_1 = train_and_test(model_1, train_loader, test_loader, criterion, scheduler, optimizer, 20, device)
with open('./train_loss_list_1.pkl', 'wb') as file:
    pickle.dump(train_loss_list_1, file)
with open('./metrics_1.pkl', 'wb') as file:
    pickle.dump(metrics_1, file)

[Epoch 1/20], Loss:2.2880
[Epoch 2/20], Loss:1.7662
[Epoch 3/20], Loss:0.9418
[Epoch 4/20], Loss:0.7772
[Epoch 5/20], Loss:0.6899
[Epoch 6/20], Loss:0.6372
[Epoch 7/20], Loss:0.6301
[Epoch 8/20], Loss:0.6240
[Epoch 9/20], Loss:0.6185
[Epoch 10/20], Loss:0.6127
[Epoch 11/20], Loss:0.6080
[Epoch 12/20], Loss:0.6074
[Epoch 13/20], Loss:0.6067
[Epoch 14/20], Loss:0.6063
[Epoch 15/20], Loss:0.6058
[Epoch 16/20], Loss:0.6053
[Epoch 17/20], Loss:0.6052
[Epoch 18/20], Loss:0.6052
[Epoch 19/20], Loss:0.6051
[Epoch 20/20], Loss:0.6050

Accuracy of the network : 76.40%
Precision: 0.80
Recall: 0.80
F1-score: 0.80


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import pickle
from torch.optim.lr_scheduler import StepLR
criterion = nn.CrossEntropyLoss()
model_2 = ResNet18().to(device)
optimizer = optim.Adam(model_2.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
train_loss_list_2, metrics_2 = train_and_test(model_2, train_loader, test_loader, criterion, scheduler, optimizer, 20, device)
with open('./train_loss_list_2.pkl', 'wb') as file:
    pickle.dump(train_loss_list_2, file)
with open('./metrics_2.pkl', 'wb') as file:
    pickle.dump(metrics_2, file)

[Epoch 1/20], Loss:0.5479
[Epoch 2/20], Loss:0.2678
[Epoch 3/20], Loss:0.2012
[Epoch 4/20], Loss:0.1590
[Epoch 5/20], Loss:0.1153
[Epoch 6/20], Loss:0.0569
[Epoch 7/20], Loss:0.0372
[Epoch 8/20], Loss:0.0294
[Epoch 9/20], Loss:0.0239
[Epoch 11/20], Loss:0.0171
[Epoch 12/20], Loss:0.0168
[Epoch 13/20], Loss:0.0164
[Epoch 14/20], Loss:0.0162
[Epoch 15/20], Loss:0.0160
[Epoch 16/20], Loss:0.0158
[Epoch 17/20], Loss:0.0158
[Epoch 18/20], Loss:0.0154
[Epoch 19/20], Loss:0.0158
[Epoch 20/20], Loss:0.0157

Accuracy of the network : 92.36%
Precision: 0.80
Recall: 0.80
F1-score: 0.80


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import pickle
from torch.optim.lr_scheduler import StepLR
criterion = nn.CrossEntropyLoss()
model_3 = Alexnet().to(device)
optimizer = optim.Adam(model_3.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
train_loss_list_3, metrics_3 = train_and_test(model_3, train_loader, test_loader, criterion, scheduler, optimizer, 20, device)
with open('./train_loss_list_3.pkl', 'wb') as file:
    pickle.dump(train_loss_list_3, file)
with open('./metrics_3.pkl', 'wb') as file:
    pickle.dump(metrics_3, file)

[Epoch 1/20], Loss:2.3019
[Epoch 2/20], Loss:2.2990
[Epoch 3/20], Loss:2.2662
[Epoch 4/20], Loss:1.3032
[Epoch 5/20], Loss:0.8101
[Epoch 6/20], Loss:0.7226
[Epoch 7/20], Loss:0.7122
[Epoch 8/20], Loss:0.7037
[Epoch 9/20], Loss:0.6976
[Epoch 10/20], Loss:0.6919
[Epoch 11/20], Loss:0.6880
[Epoch 12/20], Loss:0.6837
[Epoch 13/20], Loss:0.6822
[Epoch 14/20], Loss:0.6822
[Epoch 15/20], Loss:0.6813
[Epoch 16/20], Loss:0.6814
[Epoch 17/20], Loss:0.6822
[Epoch 18/20], Loss:0.6809
[Epoch 19/20], Loss:0.6817
[Epoch 20/20], Loss:0.6802

Accuracy of the network : 73.85%
Precision: 0.56
Recall: 0.59
F1-score: 0.55


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
import pickle
from torch.optim.lr_scheduler import StepLR
criterion = nn.CrossEntropyLoss()
model_4 = Xception().to(device)
optimizer = optim.Adam(model_4.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
train_loss_list_4, metrics_4 = train_and_test(model_4, train_loader, test_loader, criterion, scheduler, optimizer, 20, device)
with open('./train_loss_list_4.pkl', 'wb') as file:
    pickle.dump(train_loss_list_4, file)
with open('./metrics_4.pkl', 'wb') as file:
    pickle.dump(metrics_4, file)

[Epoch 1/20], Loss:1.2091
[Epoch 2/20], Loss:0.6931
[Epoch 3/20], Loss:0.5673
[Epoch 4/20], Loss:0.4947
[Epoch 5/20], Loss:0.4421
[Epoch 6/20], Loss:0.3984
[Epoch 7/20], Loss:0.3912
[Epoch 8/20], Loss:0.3858
[Epoch 9/20], Loss:0.3806
[Epoch 10/20], Loss:0.3770
[Epoch 11/20], Loss:0.3725
[Epoch 12/20], Loss:0.3710
[Epoch 13/20], Loss:0.3709
[Epoch 14/20], Loss:0.3699
[Epoch 15/20], Loss:0.3698
[Epoch 16/20], Loss:0.3694
[Epoch 17/20], Loss:0.3692
[Epoch 18/20], Loss:0.3689
[Epoch 19/20], Loss:0.3693
[Epoch 20/20], Loss:0.3692

Accuracy of the network : 86.15%
Precision: 0.78
Recall: 0.85
F1-score: 0.79


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
