In [1]:
import time
from typing import Any, Generator, Literal
import torch
import torchvision
import torchvision.datasets
import torchvision.transforms
import torch.nn as nn
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import models as torch_model
import seaborn as sns
import os
%matplotlib inline

In [2]:
class Processing(object):
    def __init__(self, model: nn.Module, train_dataloader: DataLoader, test_dataloader: DataLoader, optimizer: Any, loss: Any, device: Literal["cuda", "cpu"]) -> None:
        self.model = model
        self.train_dataloader = train_dataloader
        self.test_dataloader = test_dataloader
        self.optimizer = optimizer
        self.loss = loss
        self.device = device

    def _train_loop(self) -> float:
        self.model.train()
        true = 0
        total = 0
        for images, labels in self.train_dataloader:
            self.optimizer.zero_grad()
            images = images.type(torch.float32)
            images = images.to(self.device)
            labels = labels.to(self.device)
            preds = self.model(images)
            loss = self.loss(preds.type(torch.float32), F.one_hot(labels, num_classes=10).type(torch.float32))
            corrects = labels == preds.argmax(dim=1)
            true += int(corrects.sum())
            loss.backward()
            self.optimizer.step()
            total += len(preds)
        return true / total

    def _test_loop(self) -> float:
        self.model.eval()
        true = 0
        total = 0
        with torch.no_grad():
            for images, labels in self.test_dataloader:
                self.optimizer.zero_grad()
                images = images.type(torch.float32)
                images = images.to(self.device)
                labels = labels.to(self.device)
                preds = self.model(images)
                corrects = labels == preds.argmax(dim=1)
                true += int(corrects.sum())
                total += len(preds)
        return true / total

    def train(self, epoch_num: int) -> Generator[nn.Module, None, None]:
        for ep in range(epoch_num):
            start = time.time()
            trainacc = self._train_loop()
            end = time.time()
            trtime = end - start
            start = time.time()
            testacc = self._test_loop()
            end = time.time()
            tetime = end - start
            print(f"Epoch {ep + 1}: train acc {trainacc}, time {trtime}; test acc {testacc}, time {tetime}")
            yield self.model

def train(model, device, train_dataloader, test_dataloader, optimizer, loss, epochs_num) -> None:
    model = model
    model = model.to(device)
    process = Processing(model, train_dataloader, test_dataloader, optimizer, loss, device)
    for model in process.train(epochs_num):
        print()

def last_shallow_layer(inim):
    return nn.Linear(inim, 10)

def last_deep_layer(inim):
    return nn.Sequential(nn.Linear(inim, inim // 2), nn.ReLU(), nn.Linear(inim // 2, 10))


RESNET50

In [3]:
nownet = torch_model.resnet50(weights = torch_model.ResNet50_Weights.IMAGENET1K_V2)
transform = torch_model.ResNet50_Weights.IMAGENET1K_V2.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_shallow_layer(2048)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 5)
end = time.time()
print(f"Total time: {end - start}")

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:02<00:00, 48.0MB/s]


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29537602.32it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1: train acc 0.71772, time 173.74135565757751; test acc 0.7694, time 33.168834924697876

Epoch 2: train acc 0.79066, time 170.59240555763245; test acc 0.7931, time 32.939101457595825

Epoch 3: train acc 0.8092, time 170.9405996799469; test acc 0.8044, time 32.585973501205444

Epoch 4: train acc 0.8195, time 171.12016606330872; test acc 0.8095, time 33.463088035583496

Epoch 5: train acc 0.82786, time 170.20956301689148; test acc 0.8106, time 33.3836510181427

Total time: 1022.4021151065826


In [4]:
print(nownet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
transform = torch_model.ResNet50_Weights.IMAGENET1K_V2.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
nownet = torch_model.resnet50(weights = torch_model.ResNet50_Weights.IMAGENET1K_V2)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_deep_layer(2048)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 5)
end = time.time()
print(f"Total time: {end - start}")

Files already downloaded and verified
Files already downloaded and verified
Epoch 1: train acc 0.75626, time 172.77818393707275; test acc 0.794, time 32.58893013000488

Epoch 2: train acc 0.8139, time 170.53417563438416; test acc 0.8023, time 33.00425672531128

Epoch 3: train acc 0.83668, time 172.21918869018555; test acc 0.8092, time 33.76229643821716

Epoch 4: train acc 0.85454, time 171.50245094299316; test acc 0.8133, time 34.2901656627655

Epoch 5: train acc 0.87468, time 171.63869714736938; test acc 0.8093, time 33.903928995132446

Total time: 1026.2569863796234


In [6]:
print(nownet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 


RESNET152

In [7]:
nownet = torch_model.resnet152(weights = torch_model.ResNet152_Weights.IMAGENET1K_V2)
transform = torch_model.ResNet152_Weights.IMAGENET1K_V2.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_shallow_layer(2048)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 5)
end = time.time()
print(f"Total time: {end - start}")

Downloading: "https://download.pytorch.org/models/resnet152-f82ba261.pth" to /root/.cache/torch/hub/checkpoints/resnet152-f82ba261.pth
100%|██████████| 230M/230M [00:02<00:00, 118MB/s]


Files already downloaded and verified
Files already downloaded and verified
Epoch 1: train acc 0.76556, time 366.3097507953644; test acc 0.8138, time 69.42617988586426

Epoch 2: train acc 0.82814, time 369.56250977516174; test acc 0.8323, time 69.69159388542175

Epoch 3: train acc 0.8426, time 369.71301531791687; test acc 0.8354, time 69.46458721160889

Epoch 4: train acc 0.85268, time 368.9688415527344; test acc 0.84, time 69.32540225982666

Epoch 5: train acc 0.8582, time 369.53081917762756; test acc 0.8419, time 69.2954888343811

Total time: 2191.3835682868958


In [8]:
print(nownet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
transform = torch_model.ResNet152_Weights.IMAGENET1K_V2.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
nownet = torch_model.resnet152(weights = torch_model.ResNet152_Weights.IMAGENET1K_V2)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_deep_layer(2048)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 5)
end = time.time()
print(f"Total time: {end - start}")

Files already downloaded and verified
Files already downloaded and verified
Epoch 1: train acc 0.7926, time 368.81201362609863; test acc 0.8242, time 69.37609100341797

Epoch 2: train acc 0.84324, time 368.4374828338623; test acc 0.8286, time 69.37348532676697

Epoch 3: train acc 0.8608, time 369.18082332611084; test acc 0.8376, time 69.22501564025879

Epoch 4: train acc 0.87664, time 369.60194158554077; test acc 0.8342, time 69.30758380889893

Epoch 5: train acc 0.8956, time 369.2049992084503; test acc 0.8407, time 69.8724958896637

Total time: 2192.500194311142


In [10]:
print(nownet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

EFFICIENTNETV2

In [13]:
nownet = torch_model.efficientnet_v2_m(weights = torch_model.EfficientNet_V2_M_Weights.IMAGENET1K_V1)
transform = torch_model.EfficientNet_V2_M_Weights.IMAGENET1K_V1.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_shallow_layer(2048)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, nn.MSELoss(), 2)
end = time.time()
print(f"Total time: {end - start}")

Files already downloaded and verified
Files already downloaded and verified


RuntimeError: ignored

In [None]:
print(nownet)

In [None]:
transform = torch_model.EfficientNet_V2_M_Weights.IMAGENET1K_V1.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
nownet = torch_model.efficientnet_v2_m(weights = torch_model.ResNet152_Weights.IMAGENET1K_V2)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_deep_layer(2048)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 2)
end = time.time()
print(f"Total time: {end - start}")

In [None]:
print(nownet)

SHUFFLENET V2

In [14]:
transform = torch_model.ShuffleNet_V2_X1_0_Weights.DEFAULT.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
nownet = torch_model.shufflenet_v2_x1_0(torch_model.ShuffleNet_V2_X1_0_Weights.DEFAULT)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_shallow_layer(1024)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 5)
end = time.time()
print(f"Total time: {end - start}")

Files already downloaded and verified
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth" to /root/.cache/torch/hub/checkpoints/shufflenetv2_x1-5666bf0f80.pth
100%|██████████| 8.79M/8.79M [00:00<00:00, 66.0MB/s]


Epoch 1: train acc 0.6474, time 122.69409608840942; test acc 0.7208, time 27.562078714370728

Epoch 2: train acc 0.72078, time 123.49215173721313; test acc 0.7325, time 25.7490017414093

Epoch 3: train acc 0.73466, time 128.63482570648193; test acc 0.7426, time 26.638173818588257

Epoch 4: train acc 0.7461, time 123.59286546707153; test acc 0.7526, time 25.26438307762146

Epoch 5: train acc 0.75356, time 122.67882633209229; test acc 0.7633, time 27.192983150482178

Total time: 753.5118064880371


In [15]:
print(nownet)

ShuffleNetV2(
  (conv1): Sequential(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (stage2): Sequential(
    (0): InvertedResidual(
      (branch1): Sequential(
        (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False)
        (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (3): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (4): ReLU(inplace=True)
      )
      (branch2): Sequential(
        (0): Conv2d(24, 58, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(58, eps=1e-05, momentum=0.1, affine=True, track_running_

In [None]:
transform = torch_model.ShuffleNet_V2_X1_0_Weights.DEFAULT.transforms()
train_ds = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
test_ds = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 128, shuffle = True, num_workers = 2)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 128, shuffle = False, num_workers = 2)
nownet = torch_model.shufflenet_v2_x1_0(torch_model.ShuffleNet_V2_X1_0_Weights.DEFAULT)
for param in nownet.parameters():
    param.requires_grad = False
nownet.fc = last_deep_layer(1024)
optim = torch.optim.Adam(nownet.fc.parameters(), lr = 0.001)
start = time.time()
trained_model = train(nownet, "cuda", train_dl, test_dl, optim, torch.nn.CrossEntropyLoss(), 7)
end = time.time()
print(f"Total time: {end - start}")

In [None]:
print(nownet)