In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms
import time
%matplotlib inline

In [2]:
def set_seed(seed):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    np.random.seed(seed)
set_seed(0)

In [3]:
Ten2PIL = torchvision.transforms.ToPILImage()
PIL2Ten = torchvision.transforms.ToTensor()

In [4]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

In [5]:
dataset = torchvision.datasets.CIFAR10(root='data', train=True, transform=transform_train, download=True)
train_dataset, valid_dataset = torch.utils.data.random_split(dataset, lengths=[int(0.9 * len(dataset)), len(dataset) - int(0.9*len(dataset))])
test_dataset = torchvision.datasets.CIFAR10(root='data', train=False, transform=transform_test, download=True)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

In [7]:
len(dataset), len(test_dataset)

(50000, 10000)

In [8]:
device = torch.device('cuda')

In [9]:
class modelClass(nn.Module):
    def __init__(self, num_classes):
        super(modelClass, self).__init__()
        self.conv0 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5)
        self.mp = nn.MaxPool2d(kernel_size=2)
        self.conv1 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=64, kernel_size=2)
        self.fc0 = nn.Linear(256, 64)
        self.fc1 = nn.Linear(64, 16)
        self.fc2 = nn.Linear(16, num_classes)
    
    def forward(self, x):
        out = self.mp(F.relu(self.conv0(x)))
        out = self.mp(F.relu(self.conv1(out)))
        out = self.mp(F.relu(self.conv2(out)))
        out = nn.Flatten()(out)
        out = self.fc0(out)
        out = self.fc1(out)
        return self.fc2(out)

In [10]:
model = modelClass(num_classes=10).to(device) # torchvision.models.mobilenet_v2(num_classes=10).to(device) 

In [11]:
optimizer = torch.optim.SGD(model.parameters(), lr=2e-3)

In [12]:
lossfunc = nn.CrossEntropyLoss()

In [13]:
num_epoch = 500
best_accu = 0
cnt = 0
model.train()
for epoch in range(num_epoch):
    for i, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(x)
        loss = lossfunc(pred, y)
        loss.backward()
        optimizer.step()
        if i % 500 == 0 or i == (len(train_dataset) / 32) - 1 :
            # param = next(model.parameters())
            # print(param.grad.mean() / param.mean())
            model.eval()
            with torch.no_grad():
                valid_loss, accu = 0, 0
                for j, (x, y) in enumerate(valid_loader):
                    x, y = x.to(device), y.to(device)
                    pred = model(x)
                    valid_loss += lossfunc(pred, y)
                    accu += (pred.argmax(-1) == y.squeeze()).sum().item()
                # print(accu, valid_loss)
                valid_loss /= j
                accu /= len(valid_dataset)
                del x, y, pred
            print("Epoch: {} | Train loss: {:.4f} | Valid loss: {:.4f} | Valid Accu: {:.4f}".format(
                epoch, loss.item(), valid_loss.item(), accu
            ))
            model.train()
    # print("\n {:.4f} {:.4f} {} \n".format(1.02 * best_accu, accu, cnt))
    if accu >= best_accu:
        cnt = 0
        best_accu = accu
        torch.save(model.state_dict(), os.path.join('model', 'cifar',
                                                    'model-3C-{}.pt'.format(epoch)))
    else:
        cnt += 1

    if cnt == 50:
        # print("Early stopped!")
        break

Epoch: 0 | Train loss: 2.3288 | Valid loss: 2.3301 | Valid Accu: 0.0960
Epoch: 0 | Train loss: 2.2876 | Valid loss: 2.3258 | Valid Accu: 0.0992
Epoch: 0 | Train loss: 2.3059 | Valid loss: 2.3221 | Valid Accu: 0.1126
Epoch: 1 | Train loss: 2.2920 | Valid loss: 2.3193 | Valid Accu: 0.1180
Epoch: 1 | Train loss: 2.3222 | Valid loss: 2.3156 | Valid Accu: 0.1126
Epoch: 1 | Train loss: 2.2919 | Valid loss: 2.3100 | Valid Accu: 0.1206
Epoch: 2 | Train loss: 2.3044 | Valid loss: 2.3022 | Valid Accu: 0.1214
Epoch: 2 | Train loss: 2.2553 | Valid loss: 2.2816 | Valid Accu: 0.1362
Epoch: 2 | Train loss: 2.1667 | Valid loss: 2.2417 | Valid Accu: 0.1658
Epoch: 3 | Train loss: 2.1610 | Valid loss: 2.2094 | Valid Accu: 0.1900
Epoch: 3 | Train loss: 2.0869 | Valid loss: 2.1687 | Valid Accu: 0.2030
Epoch: 3 | Train loss: 2.0595 | Valid loss: 2.1274 | Valid Accu: 0.2128
Epoch: 4 | Train loss: 2.1068 | Valid loss: 2.1025 | Valid Accu: 0.2224
Epoch: 4 | Train loss: 1.9973 | Valid loss: 2.0844 | Valid Accu:

In [14]:
best_accu

0.6684

In [16]:
epoch - 50

263

In [17]:
loaded_model = modelClass(num_classes=10).to(device) # torchvision.models.mobilenet_v2(num_classes=10).to(device)

In [19]:
loaded_model.load_state_dict(torch.load(os.path.join('model', 'cifar', 'model-3C-{}.pt'.format(epoch - 50))))
loaded_model.eval()

modelClass(
  (conv0): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1))
  (mp): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 64, kernel_size=(2, 2), stride=(1, 1))
  (fc0): Linear(in_features=256, out_features=64, bias=True)
  (fc1): Linear(in_features=64, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=10, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [20]:
test_accu = 0
for _, (x, y) in enumerate(test_loader):
    x, y = x.to(device), y.to(device)
    pred = loaded_model(x)
    test_accu += (pred.argmax(-1) == y).sum().item()

In [21]:
test_accu / len(test_dataset)

0.6909

In [25]:
model_parameters = filter(lambda p: p.requires_grad, loaded_model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])

In [26]:
params

23594