# 

In [1]:
from torchvision import datasets
from torch.utils.data import DataLoader
import torch
from torch import nn
from sklearn.metrics import accuracy_score as acc
import matplotlib.pyplot as plt
from torchvision import transforms

In [2]:
trans = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.TrivialAugmentWide(),
    transforms.ToTensor()
])
train_data = datasets.CIFAR10(root="data",  train=True, download=True, transform=trans)
test_data = datasets.CIFAR10(root="data", train=False, download=True, transform=transforms.ToTensor())

img, label = train_data[0]
n_channels = img.shape[0]
n_classes = len(train_data.classes)

n_channels, n_classes

Files already downloaded and verified
Files already downloaded and verified


(3, 10)

In [3]:
batch_size = 20
train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers=5)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False, num_workers=5)

In [4]:
import torch.nn.init as init
import torch.nn.functional as F
class CIFAR100(nn.Module):
    def __init__(self, in_features, out_features, hidden=300):
        super().__init__()
        self.conv1 = nn.Conv2d(in_features, hidden, kernel_size=2)
        self.bn = nn.BatchNorm2d(hidden)
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=5, padding=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
        self.fc1 = nn.Linear(hidden*12*12, hidden*2)
        self.fc2 = nn.Linear(hidden*2, hidden*2)
        self.fc3 = nn.Linear(hidden*2, out_features)
        self.dropout = nn.Dropout(0.3)

        init.xavier_uniform_(self.conv1.weight) 
        init.xavier_uniform_(self.conv2.weight)
        init.xavier_uniform_(self.fc1.weight) 
        init.xavier_uniform_(self.fc2.weight)
        init.xavier_uniform_(self.fc3.weight)

    def forward(self, x):
        x = self.maxpool1(F.relu(self.conv1(x)))
        x = self.maxpool2(self.bn(F.relu(self.conv2(x))))
        x = x.view(-1, 300*12*12)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.dropout(x)
        return x

device = "cuda" if torch.cuda.is_available() else "cpu"
model = CIFAR100(n_channels, n_classes).to(device) #best 79

In [47]:
class VGG(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = VGG().to(device) #best 87

In [51]:
import torch
torch.manual_seed(42)
n_dummy = torch.rand(1, 3, 32, 32)
model(n_dummy.to(device))

tensor([[ 0.0940,  0.0662,  0.1145,  0.2040,  0.1250,  0.1596, -0.2197, -0.2703,
         -0.0990, -0.2163]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [63]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.0002)
loss_fn = nn.CrossEntropyLoss()
epochs = 5

In [65]:
from timeit import default_timer as timer

torch.manual_seed(42)
startTime = timer()

for epoch in (range(epochs)):
  loss_train = 0
  for x, y in train_dataloader:
    model.train()
    x, y = x.to(device), y.to(device)
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss_train += loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  loss_train /= len(train_dataloader)

  model.eval()
  loss_test, test_acc = 0, 0
  with torch.inference_mode():
    for x_test, y_test in test_dataloader:
      x_test, y_test = x_test.to(device), y_test.to(device)
      test_pred = model(x_test)
      loss_test += loss_fn(test_pred, y_test)

      test_acc += acc(y_test.cpu(), torch.softmax(test_pred, dim=1).argmax(dim=1).cpu())

    loss_test /= len(test_dataloader)
    test_acc /= len(test_dataloader)

  print(f"train loss : {loss_train:.4f} | test loss : {loss_test:.4f} | test acc : {test_acc*100}")

endTime = timer()
total_train_time = endTime - startTime
f"total time : {total_train_time:.4f} sec"

train loss : 0.3773 | test loss : 0.4114 | test acc : 87.38999999999999
train loss : 0.3316 | test loss : 0.4133 | test acc : 87.62000000000003
train loss : 0.2803 | test loss : 0.4141 | test acc : 87.73000000000008
train loss : 0.2397 | test loss : 0.4252 | test acc : 87.53999999999999
train loss : 0.1800 | test loss : 0.4407 | test acc : 87.57000000000001


'total time : 128.3463 sec'