### params to tune
- weight decay

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm
import matplotlib.pyplot as plt

# MNIST

In [85]:
train = datasets.MNIST("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
validation_set_size = int(len(train) * 0.1)
training_set_size = len(train) - validation_set_size
train_set, validation_set = torch.utils.data.random_split(train, [training_set_size, validation_set_size])
train_set = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
validation_set = torch.utils.data.DataLoader(validation_set, batch_size=10, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=10, shuffle=True)

In [99]:
class MNIST(nn.Module):
    def __init__(self):
        super().__init__()
        self.dropout = nn.Dropout(0.2)
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(29, 29).view(-1, 1, 29, 29)
        self.to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, 10)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = F.pad(x, (0, 0, 0, 0))
        x = self.convs(x)
        x = x.view(-1, self.to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [101]:
MNIST_net = MNIST()
optimizer = optim.Adam(MNIST_net.parameters(), lr=0.001)
EPOCHS = 3
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        MNIST_net.zero_grad()
        output = MNIST_net(X)
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)

100%|██████████| 6000/6000 [00:40<00:00, 149.60it/s]
  0%|          | 15/6000 [00:00<00:42, 141.77it/s]

tensor(0.0163, grad_fn=<NllLossBackward0>)


100%|██████████| 6000/6000 [00:41<00:00, 145.13it/s]
  0%|          | 15/6000 [00:00<00:41, 143.59it/s]

tensor(0.4607, grad_fn=<NllLossBackward0>)


100%|██████████| 6000/6000 [00:41<00:00, 145.83it/s]

tensor(0.0808, grad_fn=<NllLossBackward0>)





# CIFAR-10

In [6]:
train = datasets.CIFAR10("./347data", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.CIFAR10("./347data", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
train_set = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=10, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./347data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./347data/cifar-10-python.tar.gz to ./347data
Files already downloaded and verified


In [None]:
class CIFAR10(nn.Module):
    def __init__(self):
        super().__init__()
        self.dropout = nn.Dropout(0.5)
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(32, 32).view(-1, 3, 32, 32)
        self.to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, 10)

    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x
    
    def forward(self, x):
        x = F.pad(x, (0, 0, 0, 0))
        x = self.convs(x)
        x = x.view(-1, self.to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)