In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm

# MNIST

In [11]:
train = datasets.MNIST("", train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
test = datasets.MNIST("", train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
train_set = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
test_set = torch.utils.data.DataLoader(test, batch_size=10, shuffle=True)

In [12]:
class MNIST(nn.Module):
    def __init__(self):
        super().__init__()
        self.dropout = nn.Dropout(0.2)
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        x = torch.randn(28, 28).view(-1, 1, 28, 28)
        self._to_linear = None
        self.convs(x)
        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 10)
        
    def convs(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x
    
    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [15]:
net = MNIST()
optimizer = optim.Adam(net.parameters(), lr=0.001)
EPOCHS = 3
for batch in range(EPOCHS):
    for data in tqdm(train_set):
        X, y = data
        net.zero_grad()
        output = net(X)
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)

100%|██████████| 6000/6000 [00:40<00:00, 148.60it/s]
  0%|          | 13/6000 [00:00<00:46, 127.57it/s]

tensor(0.0320, grad_fn=<NllLossBackward0>)


100%|██████████| 6000/6000 [00:40<00:00, 146.90it/s]
  0%|          | 15/6000 [00:00<00:41, 143.48it/s]

tensor(0.0023, grad_fn=<NllLossBackward0>)


100%|██████████| 6000/6000 [00:40<00:00, 147.29it/s]

tensor(0.0094, grad_fn=<NllLossBackward0>)





# CIFAR-10