# Neural Networks

### Dataset Loading

In [3]:
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((.5), (.5))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
valid_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

100%|██████████| 9.91M/9.91M [00:00<00:00, 54.1MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.76MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.9MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 7.57MB/s]


In [17]:
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=True)

## MLP – Multi-Layer Perceptron

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class MLP(nn.Module):
  def __init__(self):
    super(MLP, self).__init__()
    self.linear1 = nn.Linear(784, 256)
    self.linear2 = nn.Linear(256, 128)
    self.linear3 = nn.Linear(128, 10)

  def forward(self, x):
    x = x.view(-1, 28 * 28)
    x = F.relu(self.linear1(x))
    x = F.relu(self.linear2(x))
    x = self.linear3(x)
    return F.log_softmax(x, dim=1)


mlp = MLP()

In [16]:
mlp

MLP(
  (linear1): Linear(in_features=784, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=128, bias=True)
  (linear3): Linear(in_features=128, out_features=10, bias=True)
)

In [19]:
sum(p.numel() for p in mlp.parameters())

235146

In [20]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [21]:
def fit(model, train_loader):
  optimizer = torch.optim.Adam(model.parameters(), lr=.001)
  criterion = nn.CrossEntropyLoss()
  EPOCHS = 5
  BATCH_SIZE = 64
  model.train()
  for epoch in range(EPOCHS):
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()
      outputs = model(data)
      loss = criterion(outputs, target)
      loss.backward()
      optimizer.step()

      predicted = torch.max(outputs.data, 1)[1]
      correct += (predicted == target).sum()
      if batch_idx % 50 == 0:
        print('Epoch: {} [{}/{} ({:.0f})]\t Loss: {:.6f}\t Accuracy: {:.3f}%'.format(epoch, batch_idx * len(data),
                                                                                     len(train_loader.dataset), 100. * batch_idx / len(train_loader),
                                                                                     loss.item(), float(correct * 100) / float(BATCH_SIZE * (batch_idx + 1))))

In [22]:
mlp = mlp.to(device)

In [23]:
fit(mlp, train_loader)

Epoch: 0 [0/60000 (0)]	 Loss: 2.297982	 Accuracy: 15.625%
Epoch: 0 [3200/60000 (5)]	 Loss: 0.524524	 Accuracy: 63.297%
Epoch: 0 [6400/60000 (11)]	 Loss: 0.617414	 Accuracy: 74.025%
Epoch: 0 [9600/60000 (16)]	 Loss: 0.371783	 Accuracy: 78.539%
Epoch: 0 [12800/60000 (21)]	 Loss: 0.258577	 Accuracy: 80.822%
Epoch: 0 [16000/60000 (27)]	 Loss: 0.285520	 Accuracy: 82.588%
Epoch: 0 [19200/60000 (32)]	 Loss: 0.281095	 Accuracy: 83.757%
Epoch: 0 [22400/60000 (37)]	 Loss: 0.212306	 Accuracy: 84.696%
Epoch: 0 [25600/60000 (43)]	 Loss: 0.213868	 Accuracy: 85.548%
Epoch: 0 [28800/60000 (48)]	 Loss: 0.192046	 Accuracy: 86.128%
Epoch: 0 [32000/60000 (53)]	 Loss: 0.324525	 Accuracy: 86.720%
Epoch: 0 [35200/60000 (59)]	 Loss: 0.361964	 Accuracy: 87.236%
Epoch: 0 [38400/60000 (64)]	 Loss: 0.117989	 Accuracy: 87.677%
Epoch: 0 [41600/60000 (69)]	 Loss: 0.246265	 Accuracy: 88.011%
Epoch: 0 [44800/60000 (75)]	 Loss: 0.217776	 Accuracy: 88.360%
Epoch: 0 [48000/60000 (80)]	 Loss: 0.159120	 Accuracy: 88.661%
E

In [24]:
@torch.inference_mode()
def evaluate(model, valid_loader):
  BATCH_SIZE = 32
  correct = 0
  model.eval()
  for data, target in valid_loader:
    data, target = data.to(device), target.to(device)
    outputs = model(data)
    predicted = torch.max(outputs, 1)[1]
    correct += (predicted == target).sum()
  print('Valid accuracy: {:.3f}'.format(float(correct) / (len(valid_loader) * BATCH_SIZE) * 100))

In [25]:
evaluate(mlp, valid_loader)

Valid accuracy: 97.005


## CNN – Convolutional Neural Network

In [26]:
class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
    self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
    self.conv3 = nn.Conv2d(32, 64, kernel_size=5)
    self.fc1 = nn.Linear(3 * 3 * 64, 256)
    self.fc2 = nn.Linear(256, 10)

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(F.max_pool2d(self.conv2(x), 2))
    x = F.dropout(x, p=.5, training=self.training)
    x = F.relu(F.max_pool2d(self.conv3(x), 2))
    x = F.dropout(x, p=.5, training=self.training)
    x = x.view(-1, 3 * 3 * 64)
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)


In [27]:
cnn = CNN().to(device)

In [28]:
cnn

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [29]:
fit(cnn, train_loader)

Epoch: 0 [0/60000 (0)]	 Loss: 2.297043	 Accuracy: 15.625%
Epoch: 0 [3200/60000 (5)]	 Loss: 0.611134	 Accuracy: 45.803%
Epoch: 0 [6400/60000 (11)]	 Loss: 0.428794	 Accuracy: 65.099%
Epoch: 0 [9600/60000 (16)]	 Loss: 0.118055	 Accuracy: 73.531%
Epoch: 0 [12800/60000 (21)]	 Loss: 0.163207	 Accuracy: 78.039%
Epoch: 0 [16000/60000 (27)]	 Loss: 0.189451	 Accuracy: 80.989%
Epoch: 0 [19200/60000 (32)]	 Loss: 0.147031	 Accuracy: 83.103%
Epoch: 0 [22400/60000 (37)]	 Loss: 0.109523	 Accuracy: 84.624%
Epoch: 0 [25600/60000 (43)]	 Loss: 0.066287	 Accuracy: 85.832%
Epoch: 0 [28800/60000 (48)]	 Loss: 0.093503	 Accuracy: 86.845%
Epoch: 0 [32000/60000 (53)]	 Loss: 0.179391	 Accuracy: 87.662%
Epoch: 0 [35200/60000 (59)]	 Loss: 0.104164	 Accuracy: 88.331%
Epoch: 0 [38400/60000 (64)]	 Loss: 0.126086	 Accuracy: 88.953%
Epoch: 0 [41600/60000 (69)]	 Loss: 0.054818	 Accuracy: 89.483%
Epoch: 0 [44800/60000 (75)]	 Loss: 0.132032	 Accuracy: 89.923%
Epoch: 0 [48000/60000 (80)]	 Loss: 0.031060	 Accuracy: 90.355%
E

In [31]:
evaluate(cnn, valid_loader)

Valid accuracy: 99.042


In [30]:
sum(p.numel() for p in cnn.parameters())

228010