In [2]:
import numpy as np
import torch
import random

In [3]:
# One seed shared by every random number generator the notebook uses.
SEED = 0

# Seed Python, NumPy, the PyTorch default generator and the current CUDA device.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(SEED)

# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [4]:
import torchvision.datasets

# Fetch (if not already present under ./) and open both MNIST splits.
MNIST_train = torchvision.datasets.MNIST(root='./', train=True, download=True)
MNIST_test = torchvision.datasets.MNIST(root='./', train=False, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [5]:
# Show the dataset summary (number of datapoints, root location, split).
MNIST_train

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./
    Split: Train

In [6]:
# Pull the raw image and label tensors out of the dataset objects.
# `.data` / `.targets` are the current attribute names; the split-specific
# aliases (train_data, train_labels, test_data, test_labels) are deprecated.
X_train = MNIST_train.data
y_train = MNIST_train.targets
X_test = MNIST_test.data
y_test = MNIST_test.targets



In [7]:
# Inspect the raw dtypes of images and labels before any conversion.
X_train.dtype, y_train.dtype

(torch.uint8, torch.int64)

In [8]:
# Images become float32 so they can be fed to the linear layers. The test
# images need the same conversion because they go through the same network
# during evaluation.
X_train = X_train.float()
X_test = X_test.float()
# Labels are intentionally left as int64: CrossEntropyLoss takes class indices
# as integer targets, so a float copy of the labels is never needed.

In [9]:
# Inspect the tensor shapes before the images are flattened.
X_train.shape, y_train.shape

(torch.Size([60000, 28, 28]), torch.Size([60000]))

In [10]:
# Flatten every 28x28 image into a single row of 784 values; -1 lets the
# number of rows be inferred from the tensor size.
flat_shape = [-1, 28 * 28]
X_train, X_test = X_train.reshape(flat_shape), X_test.reshape(flat_shape)

In [85]:
class MNISTNet(torch.nn.Module):
    """Fully connected classifier: 784 inputs -> two ReLU hidden layers -> 10 outputs.

    Args:
        n_hidden_neurons: width of both hidden layers.
    """

    def __init__(self, n_hidden_neurons):
        super().__init__()

        # Layers are created in forward order; the attribute names double as
        # the keys of the module's state_dict.
        self.fc1 = torch.nn.Linear(28 * 28, n_hidden_neurons)
        self.act1 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(n_hidden_neurons, n_hidden_neurons)
        self.act2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(n_hidden_neurons, 10)

    def forward(self, x):
        """Return the 10 raw output scores (no softmax applied) for `x`."""
        hidden = self.act1(self.fc1(x))
        hidden = self.act2(self.fc2(hidden))
        return self.fc3(hidden)


# Network instance used by the rest of the notebook, with 100 units per hidden layer.
mnist_net = MNISTNet(100)

In [86]:
# Check whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

True

In [87]:
# Shell out to nvidia-smi to show the GPU model, driver/CUDA version and memory usage.
!nvidia-smi

Sat Sep 24 12:18:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P0    25W /  70W |   1014MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [88]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Module.to() moves the parameters in place and returns the module, so leaving
# it as the last expression also displays the architecture.
mnist_net.to(device)

MNISTNet(
  (fc1): Linear(in_features=784, out_features=100, bias=True)
  (act1): ReLU()
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (act2): ReLU()
  (fc3): Linear(in_features=100, out_features=10, bias=True)
)

In [89]:
# Cross-entropy over the 10 output scores, optimised with Adam.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=mnist_net.parameters(), lr=1e-3)

In [90]:
batch_size = 100
# Upper bound on the number of passes over the training set. The recorded run
# was stopped by hand (KeyboardInterrupt) before reaching it.
n_epochs = 1000

# One entry per completed epoch; entries are tensors living on `device`.
test_accuracy_history = []
test_loss_history = []

# Tensor.float() and Tensor.to() return new tensors rather than modifying the
# original, so the results have to be assigned back. Both calls are no-ops when
# the tensor already has the right dtype/device, which keeps this cell re-runnable.
X_test = X_test.float().to(device)
y_test = y_test.to(device)

for epoch in range(n_epochs):
  # Fresh random order of the training samples for every epoch.
  order = np.random.permutation(len(X_train))

  for start_index in range(0, len(X_train), batch_size):
    optimizer.zero_grad()

    batch_indexes = order[start_index : start_index + batch_size]

    X_batch = X_train[batch_indexes].to(device)
    # CrossEntropyLoss needs int64 class indices; cast and transfer in one call
    # so the labels are not bounced through a CPU tensor type on every batch.
    y_batch = y_train[batch_indexes].to(device=device, dtype=torch.long)

    # Call the module itself rather than .forward() so registered hooks run.
    preds = mnist_net(X_batch)

    loss_value = loss(preds, y_batch)
    loss_value.backward()

    optimizer.step()

  # Evaluation needs no gradients; without no_grad() every stored loss tensor
  # would keep the autograd graph of the whole test-set forward pass alive.
  with torch.no_grad():
    test_preds = mnist_net(X_test)
    test_loss_history.append(loss(test_preds, y_test))
    accuracy = (test_preds.argmax(dim = 1) == y_test).float().mean()

  test_accuracy_history.append(accuracy)
  print(accuracy)

tensor(0.9529, device='cuda:0')
tensor(0.9567, device='cuda:0')
tensor(0.9541, device='cuda:0')
tensor(0.9635, device='cuda:0')
tensor(0.9678, device='cuda:0')
tensor(0.9698, device='cuda:0')
tensor(0.9681, device='cuda:0')
tensor(0.9591, device='cuda:0')
tensor(0.9646, device='cuda:0')
tensor(0.9696, device='cuda:0')
tensor(0.9705, device='cuda:0')
tensor(0.9657, device='cuda:0')
tensor(0.9669, device='cuda:0')
tensor(0.9679, device='cuda:0')
tensor(0.9667, device='cuda:0')
tensor(0.9702, device='cuda:0')
tensor(0.9704, device='cuda:0')
tensor(0.9690, device='cuda:0')
tensor(0.9700, device='cuda:0')
tensor(0.9697, device='cuda:0')
tensor(0.9683, device='cuda:0')
tensor(0.9633, device='cuda:0')
tensor(0.9705, device='cuda:0')
tensor(0.9707, device='cuda:0')
tensor(0.9714, device='cuda:0')
tensor(0.9716, device='cuda:0')
tensor(0.9703, device='cuda:0')
tensor(0.9729, device='cuda:0')
tensor(0.9735, device='cuda:0')
tensor(0.9713, device='cuda:0')
tensor(0.9708, device='cuda:0')
tensor(0

KeyboardInterrupt: ignored

In [53]:
import matplotlib.pyplot as plt

In [91]:
# Copy each per-epoch accuracy tensor to host memory and convert it to NumPy
# so matplotlib can plot the values.
np_test_accuracy_history = [
    epoch_accuracy.cpu().numpy() for epoch_accuracy in test_accuracy_history
]

In [2]:
# Test accuracy after each completed epoch. `plt` and
# `np_test_accuracy_history` come from the cells above, so those cells must
# have been executed in the current kernel session before this one.
fig, ax = plt.subplots()
ax.plot(np_test_accuracy_history)
ax.set_xlabel('epoch')
ax.set_ylabel('test accuracy')
ax.set_title('MNIST test accuracy per epoch');

NameError: ignored