### Single GPU Training Loop

#### Imports

In [1]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms
from tqdm import tqdm

#### Loading Datasets

In [2]:
# Training split of MNIST, downloaded into ./data when it is not already there.
# Per-image pipeline: convert to a tensor, apply Normalize(mean=0, std=1)
# (which leaves the values unchanged), then flatten to a 1-D vector.
training_data = datasets.MNIST(
    "data",
    train=True,
    download=True,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=0.0, std=1.0),
            transforms.Lambda(torch.flatten),
        ]
    ),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4954597.39it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 133387.47it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 932804.69it/s] 


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4329665.63it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [3]:
# Held-out (train=False) split of MNIST, read from / downloaded into ./data.
# Per-image pipeline: convert to a tensor, apply Normalize(mean=0, std=1)
# (which leaves the values unchanged), then flatten to a 1-D vector.
test_data = datasets.MNIST(
    "data",
    train=False,
    download=True,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(mean=0.0, std=1.0),
            transforms.Lambda(torch.flatten),
        ]
    ),
)

In [4]:
from torch.utils.data import DataLoader

# Batch size shared by both loaders.
BATCH_SIZE = 64
# Page-locked host memory only speeds up host-to-GPU copies, so it is requested
# only when a CUDA device is present (asking for it on a CPU-only machine warns).
PIN_MEMORY = torch.cuda.is_available()

# Training batches are reshuffled on every pass.
train_dataloader = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True, pin_memory=PIN_MEMORY)
# Accuracy does not depend on sample order, so the test split is left unshuffled.
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, pin_memory=PIN_MEMORY)

In [None]:
# Pull a single batch from the training loader to inspect what it yields.
out = next(iter(train_dataloader))

In [None]:
# Shapes of the batch's two elements; the recorded output is [64, 784] and [64].
out[0].shape, out[1].shape

(torch.Size([64, 784]), torch.Size([64]))

In [5]:
class NN(torch.nn.Module):
  """Fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10.

  Every hidden Linear layer is followed by a ReLU. The final Linear layer is
  followed by LogSoftmax over dim 1, so `forward` returns log-probabilities.
  """

  def __init__(self):
    super().__init__()
    widths = (784, 256, 128, 64)
    layers = []
    # Hidden stack: one Linear + ReLU pair per consecutive pair of widths.
    for fan_in, fan_out in zip(widths, widths[1:]):
      layers.append(torch.nn.Linear(fan_in, fan_out))
      layers.append(torch.nn.ReLU())
    # Output head: 10 scores per sample, normalised to log-probabilities.
    layers.append(torch.nn.Linear(64, 10))
    layers.append(torch.nn.LogSoftmax(dim=1))
    self.model = torch.nn.Sequential(*layers)

  def forward(self, x):
    """Apply the layer stack to `x` and return its output."""
    log_probs = self.model(x)
    return log_probs

In [None]:
# Instantiate the network used for the first (CPU) run.
# NOTE(review): the name `nn` would clash with the conventional `torch.nn` alias
# if that alias were ever imported in this notebook.
nn = NN()

In [None]:
# Smoke test: feed 5 random 784-feature rows through the model and check the
# output shape. This rebinds `out`, which previously held a data batch.
out = nn(torch.randn(5, 784))
out.shape

torch.Size([5, 10])

### Checking CUDA Device Availability

In [None]:
# Pick the compute device: CUDA when PyTorch reports one, otherwise the CPU,
# and print which one was chosen.
is_device_available = torch.cuda.is_available()
device = 'cuda' if is_device_available else 'cpu'
print(
  f"Device Name: {torch.cuda.get_device_name()}"
  if is_device_available
  else f"Device Name: CPU"
)

Device Name: CPU


#### Move model to device

In [None]:
# Move the model to the selected device; the returned module's repr is the cell output.
nn.to(device)

NN(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=10, bias=True)
    (7): LogSoftmax(dim=1)
  )
)

In [None]:
# Adam with its default hyperparameters over all of the model's parameters.
optimizer = torch.optim.Adam(nn.parameters())
# NN ends in LogSoftmax, so its outputs are already log-probabilities and NLLLoss
# is the matching criterion. CrossEntropyLoss applies log-softmax itself and would
# normalise the outputs a second time.
criterion = torch.nn.NLLLoss()

In [6]:
def train(model, data, epochs=10, optim=None, loss_fn=None):
  """Run a supervised training loop over `data` for `epochs` passes.

  Args:
    model: torch.nn.Module to train; its parameters are updated in place.
    data: iterable yielding `(inputs, labels)` batches.
    epochs: number of full passes over `data`.
    optim: optimizer to step. It must have been built from `model`'s
      parameters. When None, the notebook-level `optimizer` is used.
    loss_fn: callable mapping `(output, labels)` to a scalar loss. When None,
      the notebook-level `criterion` is used.

  Returns:
    None.
  """
  # Prefer explicitly passed objects. The notebook-level names are rebound for
  # every new model, so relying on them can silently step the wrong parameters.
  step_optimizer = optimizer if optim is None else optim
  compute_loss = criterion if loss_fn is None else loss_fn

  # Send batches to wherever the model's weights live; fall back to the
  # notebook-level `device` only for a model without parameters.
  first_param = next(model.parameters(), None)
  target = device if first_param is None else first_param.device

  # Make sure mode-dependent layers (dropout, batch norm) are in training mode.
  model.train()
  for _ in tqdm(range(epochs)):
    for inputs, labels in data:
      inputs = inputs.to(target, non_blocking=True)
      labels = labels.to(target, non_blocking=True)

      output = model(inputs)
      loss = compute_loss(output, labels)

      step_optimizer.zero_grad()
      loss.backward()
      step_optimizer.step()

In [None]:
# Train `nn` on the training loader for the default number of epochs.
train(nn, train_dataloader)

100%|██████████| 10/10 [03:52<00:00, 23.23s/it]


### CPU Training time is about 3 minutes 52 seconds

In [7]:
def eval(model, data):
  """Print the classification accuracy of `model` over `data`.

  The name shadows the built-in `eval`; it is kept so existing calls still work.

  Args:
    model: torch.nn.Module whose output has one score per class along dim 1.
    data: iterable yielding `(inputs, labels)` batches.

  Returns:
    None. Prints "Accuracy: <fraction correct>", or nan when `data` is empty.
  """
  # Send batches to wherever the model's weights live; fall back to the
  # notebook-level `device` only for a model without parameters.
  first_param = next(model.parameters(), None)
  target = device if first_param is None else first_param.device

  total, correct = 0, 0
  was_training = model.training
  model.eval()  # inference behaviour for mode-dependent layers
  try:
    with torch.no_grad():  # no autograd graph is needed for scoring
      for inputs, labels in data:
        inputs = inputs.to(target)
        labels = labels.to(target)
        predictions = model(inputs).argmax(dim=1)
        correct = correct + (predictions == labels).sum()
        total += labels.size(0)
  finally:
    # Leave the model in the mode the caller had it in.
    model.train(was_training)

  # Divide as Python numbers so the ratio is not rounded to float32.
  accuracy = int(correct) / total if total else float("nan")
  print(f"Accuracy: {accuracy}")

In [None]:
# Report the accuracy of `nn` on the test loader.
eval(nn, test_dataloader)

Accuracy: 0.9803000092506409


#### Checking GPU availability

In [8]:
# Pick the compute device: CUDA when PyTorch reports one, otherwise the CPU,
# and print which one was chosen.
is_device_available = torch.cuda.is_available()
device = 'cuda' if is_device_available else 'cpu'
print(
  f"Device Name: {torch.cuda.get_device_name()}"
  if is_device_available
  else f"Device Name: CPU"
)

Device Name: NVIDIA A100-SXM4-40GB


In [9]:
# Fresh, untrained network for the second run, placed on the selected device.
# Ending the cell with the bare name displays the module's repr.
nn_gpu = NN().to(device)
nn_gpu

NN(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=10, bias=True)
    (7): LogSoftmax(dim=1)
  )
)

In [10]:
# Rebind the notebook-level optimizer to the new model's parameters
# (Adam with its default hyperparameters).
optimizer = torch.optim.Adam(nn_gpu.parameters())
# NN ends in LogSoftmax, so its outputs are already log-probabilities and NLLLoss
# is the matching criterion. CrossEntropyLoss applies log-softmax itself and would
# normalise the outputs a second time.
criterion = torch.nn.NLLLoss()

In [11]:
# Train `nn_gpu` on the training loader for the default number of epochs.
train(nn_gpu, train_dataloader)

100%|██████████| 10/10 [02:17<00:00, 13.71s/it]


In [12]:
# Report the accuracy of `nn_gpu` on the test loader.
eval(nn_gpu, test_dataloader)

Accuracy: 0.9794999957084656


### Total GPU training time is about 2 minutes 17 seconds, roughly 40% less than the CPU training time of 3 minutes 52 seconds (about 1.7× faster)