In [1]:
import torch
import torchvision
from torchvision.datasets import MNIST
import torch.utils.data.dataloader as DataLoader


In [3]:
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
import torchvision.transforms as transforms

In [4]:
import numpy as np

In [5]:
ds = MNIST(root="data/", train= True, transform=transforms.ToTensor(), download=True)

100%|██████████| 9.91M/9.91M [00:01<00:00, 6.83MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 160kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.52MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.1MB/s]


In [6]:
test_ds =  MNIST(root="data/" , train= False , download=True , transform=transforms.ToTensor())

In [7]:
def split_generator(num, percentage):
  size = int(num*percentage)
  indices = np.random.permutation(num)
  return indices[size:], indices[:size]


In [8]:
train_indices, validation_indices = split_generator(len(ds), 0.2)
print(len(train_indices))

48000


In [9]:
from torch.utils.data import DataLoader

In [10]:
batch_size =100
train_sampler = SubsetRandomSampler(train_indices)
validation_sampler = SubsetRandomSampler(validation_indices)


train_dl = DataLoader(ds, batch_size, sampler=train_sampler)
validation_dl = DataLoader(ds, batch_size, sampler=validation_sampler)

In [11]:
import torch.nn as nn

In [12]:
class MnistModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.linear1 = nn.Linear(784,128)
    self.relu1 = nn.ReLU()
    self.linear2 = nn.Linear(128,64)
    self.relu2 = nn.ReLU()
    self.linear3 = nn.Linear(64,10)

  def forward(self,xb):
    xb = xb.reshape(-1,784)
    out1 = self.relu1(self.linear1(xb))
    out2 = self.relu2(self.linear2(out1))
    out3 = self.linear3(out2)
    return out3

model = MnistModel()

In [13]:
def get_device():
  if torch.cuda.is_available():
    return torch.device('cuda')
  else:
    return torch.device('cpu')

In [14]:
device = get_device()
device

device(type='cpu')

In [15]:
def device_to(data,device):
  return data.to(device, non_blocking = True)

- Now we are going to code a function called DataLoader which sends data to the selected device as the batches are accessed
- We cant send all the datas intot the GPU in one Goo, therefore we should send datas in batches when needed!

In [16]:
class DataLoader():
  """
  A custom DataLoader that moves batches of data to the specified device.
  """
  def __init__(self,dl,device):
    self.dl = dl
    self.device = device

  def __iter__(self):
    for b in self.dl:
      yield device_to(b,self.device)

  def __len__(self):
    return len(self.dl)

In [17]:
def accuracy(preds,labels):
  _,x = torch.max(preds,dim=1)
  return torch.tensor(torch.sum(x==labels).item()/len(preds))

In [22]:
for xb,yb in train_dl:
  preds=model(xb)
  print(accuracy(preds,yb))
  break


tensor(0.1800)


In [23]:
def loss_batch(model,loss_fn,xb,yb,optimizer=None,metric=None):
  preds =  model(xb)
  loss = loss_fn(preds,yb)
  if optimizer is not None:
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
  else:
    with torch.no_grad():
      loss = loss_fn(preds,yb)
  metric_val=None
  if metric is not None:
    metric_val = metric(preds,yb)
  return loss.item(),len(xb),metric_val

In [24]:
def evaluate(model,loss_fn,valid_dl,optim=None,metric=None):
    result = [loss_batch(model,loss_fn,xb,yb,optim,metric) for xb,yb in valid_dl]
    losses,nums,metrics = zip(*result)
    total_size = np.sum(nums)

    avg_loss = np.sum(np.multiply(losses,nums))/total_size

    if metric is not None:
      avg_metrics = np.mean(np.multiply(metrics,nums))
    else:
      avg_metrics = None
    return avg_loss,total_size,avg_metrics

In [25]:
import torch.nn.functional as F

In [26]:
loss_fn = F.cross_entropy

In [27]:
optimizer =  torch.optim.SGD(model.parameters(),lr=0.5)

In [28]:
def fit(epochs,model,loss_fn,train_dl,valid_dl,optim=None,metric=None):
  losses, metrics = [] , []
  for epoch in range(epochs):
    for xb,yb in train_dl:
      loss,_,_ = loss_batch(model,loss_fn,xb,yb,optim)
    avg_loss,_,avg_metric = evaluate(model,loss_fn,valid_dl,optim,metric)
    losses.append(avg_loss)
    metrics.append(avg_metric)
    print("Epoch {}/{}, Loss: {}, Accuracy Achieved: {}".format(epoch+1,epochs,avg_loss,avg_metric))
  return losses, metrics # Added return statement

In [29]:
device_to(model,device)

MnistModel(
  (linear1): Linear(in_features=784, out_features=128, bias=True)
  (relu1): ReLU()
  (linear2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (linear3): Linear(in_features=64, out_features=10, bias=True)
)

In [30]:
fit(6,model,loss_fn,train_dl,validation_dl,optimizer,accuracy)

Epoch 1/6, Loss: 0.15554267127687732, Accuracy Achieved: 95.31666686137517
Epoch 2/6, Loss: 0.09456867960592111, Accuracy Achieved: 96.86666756868362
Epoch 3/6, Loss: 0.06826254486028725, Accuracy Achieved: 97.86666775743167
Epoch 4/6, Loss: 0.05209641293234502, Accuracy Achieved: 98.19166774551074
Epoch 5/6, Loss: 0.042344460531603546, Accuracy Achieved: 98.65000089009602
Epoch 6/6, Loss: 0.031595993701193946, Accuracy Achieved: 99.00000085433324


([np.float64(0.15554267127687732),
  np.float64(0.09456867960592111),
  np.float64(0.06826254486028725),
  np.float64(0.05209641293234502),
  np.float64(0.042344460531603546),
  np.float64(0.031595993701193946)],
 [np.float64(95.31666686137517),
  np.float64(96.86666756868362),
  np.float64(97.86666775743167),
  np.float64(98.19166774551074),
  np.float64(98.65000089009602),
  np.float64(99.00000085433324)])