<a href="https://colab.research.google.com/github/JanaRusrus/PyTorch-_Lightning_MasterClass/blob/main/01_MNIST_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader


In [18]:
 torch.randn(5).cuda()

tensor([-0.2801, -0.9804,  0.1072, -1.1904,  0.0494], device='cuda:0')

In [27]:
# train, val split
train_data = datasets.MNIST('data',train=True, download=True, transform=transforms.ToTensor())
train, val = random_split(train_data,[55000,5000])
train_loader = DataLoader(train, batch_size=32)
val_loader = DataLoader(val, batch_size=32)


In [28]:
# Define the model
model = nn.Sequential(
    nn.Linear(28*28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Dropout(0.1),  # if we're overfitting
    nn.Linear(64, 10)

)


In [40]:
# Define a more flexible model:

class ResNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.l1 = nn.Linear(28*28, 64)
    self.l2 = nn.Linear(64, 64)
    self.l3 = nn.Linear(64,10)
    self.do = nn.Dropout(0.1)

  def forward(self, x):
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    do = self.do(h2 + h1)
    logits = self.l3(do)
    return logits

model = ResNet()

In [41]:
# Define the optimizer
params = model.parameters()
optimizer = optim.SGD(params, lr =1e-2)

In [42]:
# Define the loss
loss = nn.CrossEntropyLoss()

In [44]:
# Training and validations loop
nb_epochs = 5
for epoch in range(nb_epochs):
  losses = list()
  accuracies = list()
  model.train()  #because I use dropout

  # **Move the model to the GPU**
  model.cuda()

  for batch in train_loader:
    x,y = batch

    # x: b x 1 x 28 x28
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # 1. Forward
    l = model(x) #l:logits

    # 2. compute the objective function
    J = loss(l,y.cuda())

    # 3. Cleaning the gradients
    model.zero_grad()

    # 4. Accumlate the partial derivitives of J w.r.t params
    J.backward()
    # params.grad.sum()

    # 5. step in the opposite direction of the gradient
    optimizer.step()
    # with torch.no_grad(): params = params - eta * params.grad
    losses.append(J.item())

  print(f'Epoch{epoch +1}, train loss: {torch.tensor(losses).mean():.2f}')

  losses = list()
  accuracies = list()
  model.eval()
  for batch in train_loader:
    x,y = batch
    # x: b x 1 x 28 x28
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # 1. Forward
    with torch.no_grad():
      l = model(x) #l:logits

    # 2. compute the objective function
    J = loss(l,y.cuda())
    losses.append(J.item())
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())


print(f'Epoch{epoch +1}', end=' , ')
print(f'Epoch{epoch +1}, val loss: {torch.tensor(losses).mean():.2f}')
print(f'val acc:{torch.tensor(accuracies).mean():.2f}' )








Epoch1, train loss: 0.90
Epoch2, train loss: 0.38
Epoch3, train loss: 0.31
Epoch4, train loss: 0.27
Epoch5, train loss: 0.24
Epoch5 , Epoch5, val loss: 0.20
val acc:0.94
