<a href="https://colab.research.google.com/github/JonathanSum/Pytorch_Lighting_example/blob/master/Lightning2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

In [None]:
# Build the 3x3 grid holding the integers 1..9 in row-major order, then show it.
x = np.arange(1, 10).reshape(3, 3)
print(x)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [None]:
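# a1, a2 = np.split(x,1,axis=-1)
# NOTE: kept disabled -- np.split(x, 1, ...) returns a list with a single array,
# so unpacking it into two names (a1, a2) would raise ValueError.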

In [None]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import random_split, DataLoader


In [None]:
# Plain multi-layer perceptron: flattened 28x28 input -> 64 -> 64 -> 10 logits,
# with a ReLU after each of the two hidden layers.
model = nn.Sequential(
    nn.Linear(in_features=28 * 28, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
)

In [None]:
# Objective: cross-entropy between the model's logits and the integer class labels.
loss = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over every parameter of `model`.
optimiser = optim.SGD(params=model.parameters(), lr=0.01)

In [None]:
# Load the MNIST training split (downloaded into ./data on first use); each image
# is converted to a tensor by ToTensor.
train_data = datasets.MNIST("data", train=True, download=True, transform=transforms.ToTensor())
print(len(train_data))

# Hold out a fixed number of samples for validation and derive the training size
# from the dataset length, so the two sizes always add up to len(train_data).
VAL_SIZE = 5000
BATCH_SIZE = 32
train, val = random_split(train_data, [len(train_data) - VAL_SIZE, VAL_SIZE])

# Reshuffle the training samples at every epoch (DataLoader defaults to
# shuffle=False, which would replay the batches in the same order each epoch).
train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
# Validation order does not influence the metrics, so it is left unshuffled.
val_loader = DataLoader(val, batch_size=BATCH_SIZE)

60000


In [None]:
# Train `model` for a few epochs; after each epoch report the training loss and
# the loss / accuracy on the validation loader.
nb_epochs = 5
for epoch in range(nb_epochs):
  # ---------------- training pass ----------------
  # Explicit mode switch: keeps the loop correct if layers with train/eval
  # behaviour (dropout, batch-norm) are ever added to the model.
  model.train()
  losses = list()
  batch_sizes = list()
  for batch in train_loader:
    # x: b x 1 x 28 x 28
    x, y = batch
    b = x.size(0)
    x = x.view(b, -1)  # flatten every image into one row for the first Linear layer

    # 1 forward
    l = model(x)  # l: logits

    # 2 compute the objective function
    J = loss(l, y)

    # 3 clear the gradients left over from the previous step
    optimiser.zero_grad()

    # 4 accumulate the partial derivatives of J with respect to the parameters
    J.backward()

    # 5 step in the opposite direction of the gradient
    optimiser.step()

    losses.append(J.item())
    batch_sizes.append(b)

  # J is a per-batch mean and the last batch may be smaller than the others, so
  # weight every batch by its size to obtain the true per-sample average.
  weights = torch.tensor(batch_sizes, dtype=torch.float)
  train_loss = (torch.tensor(losses, dtype=torch.float) * weights).sum() / weights.sum()
  print(f'Epoch {epoch + 1}, train loss: {train_loss:.2f}')

  # ---------------- validation pass ----------------
  model.eval()
  losses = list()
  accuracies = list()
  batch_sizes = list()
  # No parameter update happens here, so gradient tracking is disabled for the
  # forward pass and for the loss computation.
  with torch.no_grad():
    for batch in val_loader:
      # x: b x 1 x 28 x 28
      x, y = batch
      b = x.size(0)
      x = x.view(b, -1)

      # 1 forward
      l = model(x)  # l: logits

      # 2 compute the objective function
      J = loss(l, y)

      losses.append(J.item())
      # fraction of samples in this batch whose arg-max logit equals the label
      accuracies.append(y.eq(l.argmax(dim=1).cpu()).float().mean().item())
      batch_sizes.append(b)

  weights = torch.tensor(batch_sizes, dtype=torch.float)
  val_loss = (torch.tensor(losses, dtype=torch.float) * weights).sum() / weights.sum()
  val_acc = (torch.tensor(accuracies, dtype=torch.float) * weights).sum() / weights.sum()
  print(f'Epoch {epoch + 1}, validation loss: {val_loss:.2f}')
  print(f'Epoch {epoch + 1}, validation accuracies: {val_acc:.2f}')

Epoch 1, train loss: 1.21
Epoch 1, validation loss: 1.15
Epoch 1, validation accuracies: 0.86
Epoch 2, train loss: 0.40
Epoch 2, validation loss: 0.39
Epoch 2, validation accuracies: 0.90
Epoch 3, train loss: 0.32
Epoch 3, validation loss: 0.31
Epoch 3, validation accuracies: 0.91
Epoch 4, train loss: 0.28
Epoch 4, validation loss: 0.28
Epoch 4, validation accuracies: 0.92
Epoch 5, train loss: 0.25
Epoch 5, validation loss: 0.25
Epoch 5, validation accuracies: 0.93


In [None]:
# nn.Linear(28*28, 64),
# nn.ReLU(),
# nn.Linear(64, 64),
# nn.ReLU(),
# nn.Linear(64, 10)


class ResNet(nn.Module):
  """Small fully connected classifier with one residual (skip) connection.

  With ``h1 = relu(l1(x))`` and ``h2 = relu(l2(h1))`` the network returns
  ``l3(dropout(h2 + h1))``.

  Args:
    in_features: size of the last dimension of the input (default ``28*28``).
    hidden_dim: width of both hidden layers; a single value is used because
      the skip connection ``h2 + h1`` needs matching shapes (default ``64``).
    num_classes: number of output logits (default ``10``).
    dropout: drop probability applied to the residual sum (default ``0.1``).
  """

  def __init__(self, in_features=28*28, hidden_dim=64, num_classes=10, dropout=0.1):
    super().__init__()
    self.l1 = nn.Linear(in_features, hidden_dim)
    self.l2 = nn.Linear(hidden_dim, hidden_dim)
    self.l3 = nn.Linear(hidden_dim, num_classes)
    self.do = nn.Dropout(dropout)

  def forward(self, x):
    """Return the logits for ``x``, a tensor whose last dimension is ``in_features``."""
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    # Residual connection: add the first hidden activation back before dropout.
    do = self.do(h2 + h1)
    logits = self.l3(do)
    return logits


# Default arguments reproduce the original 784 -> 64 -> 64 -> 10 network.
model = ResNet()

In [None]:
# Objective: cross-entropy between the logits and the integer class labels.
loss = nn.CrossEntropyLoss()

# `parameters()` hands back an iterator, so `params` can be walked only once;
# the optimiser therefore receives its own fresh iterator.
params = model.parameters()
optimiser = optim.SGD(params=model.parameters(), lr=0.01)

In [None]:
# Train `model` for a few epochs; after each epoch report loss and accuracy on
# both the training and the validation loader.
nb_epochs = 5
for epoch in range(nb_epochs):
  # ---------------- training pass ----------------
  losses = list()
  accuracies = list()
  batch_sizes = list()
  model.train()  # enable dropout
  for batch in train_loader:
    # x: b x 1 x 28 x 28
    x, y = batch
    b = x.size(0)
    x = x.view(b, -1)  # flatten every image into one row for the first Linear layer

    # 1 forward
    l = model(x)  # l: logits

    # 2 compute the objective function
    J = loss(l, y)

    # 3 clear the gradients left over from the previous step
    optimiser.zero_grad()

    # 4 accumulate the partial derivatives of J with respect to the parameters
    J.backward()

    # 5 step in the opposite direction of the gradient
    optimiser.step()

    losses.append(J.item())
    # fraction of samples in this batch whose arg-max logit equals the label
    # (computed from the logits produced before the parameter update)
    accuracies.append(y.eq(l.argmax(dim=1).cpu()).float().mean().item())
    batch_sizes.append(b)

  # Per-batch means are weighted by batch size because the last batch may be
  # smaller than the others; this yields the true per-sample average.
  weights = torch.tensor(batch_sizes, dtype=torch.float)
  train_loss = (torch.tensor(losses, dtype=torch.float) * weights).sum() / weights.sum()
  train_acc = (torch.tensor(accuracies, dtype=torch.float) * weights).sum() / weights.sum()
  print(f'Epoch {epoch + 1}, train loss: {train_loss:.2f}')
  print(f'Epoch {epoch + 1}, train accuracies: {train_acc:.2f}')

  # ---------------- validation pass ----------------
  losses = list()
  accuracies = list()
  batch_sizes = list()
  model.eval()  # disable dropout
  # No parameter update happens here, so gradient tracking is disabled for the
  # forward pass and for the loss computation.
  with torch.no_grad():
    for batch in val_loader:
      # x: b x 1 x 28 x 28
      x, y = batch
      b = x.size(0)
      x = x.view(b, -1)

      # 1 forward
      l = model(x)  # l: logits

      # 2 compute the objective function
      J = loss(l, y)

      losses.append(J.item())
      accuracies.append(y.eq(l.argmax(dim=1).cpu()).float().mean().item())
      batch_sizes.append(b)

  weights = torch.tensor(batch_sizes, dtype=torch.float)
  val_loss = (torch.tensor(losses, dtype=torch.float) * weights).sum() / weights.sum()
  val_acc = (torch.tensor(accuracies, dtype=torch.float) * weights).sum() / weights.sum()
  print(f'Epoch {epoch + 1}, validation loss: {val_loss:.2f}')
  print(f'Epoch {epoch + 1}, validation accuracies: {val_acc:.2f}')

Epoch 1, train loss: 0.87
Epoch 1, train accuracies: 0.77
Epoch 1, validation loss: 0.40
Epoch 1, validation accuracies: 0.89
Epoch 2, train loss: 0.38
Epoch 2, train accuracies: 0.89
Epoch 2, validation loss: 0.31
Epoch 2, validation accuracies: 0.91
Epoch 3, train loss: 0.31
Epoch 3, train accuracies: 0.91
Epoch 3, validation loss: 0.26
Epoch 3, validation accuracies: 0.92
Epoch 4, train loss: 0.27
Epoch 4, train accuracies: 0.92
Epoch 4, validation loss: 0.23
Epoch 4, validation accuracies: 0.93
Epoch 5, train loss: 0.24
Epoch 5, train accuracies: 0.93
Epoch 5, validation loss: 0.21
Epoch 5, validation accuracies: 0.94


In [None]:
# NOTE: this cell re-defines the ResNet class from an earlier cell; the
# definition below shadows it for everything executed afterwards.
class ResNet(nn.Module):
    """Small fully connected classifier with one residual (skip) connection.

    With ``h1 = relu(l1(x))`` and ``h2 = relu(l2(h1))`` the network returns
    ``l3(dropout(h2 + h1))``.

    Args:
        in_features: size of the last dimension of the input (default ``28 * 28``).
        hidden_dim: width of both hidden layers; a single value is used because
            the skip connection ``h2 + h1`` needs matching shapes (default ``64``).
        num_classes: number of output logits (default ``10``).
        dropout: drop probability applied to the residual sum (default ``0.1``).
    """

    def __init__(self, in_features=28 * 28, hidden_dim=64, num_classes=10, dropout=0.1):
        super().__init__()
        self.l1 = nn.Linear(in_features, hidden_dim)
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.l3 = nn.Linear(hidden_dim, num_classes)
        self.do = nn.Dropout(dropout)

    def forward(self, x):
        """Return the logits for ``x``, a tensor whose last dimension is ``in_features``."""
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        # Residual connection: add the first hidden activation back before dropout.
        do = self.do(h2 + h1)
        logits = self.l3(do)
        return logits


# Replaces `model` with a fresh, untrained network built with the default sizes.
model = ResNet()
