## Pytorch: Training loop for a simple neural network

In [1]:
import numpy as np
import pandas as pd
local_path_mnist_train = "/Users/gunnvantsaini/OneDrive/project_codes/content/dl_basics/vision/sony/data/mnist_train.csv"
local_path_mnist_test = "/Users/gunnvantsaini/OneDrive/project_codes/content/dl_basics/vision/sony/data/mnist_test.csv"
train = pd.read_csv(local_path_mnist_train)
test = pd.read_csv(local_path_mnist_test)

In [2]:
## Define our model
from torch import nn


class Model(nn.Module):
    def __init__(self):
        super().__init__()
        ## define the layers and activations
        self.activation1 = nn.Sigmoid()
        self.layer1 = nn.Linear(784,3)
        self.layer2 = nn.Linear(3,10)
        self.activation2 = nn.Softmax()        
    def forward(self,X):
        ## define the forward pass
        z = self.layer1(X)
        h = self.activation1(z)
        z = self.layer2(h)
        probs = self.activation2(z)
        return probs

In [3]:
## Create a dataloader
## Need to define a datatset->dataloader
import torch
from torch.utils.data import Dataset, DataLoader
## Need to write a training loop

In [4]:
class MnistData(Dataset):
    # __init__, specify the source of raw data
    # __len__, logic to give the total number of samples in our dataste
    #__getitem__, logic to obtain one sample from our raw data
    
    def __init__(self,df):
        self.X = df.drop('label',axis=1).values/255.0
        self.y = df['label'].values
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self,idx):
        x = self.X[idx]
        actual = self.y[idx]
        #sample = [x,actual]
        sample = {'X':x,'y':actual}
        #sample = (x,actual)
        return sample
        

In [5]:
mnist_dataset_pytorch = MnistData(train)
mnist_batched = DataLoader(mnist_dataset_pytorch,batch_size=20)

### Training loop
```python
for x,y in dataloader:
    probs = model(x) ## forward
    loss  =  loss_fn(y,probs)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    '''with torch.no_grad():
        update_wts()
        zero_grad()'''
    print(loss,acc)
   
```

In [6]:
## Use the predifined pytorch function to calulate the loss
loss_fn = torch.nn.CrossEntropyLoss()
## Define the optimizer to update the weights in backward pass
mod = Model()

In [9]:
[p for p in mod.parameters()]

[Parameter containing:
 tensor([[-0.0330,  0.0123,  0.0071,  ...,  0.0096, -0.0114,  0.0146],
         [ 0.0011, -0.0054,  0.0254,  ..., -0.0224,  0.0289, -0.0138],
         [ 0.0177, -0.0216,  0.0079,  ...,  0.0053,  0.0091,  0.0003]],
        requires_grad=True),
 Parameter containing:
 tensor([0.0097, 0.0253, 0.0105], requires_grad=True),
 Parameter containing:
 tensor([[ 0.1061, -0.0544, -0.4065],
         [ 0.4149, -0.4103,  0.2283],
         [-0.0717, -0.0133, -0.0577],
         [-0.5387, -0.5692, -0.2181],
         [-0.4720, -0.0647,  0.5554],
         [-0.1463,  0.4207,  0.0304],
         [-0.4009, -0.3487,  0.3090],
         [-0.4108, -0.4176, -0.5003],
         [ 0.2553, -0.3827,  0.5291],
         [-0.2433,  0.2578,  0.5442]], requires_grad=True),
 Parameter containing:
 tensor([ 0.3994, -0.3766, -0.3025, -0.2944, -0.3524,  0.2495,  0.5662,  0.4128,
          0.0897, -0.5661], requires_grad=True)]

In [10]:
import torch.optim as optim

In [11]:
opt = optim.SGD(mod.parameters(),lr = 0.01)

In [27]:
epochs = 10
for epoch in range(epochs):
    Acc = []
    Loss = []
    for itr,batch in enumerate(mnist_batched):
        x = batch['X'].float()
        y = batch['y']
        p = mod(x)
        loss = loss_fn(p,y)
        loss.backward()
        Loss.append(loss.item())
        pred_cls = p.argmax(axis=1)
        acc = (pred_cls == y).float().mean().item()
        Acc.append(acc)
        opt.step()
        opt.zero_grad()
    avg_acc = np.array(Acc).mean()
    avg_loss = np.array(Loss).mean()
    print(f'Epoch {epoch}, loss {avg_loss}, acc {avg_acc}')

  probs = self.activation2(z)


Epoch 0, loss 2.0229775671731858, acc 0.4774047631273667
Epoch 1, loss 2.018493793067478, acc 0.47840476315645947
Epoch 2, loss 2.014512245144163, acc 0.4795238108223393
Epoch 3, loss 2.010971406982059, acc 0.48066666788288526
Epoch 4, loss 2.0078140796933854, acc 0.48111904887216433
Epoch 5, loss 2.0049892501036326, acc 0.4817380965536549
Epoch 6, loss 2.0024520602112723, acc 0.48228571560411226
Epoch 7, loss 2.0001638223443714, acc 0.48266666803331604
Epoch 8, loss 1.9980914253280275, acc 0.4828809537419251
Epoch 9, loss 1.9962066287086124, acc 0.48340476327708787


In [12]:
pred_probs = np.array([0.3,0.5,0.2])

In [13]:
pred_probs.argmax()

1

In [14]:
pred_class = np.array([5,6,8,0])
actual_class = np.array([5,6,0,0])

In [18]:
(pred_class == actual_class)

array([ True,  True, False,  True])

In [22]:
(pred_class == actual_class).sum()

3

In [20]:
(pred_class == actual_class).mean()

0.75

## Convolutional NN

- Tensorflow API: Keras
- PyTorch

In [28]:
train.head(2)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
X,y = train.drop('label',axis=1).values/255.0, train['label'].values

In [30]:
X.shape

(42000, 784)

In [31]:
X.reshape((42000,28,28)).shape

(42000, 28, 28)

In [32]:
import tensorflow as tf

In [33]:
model = tf.keras.Sequential(

[
    tf.keras.layers.Conv2D(filters=6,kernel_size = (3,3),input_shape=(28,28,1)),## h,w,c
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(filters=16,kernel_size = (3,3)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=120,activation='tanh'),
    tf.keras.layers.Dense(units=84,activation='tanh'),
    tf.keras.layers.Dense(units=10,activation='softmax')
]

)

2022-01-11 14:23:11.189092: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-11 14:23:11.189985: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1


In [34]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 6)         60        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 6)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 16)        880       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 16)          0         
_________________________________________________________________
flatten (Flatten)            (None, 400)               0         
_________________________________________________________________
dense (Dense)                (None, 120)               48120     
_________________________________________________________________
dense_1 (Dense)              (None, 84)                1

In [35]:
model.compile(loss="sparse_categorical_crossentropy",
                optimizer = "sgd",
             metrics = ['accuracy'])

In [37]:
X = X.reshape((42000,28,28,1))

In [39]:
X = tf.constant(X,dtype = 'float')
y = tf.constant(y,dtype = 'float')

In [40]:
mnsit_data = tf.data.Dataset.from_tensor_slices((X,y))

In [42]:
mnsit_data = mnsit_data.batch(64)

In [43]:
model.fit(mnsit_data,epochs = 10,)

Epoch 1/10


2022-01-11 14:28:36.168266: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-01-11 14:28:36.169211: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x147290e50>

In [48]:
model.get_weights()[0].shape

(3, 3, 1, 6)

In [55]:
model.get_weights()[0][:,:,:,0].shape

(3, 3, 1)

In [62]:
model.get_weights()[0][:,:,:,5]

array([[[-0.01409336],
        [ 0.00228957],
        [ 0.18896261]],

       [[-0.29381236],
        [-0.17852564],
        [ 0.35202098]],

       [[-0.35687378],
        [-0.13874947],
        [ 0.30939418]]], dtype=float32)

In [64]:
### How to use a validation dataset and see the fitting behaviour of the model
from sklearn.model_selection import train_test_split
X,y = train.drop('label',axis=1).values/255.0, train['label'].values
X = X.reshape((42000,28,28,1))
X_train,X_val,y_train,y_val = train_test_split(X,y,test_size = 0.10, random_state=42)

In [65]:
X_train = tf.constant(X_train,dtype = 'float')
y_train = tf.constant(y_train,dtype = 'float')
X_val = tf.constant(X_val,dtype = 'float')
y_val = tf.constant(y_val,dtype = 'float')

In [67]:
mnist_train = tf.data.Dataset.from_tensor_slices((X_train,y_train))
mnist_val = tf.data.Dataset.from_tensor_slices((X_val,y_val))

In [68]:
mnist_train = mnist_train.batch(64)
mnist_val = mnist_val.batch(64)

In [69]:
model = tf.keras.Sequential(

[
    tf.keras.layers.Conv2D(filters=6,kernel_size = (3,3),input_shape=(28,28,1)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(filters=16,kernel_size = (3,3)),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=120,activation='tanh'),
    tf.keras.layers.Dense(units=84,activation='tanh'),
    tf.keras.layers.Dense(units=10,activation='softmax')
]

)

model.compile(loss="sparse_categorical_crossentropy",
                optimizer = "sgd",
             metrics = ['accuracy'])

In [70]:
model.fit(mnist_train, epochs = 10,validation_data=mnist_val)

Epoch 1/10
  1/591 [..............................] - ETA: 2:45 - loss: 2.3189 - accuracy: 0.0938

2022-01-11 14:45:10.559113: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2022-01-11 14:45:16.658676: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x16e9bc160>

### Find the configuration of each convolutional layer in lenet 5:
- Kernel Size
- Stride Len
- Zero Padding that you will have to do


$$n_{out} = \frac{(n_{in}+2p-k)}{s}+1$$

- Layer 1 Conv2D:
    - Kernel Size: (5,5)
    - Stride: 1
    - Padding: 0
- Layer 2 Pool:
    - Kernel Size: (15,15)
    - Stride: 1
    - Padding: 0
- Layer 3 Conv2D:
    - Kernel Size: (5,5)
    - Stride: 1
    - Padding: 0
- Layer 4 Pool:
    - Kernel Size:6
    - Stride: 1
    - Padding: 0

In [71]:
### Pytorch Model

class Lenet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels = 1,out_channels=6,padding=0,stride = 1,kernel_size=(5,5))
        self.pool1 = nn.MaxPool2d(kernel_size=(15,15),padding=0,stride=1)
        self.conv2 = nn.Conv2d(in_channels = 6, out_channels=16,padding=0,stride=1,kernel_size=(5,5))
        self.pool2 = nn.MaxPool2d(kernel_size = (6,6),stride=1,padding=0)
        self.linear1 = nn.Linear(in_features = 16*5*5,out_features=120)
        self.linear2 = nn.Linear(in_features=120,out_features=64)
        self.linear3 = nn.Linear(in_features=64,out_features=10)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()
    def forward(self,x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = x.view(-1,16*5*5)
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.linear3(x)
        x = self.softmax(x)
        return x
    def __call__(self):
        return self.forward()
        

In [83]:
x = torch.randn(size = (20,1,32,32)) ## b,c,h,w

In [84]:
x.shape

torch.Size([20, 1, 32, 32])

In [85]:
mod = Lenet()

In [86]:
mod(x)

  x = self.softmax(x)


tensor([[0.1014, 0.1065, 0.1186, 0.1028, 0.0779, 0.1033, 0.0933, 0.0908, 0.1027,
         0.1027],
        [0.1000, 0.1077, 0.1175, 0.1009, 0.0782, 0.1049, 0.0942, 0.0920, 0.1021,
         0.1025],
        [0.1005, 0.1073, 0.1205, 0.1025, 0.0777, 0.1024, 0.0937, 0.0910, 0.1023,
         0.1020],
        [0.0998, 0.1081, 0.1184, 0.1009, 0.0761, 0.1051, 0.0933, 0.0937, 0.1023,
         0.1024],
        [0.1005, 0.1070, 0.1177, 0.1027, 0.0780, 0.1036, 0.0933, 0.0915, 0.1014,
         0.1042],
        [0.1010, 0.1080, 0.1189, 0.1023, 0.0761, 0.1030, 0.0938, 0.0906, 0.1024,
         0.1039],
        [0.1000, 0.1079, 0.1171, 0.1018, 0.0783, 0.1044, 0.0938, 0.0923, 0.1016,
         0.1027],
        [0.1005, 0.1076, 0.1179, 0.1016, 0.0782, 0.1037, 0.0938, 0.0914, 0.1027,
         0.1025],
        [0.0997, 0.1086, 0.1184, 0.0996, 0.0768, 0.1040, 0.0940, 0.0924, 0.1029,
         0.1037],
        [0.1018, 0.1063, 0.1203, 0.1013, 0.0762, 0.1034, 0.0930, 0.0909, 0.1029,
         0.1038],
        [0