# Pytorch installation

In [89]:
import torch

In [90]:
import numpy as np

# Tensors

#1. The only datatype the model will understand is tensor (multi dimensional matrix)

#2. You will have to convert the feature vector to a tensor 

#3. The entries in the tensor could be float, int,....

#4. https://pytorch.org/docs/stable/tensors.html (lists all the available types)

In [91]:
# general way of creating tensor - torch.tensor(<list/numpy array>, dtype=<dtype>)
x = torch.tensor([1,2,3,4])
y = torch.tensor([1,2,3,4],dtype=torch.float32)
z = torch.FloatTensor([1,2,3,4])

In [92]:
z

tensor([1., 2., 3., 4.])

In [93]:
z.dtype

torch.float32

In [94]:
x = np.array([1,2,3,4])
x_t = torch.tensor(x)

In [95]:
x_t

tensor([1, 2, 3, 4], dtype=torch.int32)

In [96]:
# you can get a numpy array back from a tensor
x_n = x_t.numpy()

In [97]:
x_n

array([1, 2, 3, 4])

In [98]:
# similar to numpy there are different ways of creating tensors
x = torch.ones((1,8))
y = torch.zeros((1,2))

In [99]:
x

tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])

In [100]:
# similar way of accessing elements as numpy
y[0][0] = 1.0

In [101]:
y[0,0] # this also works

tensor(1.)

In [102]:
# reshaping matrix
# We want to reshape the matrix of form 3x3 to 1x9
z = torch.tensor([[1,2,3],[4,5,6],[7,8,9]])
z.shape

torch.Size([3, 3])

In [107]:
z

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [105]:
w = z.reshape(1,9)
w.shape

torch.Size([1, 9])

In [106]:
w

tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [104]:
w = z.reshape(1,8)
w.shape

RuntimeError: shape '[1, 8]' is invalid for input of size 9

In [108]:
# in many cases that we will encounter, we won't know beforehand the shape
# In the previous example lets say we do not know if the matrix is 3x3 or 4x4... 
# But we want to 'flatten' the tensor
# So the output would be 1xn.. depending on the shape of the input tensor
q = torch.ones((4,4))
t = torch.ones((8,8))

In [109]:
w = q.reshape(1,-1)

In [110]:
w.shape

torch.Size([1, 16])

In [111]:
w = t.reshape(2,-1)

In [112]:
w.shape

torch.Size([2, 32])

In [None]:
# Check these out yourself....
# Other useful functions torch.squeeze(), torch.unsqueeze(), torch.detach()....

# Calculating gradients

In [None]:
# Now that we know how to create and manipulate tensors, we move onto the second part which is calculating gradients
# You do not need to calculate gradients from scratch..
# Autograd module in Pytorch does it for you

In [113]:
x.requires_grad

False

In [114]:
x = torch.ones((1,7),requires_grad=True)

In [115]:
y = torch.sum(x*x) 

In [116]:
y.backward() # calculates gradient of y wrt all variables (x in this example)

In [117]:
print(x.grad) # dy/dx

tensor([[2., 2., 2., 2., 2., 2., 2.]])


In [118]:
w = torch.ones((1,7))

In [119]:
t = torch.sum(w*w)

In [120]:
t.backward() # we did not set requires_grad to True for w

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

# Creating an architecture

<img src="Images/neural_net.png">

<img src="Images/linear_matrix.png">

In [121]:
import torch.nn as nn
import torch.nn.functional as F

In [122]:
class Classifier(nn.Module):
    """Two-layer feed-forward network built from explicitly declared parameters.

    The forward pass computes ``relu(inp @ W_1 + b_1) @ W_2 + b_2``.

    Args:
        input_size: number of columns of the input (rows of ``W_1``).
        hidden_size: width of the hidden layer.
        output_size: number of columns of the returned tensor.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Wrapping a tensor in nn.Parameter registers it with the module, so it
        # is returned by parameters()/named_parameters() and seen by optimizers.
        self.W_1 = nn.Parameter(torch.randn(input_size, hidden_size))
        self.b_1 = nn.Parameter(torch.randn(1, hidden_size))
        # A further hidden layer would add W_3 (hidden_size x hidden_size)
        # and b_3 (1 x hidden_size) at this point.
        self.W_2 = nn.Parameter(torch.randn(hidden_size, output_size))
        self.b_2 = nn.Parameter(torch.randn(1, output_size))
        self.relu = nn.ReLU()

    def forward(self, inp):
        """Return the raw (unnormalized) output scores for ``inp``."""
        # torch.matmul(a, b) is the function form of the ``a @ b`` operator.
        hidden = self.relu(torch.matmul(inp, self.W_1) + self.b_1)
        return torch.matmul(hidden, self.W_2) + self.b_2

In [126]:
clf = Classifier(4,6,1)

In [127]:
print(clf)

Classifier(
  (relu): ReLU()
)


In [128]:
inp = torch.randn(1,4)

In [129]:
inp.shape

torch.Size([1, 4])

In [130]:
yhat = clf(inp) # forward pass for one datapoint in your dataset

In [131]:
yhat

tensor([[-1.1882]], grad_fn=<AddBackward0>)

In [132]:
from torchviz import make_dot

In [133]:
make_dot(yhat, params=dict(list(clf.named_parameters()))).render("clf_torchviz", format="png")

'clf_torchviz.png'

In [None]:
## You can design any deep learning architecture from scratch by first initializing the parameters in __init__ and then 
## specifying the sequence of operations of the forward pass in the forward function

In [None]:
## But of course Pytorch provides you higher level APIs so that you do not need to implement everything from scratch
## However, it makes sense to know these as in future you might come up with your own architecture for which 
# a ready-made implementation is not available

In [None]:
# Lets look back at the architecture we have already created and see how we can make life easier by using the 
# higher level APIs

In [None]:
# We start with the linear module which packs the whole operation Wx + b

In [134]:
class Classifier_1(nn.Module):
    """Two-layer feed-forward network expressed with ``nn.Linear`` layers.

    The forward pass computes ``h2o(relu(i2h(inp)))``.

    Args:
        input_size: number of input features.
        hidden_size: width of the hidden layer.
        output_size: number of output scores.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Each nn.Linear owns its own weight and bias, so no explicit
        # nn.Parameter declarations are needed here.
        self.i2h = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.h2o = nn.Linear(hidden_size, output_size)  # hidden -> output
        self.relu = nn.ReLU()

    def forward(self, inp):
        """Return the raw (unnormalized) output scores for ``inp``."""
        hidden = self.relu(self.i2h(inp))
        return self.h2o(hidden)

In [135]:
clf = Classifier_1(4,6,1)

In [136]:
inp = torch.randn(1,4)

In [137]:
clf(inp)

tensor([[0.6841]], grad_fn=<AddmmBackward0>)

# Back propagation and training

# Loss

 Loss functions are already implemented in neural network module https://pytorch.org/docs/stable/nn.html#loss-functions
 
 Have a look over the rest
 
 We will see an example with cross-entropy loss (This is the one we are going to use predominantly)

<img src="Images/cross_entropy_loss.png">

In [None]:
## Takes as input the raw, unnormalized scores (logits) and the true/ground-truth class
## The normalization is performed inside the loss function (softmax in nn.CrossEntropyLoss, sigmoid in nn.BCEWithLogitsLoss), so you need not perform it yourself
## For some other loss functions, you might have to do it explicitly

In [138]:
criterion = nn.BCEWithLogitsLoss()

In [139]:
label = torch.FloatTensor([1]) ## recall pytorch only understands tensor
out = clf(inp)

In [140]:
criterion(out[0], label)

tensor(0.4085, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [141]:
clf = Classifier_1(4,6,2)

In [142]:
inp = torch.randn(1,4)

In [143]:
criterion = nn.CrossEntropyLoss() # create an instance of the loss function

In [144]:
# we consider one input instance and assume the ground-truth label as 1 (2 class so either 0 or 1)
label = torch.LongTensor([1]) ## recall pytorch only understands tensor
out = clf(inp)

In [145]:
loss = criterion(out, label)

In [None]:
loss

# Optimization through backpropagation/training

In [146]:
# we will use the optimization module in pytorch
import torch.optim as optim

In [147]:
optimizer = optim.SGD(clf.parameters(),lr=0.001) ## we will use Stochastic gradient descent SGD, lr - learning rate 

In [148]:
optimizer.zero_grad() # flushes out any previously calculated gradient

In [149]:
loss.backward() # calculates gradient with respect to the parameters dL/dx

In [150]:
optimizer.step() # updates the parameters based on the optimization strategy x = x - lr*dL/dx

In [None]:
# the above four lines of code (create the optimizer, zero_grad, backward, step) constitute one complete training step

# Evaluation

In [None]:
# Once you have trained the model, you would like to check how well it does on the test set
# We will just need the forward pass
# Optimization is not needed

In [151]:
inp_test = torch.randn(1,4)

In [152]:
clf.eval() # We want to use the model in evaluation mode.. 

Classifier_1(
  (i2h): Linear(in_features=4, out_features=6, bias=True)
  (h2o): Linear(in_features=6, out_features=2, bias=True)
  (relu): ReLU()
)

In [153]:
with torch.no_grad():
    out = clf(inp_test) # Perform forward pass in the example

In [154]:
out_prob = F.softmax(out, dim=1) 
# remember cross-entropy loss was doing the softmax for us.. For evaluation we have to do it explicitly

In [155]:
out_prob

tensor([[0.4789, 0.5211]])

In [156]:
torch.argmax(out_prob) # the class with the highest probability is the inferred class

tensor(1)

In [157]:
torch.argmax(out_prob).item() # get back value from pytorch tensor

1

In [None]:
# Given you know the true/ground-truth label, you can compute the accuracy: the fraction of cases where the 
# inferred class matches the ground-truth class

In [None]:
# Lets now work with a real-world dataset...
# We will try with the classic MNIST dataset...
# Handwritten digit recognition dataset with the labels being digits (0-9)

<img src="Images/mnist.png">

# Data preprocessing

In [None]:
# We would like to train the model in batches...
# Ideally examples in a batch should be picked at random...
# Of course we can code it ourselves, but pytorch provides a Dataset module to do just that...

https://drive.google.com/drive/folders/1y6_ddgZuxdMgHlM4BjZp9mmtwFWI7p4t?usp=sharing (Link to the dataset)

In [None]:
# original data available at http://yann.lecun.com/exdb/mnist/ but is somewhat difficult to use
# I have done some preprocessing so that it is easier to use
# Each image is 28x28 which I have flattened to 1x784
# There are two files, train and test which should be used for training and testing respectively
# Each line in the files is a datapoint
# the first element in each line is the label rest are the feature values

In [158]:
# Parse the training CSV: each line is "<label>,<feature>,<feature>,...".
data_points = []   # one list of integer feature values per line
class_labels = []  # integer label of the datapoint at the same index

with open('mnist_train.csv') as fs:
    for line in fs:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at the end of the file)
            # would otherwise make int('') raise ValueError.
            continue
        # First value is the label, the rest are the feature values.
        label, *datapoint = map(int, line.split(','))
        data_points.append(datapoint)
        class_labels.append(label)

In [159]:
class_labels[0]

5

In [160]:
data_points[0]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 18,
 18,
 18,
 126,
 136,
 175,
 26,
 166,
 255,
 247,
 127,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 30,
 36,
 94,
 154,
 170,
 253,
 253,
 253,
 253,
 253,
 225,
 172,
 253,
 242,
 195,
 64,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 49,
 238,
 253,
 253,
 253,
 253,
 253,
 253,
 253,
 253,
 251,
 93,
 82,
 82,
 56,
 39,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 18,

In [None]:
len(class_labels)

In [None]:
len(data_points[0])

In [None]:
clf = Classifier_1(784, 1056, 10)

Now train the classifier for MNIST putting together all the elements that we have learnt....

In [None]:
# Train...
# Exercise skeleton: the loop visits every training example once per epoch.
# The numbered steps are left as comments to be replaced by real code.
epochs = 1  # number of full passes over the training set; raise it once the loop body is filled in
for e in range(epochs):
    for i in range(len(class_labels)):
        cls_label = class_labels[i]
        features = data_points[i]
        # TODO (exercise): implement the steps below
        # tensorize features
        # tensorize label
        # perform forward pass out = clf(features)
        # Compute loss loss(out, cls_label)
        # Run optimizer...
        ...  # placeholder so the skeleton is valid Python until the steps are written

Write an evaluation function to evaluate on the test set

Try different hidden layer sizes