In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import grad
import numpy as np

This is a simple classification Neuron

### Basic Architecture

In [3]:
class simpleNN(nn.Module):
    def __init__(self):
        super(simpleNN,self).__init__()
        self.input_linear = nn.Linear(in_features=3,out_features=3,bias=True) # takes 2 features and outputs 3 
        self.hidden= nn.Linear(in_features=3,out_features=2,bias=True) #one output for each class

        
    def forward(self,x):
        x = F.relu(self.input_linear(x))
        x= torch.sigmoid(self.hidden(x))
        x = F.softmax(x,dim=1)
        return x

In [4]:
net = simpleNN()
print(net) # this is the archetecture of the neural network

simpleNN(
  (input_linear): Linear(in_features=3, out_features=3, bias=True)
  (hidden): Linear(in_features=3, out_features=2, bias=True)
)


In [5]:
# next lets look at the parameters

In [35]:
params = list(net.parameters())
print(" Number of parameters ",len(params))
print("Weight matrix for input features: ",list(params[0].shape))
print("\t",str(params[0]).split('\n')[1])
print("Bias for input to hidden layer mapping: ",list(params[1].shape))
print("\t",str(params[1]).split('\n')[1])
print("Weight matrix for hidden to output:1*3",list(params[2].shape))
print("\t",str(params[2]).split('\n')[1])
print("Bias for  hidden to output:1 ")
print("\t",str(params[3]).split('\n')[1],list(params[3].shape))

 Number of parameters  4
Weight matrix for input features:  [3, 3]
	 tensor([[-0.5458,  0.0255, -0.4028],
Bias for input to hidden layer mapping:  [3]
	 tensor([0.2715, 0.4314, 0.3151], requires_grad=True)
Weight matrix for hidden to output:1*3 [2, 3]
	 tensor([[-0.4984, -0.5088, -0.3487],
Bias for  hidden to output:1 
	 tensor([-0.0551, -0.4452], requires_grad=True) [2]


###  Data & Parameters

In [42]:
inData = torch.tensor([[ 0.5, -0.3,0.2]],requires_grad=True)
target_label = torch.Tensor([[0,1]]) # we are saying that this record should be classified as the second class
learning_rate = 0.01

### Forward + BackPropogation

In [43]:
net = simpleNN() #instantiate
output = net(inData) # call forward function once
#A loss function takes the (output, target) pair of inputs, and computes a value that estimates how far away the output is from the target.
criterion = nn.MSELoss() # more on this later
loss = criterion(output, target_label)
#Zero the gradient buffers of all parameters
net.zero_grad()
#Backward is the function which actually calculates the gradient by passing it's argument (1x1 unit tensor by default) through the backward graph all the way up to every leaf node traceable from the calling root tensor. 
# when we call loss.backward(), the whole graph is differentiated w.r.t. the loss, and all Tensors in the graph that has requires_grad=True will have their .grad Tensor accumulated with the gradient.
print('simpleNN.input_linear.bias.grad before backward')
print(net.input_linear.bias.grad)
loss.backward() # creates the gradients and stores it in parameters
print('simpleNN.input_linear.bias.grad after backward')
print(net.input_linear.bias.grad)

simpleNN.input_linear.bias.grad before backward
None
simpleNN.input_linear.bias.grad after backward
tensor([0.0299, 0.0000, 0.0463])


### Updating Gradients


In [44]:
# Update the weights using gradient descent. Each parameter is a Tensor, so
# we can access its gradients like we did before.
with torch.no_grad():
    for param in net.parameters():
        param -= learning_rate * param.grad

### Understanding loss function

In [46]:

loss = criterion(output, target_label) 
print("Loss from library",loss)
print("diff between output and actual target",(output - target_label))
print("taking square",(output - target_label)**2 )
print("taking default mean for MSE loss" ,torch.mean((output - target_label)**2) )

Loss from library tensor(0.2696, grad_fn=<MseLossBackward>)
diff between output and actual target tensor([[ 0.5192, -0.5192]], grad_fn=<SubBackward0>)
taking square tensor([[0.2696, 0.2696]], grad_fn=<PowBackward0>)
taking default mean for MSE loss tensor(0.2696, grad_fn=<MeanBackward0>)
