In [1]:
import torch

In [2]:

x = torch.randn(2,5,requires_grad=True)
y = torch.randn(5,2)
print(f"Tensor : \n {x}")

device = "cuda" if torch.cuda.is_available() else "cpu"
x = x.to(device)
print("device : ", x.device)

a = x + 2
b = x * y.T
c = torch.matmul(x,y)

print(f"a: {a} \n b : {b} \n c: {c}")

Tensor : 
 tensor([[ 0.5074,  0.4985, -1.1697, -1.1067, -0.6041],
        [-0.3250,  2.9579, -0.1799,  0.3334, -0.0798]], requires_grad=True)
device :  cpu
a: tensor([[2.5074, 2.4985, 0.8303, 0.8933, 1.3959],
        [1.6750, 4.9579, 1.8201, 2.3334, 1.9202]], grad_fn=<AddBackward0>) 
 b : tensor([[-0.4139, -0.7604, -1.9714, -1.8819, -0.1336],
        [-0.1130, -4.7638, -0.1702, -0.2524, -0.0717]], grad_fn=<MulBackward0>) 
 c: tensor([[-5.1612, -1.4372],
        [-4.0014, -5.3711]], grad_fn=<MmBackward0>)


In [3]:
import torch.nn as nn

In [4]:
#learnable parameters
W = torch.randn(3,3,requires_grad=True)
b = torch.randn(3,requires_grad=True)

#input tensor
x = torch.randn(2,5,3)

#manual forward pass
y_manual = torch.matmul(x,W) + b

#using nn
linear_layer = nn.Linear(3,3)
y_layer = linear_layer(x)

loss_manual = (y_manual**2).sum()
loss_layer = (y_layer**2).sum()

loss_manual.backward()
loss_layer.backward()

print("gradients of W: ", W.grad)
print("gradients of b: ", b.grad)

print("Gradients of linear_layer.weight: ",linear_layer.weight.grad)

gradients of W:  tensor([[  3.5120,   1.7310,  38.0778],
        [-10.1691,   5.1328,   9.5008],
        [  1.8542,  -7.0999,  13.5316]])
gradients of b:  tensor([ 22.7652, -13.4443,   7.3671])
Gradients of linear_layer.weight:  tensor([[ 5.9470,  1.9529, -3.3295],
        [ 0.6204,  1.9220,  1.6168],
        [-2.7810, -8.7726, -8.4541]])


In [5]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN,self).__init__()
        self.layer1 = nn.Linear(10,64) #input of 10 features, output of 64 features
        self.layer2 = nn.Linear(64,1) #ouput of 1
    
    def forward(self,x):
        x = torch.relu(self.layer1(x))
        x = self.layer2(x)
        return x

model = SimpleNN().to(device = "cpu")

input_tensor = torch.randn(5,10)

output = model(input_tensor)
print(f"output: {output}")        
        

output: tensor([[ 0.0281],
        [-0.0207],
        [ 0.2064],
        [ 0.0416],
        [ 0.1367]], grad_fn=<AddmmBackward0>)


## MNIST problem

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib as plt

In [8]:
#defining the transformatioin this will convert the image to tensors and we will also normalize it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,))
])

#loading dataset from torchvision 
trainset = torchvision.datasets.MNIST(root='./data',train=True,download=True,transform=transform)
testset = torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transform)

#create a dataloader for batching and shuffling of the data
trainloader = torch.utils.data.DataLoader(trainset,batch_size=64,shuffle=True)
testloader = torch.utils.data.DataLoader(testset,batch_size=64,shuffle=True)


In [9]:
#lets make the nn

class mnistNN(nn.Module):
    def __init__(self):
        super(mnistNN,self).__init__()
        self.layer1 = nn.Linear(28*28,128)
        self.layer2 = nn.Linear(128,64)
        self.layer3 = nn.Linear(64,10)
        
    def forward(self,x):
        x = x.view(-1,28*28) #????
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = self.layer3(x)
        return x

In [10]:
model = mnistNN()

criterion = nn.CrossEntropyLoss() #cross entropy loss for classification

optimizer = optim.SGD(model.parameters(),lr=0.01,momentum=0.9)#what the fk is momentum

In [11]:
epochs = 5

for epoch in range(epochs):
    running_loss = 0.0 
    correct = 0 
    total = 0
    
    for inputs,labels in trainloader:    
        optimizer.zero_grad() #zero the gradients
        
        outputs = model(inputs)
        
        loss = criterion(outputs,labels)
        
        loss.backward()
        
        optimizer.step()
        
        running_loss += loss.item() #loss.item() returns the scalar value held in the loss tensor
        
        _,predicted = torch.max(outputs.data,1)
        
        total += labels.size(0)
        
        correct += (predicted == labels).sum().item()  
    
    print(f"Epoch [{epoch+1}/{epochs}], loss: {running_loss/len(trainloader):.4f}, accuracy:{100* correct/ total:.2f}%")  

Epoch [1/5], loss: 0.4361, accuracy:86.60%
Epoch [2/5], loss: 0.1795, accuracy:94.66%
Epoch [3/5], loss: 0.1288, accuracy:96.12%
Epoch [4/5], loss: 0.1046, accuracy:96.75%
Epoch [5/5], loss: 0.0902, accuracy:97.12%


In [12]:
correct = 0
total = 0 

with torch.no_grad():
    for inputs,labels in testloader:
        outputs = model(inputs)
        _,predicted = torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f"Accuracy of the network on the 10000 test images: {100*correct/total}%")

Accuracy of the network on the 10000 test images: 97.08%


In [14]:
#saving the model
torch.save(model.state_dict(),"mnist_nn.pth")


In [15]:
# Load the saved model state dictionary
model = mnistNN()
model.load_state_dict(torch.load("mnist_nn.pth"))

  model.load_state_dict(torch.load("mnist_nn.pth"))


<All keys matched successfully>