# AutoGrad in PyTorch

In [None]:
import torch

In [None]:
# Toy batch of 3 samples with 3 features each, plus one integer class label per sample.
input = torch.tensor([[1,2,3],[4,5,6],[7,8,9]], requires_grad=True, dtype=torch.float32)
y_true = torch.tensor([0,2,1])

torch.manual_seed(42)
# requires_grad=True is set on every tensor we want gradients for; tensors created directly like these are the leaf nodes of the autograd graph
weights = torch.rand((3,3), requires_grad=True, dtype=torch.float32)
bias = torch.rand((1,3), requires_grad=True, dtype=torch.float32)

output = torch.matmul(input, weights) + bias    # forward pass
probabilities = torch.softmax(output, dim=1)

if len(y_true.shape) == 1:
  # one-hot encode the integer labels; the identity matrix must be as wide as the number of
  # classes (columns of `output`), not the number of samples, or this breaks when the two differ
  y_true = torch.eye(output.shape[1])[y_true]

negative_log_probabilities = -torch.log(probabilities)
categorical_cross_entropy_loss = torch.sum(negative_log_probabilities * y_true, dim=1)  # per-sample categorical cross entropy loss

# backward() needs a scalar, so the per-sample losses are summed before back-propagating
categorical_cross_entropy_loss.sum().backward()

print(weights.grad)
print(bias.grad)
print(input.grad)

tensor([[ 0.8660, -0.0701, -0.7958],
        [ 0.2974,  0.5302, -0.8276],
        [-0.2712,  1.1304, -0.8593]])
tensor([[-0.5686,  0.6003, -0.0317]])
tensor([[-0.2041, -0.4086,  0.5372],
        [ 0.3594, -0.0613, -0.1787],
        [-0.1197,  0.1399, -0.0578]])


The cell above manually implements a single-layer network with three neurons, using PyTorch tensors and autograd to compute the gradients. Next, we will use PyTorch's neural-network module (`torch.nn`) to build a training pipeline.


# Training Pipeline and the NN Module of PyTorch

In [None]:
import torch.nn as nn

In [None]:
class Model(nn.Module):
  """Small feed-forward classifier: Linear -> ReLU -> Linear -> Softmax.

  Args:
    num_features: number of input features per sample.
  """

  def __init__(self, num_features):
    super().__init__()
    layers = [
        nn.Linear(num_features, 3),   # hidden layer with 3 units
        nn.ReLU(),
        nn.Linear(3, 3),              # output layer with 3 units
        nn.Softmax(dim=1),            # turn each row of outputs into probabilities
    ]
    self.network = nn.Sequential(*layers)

  def forward(self, input):
    """Run a batch `input` through the network and return the softmax outputs."""
    return self.network(input)

In [None]:
# example usage: pass a random batch (10 samples, 5 features) through the model

input = torch.rand(10, 5)
model = Model(num_features=input.size(1))
output = model(input)
output

tensor([[0.3286, 0.3238, 0.3476],
        [0.3428, 0.3321, 0.3251],
        [0.3584, 0.3444, 0.2972],
        [0.3359, 0.4158, 0.2483],
        [0.3372, 0.3508, 0.3120],
        [0.3574, 0.3353, 0.3074],
        [0.3361, 0.3291, 0.3347],
        [0.3730, 0.3344, 0.2926],
        [0.3422, 0.3559, 0.3019],
        [0.3640, 0.3481, 0.2879]], grad_fn=<SoftmaxBackward0>)

In [None]:
# we can look at the parameter values of the model (it has not been trained yet, so these are the initial values)

model.network[2].weight     # 0-based indexing: the third member of network, i.e. the second Linear layer

Parameter containing:
tensor([[-0.2982, -0.3936,  0.3063],
        [-0.2334,  0.3504, -0.1370],
        [ 0.3303, -0.4486, -0.2914]], requires_grad=True)

In [None]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [None]:
# torchinfo prints a per-layer summary of the model (output shapes and parameter counts)

from torchinfo import summary

summary(model, input_size=input.shape)

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [10, 3]                   --
├─Sequential: 1-1                        [10, 3]                   --
│    └─Linear: 2-1                       [10, 3]                   18
│    └─ReLU: 2-2                         [10, 3]                   --
│    └─Linear: 2-3                       [10, 3]                   12
│    └─Softmax: 2-4                      [10, 3]                   --
Total params: 30
Trainable params: 30
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

# Dataset and DataLoader Classes

`Dataset` and `DataLoader` are essential for managing and feeding data efficiently during the training and evaluation of neural networks. They help us implement mini-batch gradient descent, which is often more effective than batch gradient descent.

## 1. torch.utils.data.Dataset
This is an abstract class. We must create a class, `CustomDataset`, that inherits from it. `CustomDataset` must implement three methods: `__init__`, `__len__`, and `__getitem__`.

## 2. torch.utils.data.DataLoader
This class provides batching and shuffling so that training can be performed batch-wise. It takes an object of the `CustomDataset` class and handles how the data is delivered during training and testing.


In [None]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
  """Map-style dataset that pairs each input sample with its label.

  Args:
    input: indexable collection of samples (e.g. a tensor indexed along its first dimension).
    y_true: indexable collection of labels, one per sample.

  Raises:
    ValueError: if `input` and `y_true` do not contain the same number of items.
  """

  def __init__(self, input, y_true):
    # fail early: a mismatch would otherwise only surface as an IndexError in the middle of an epoch
    if len(input) != len(y_true):
      raise ValueError(
          f'input and y_true must have the same length, got {len(input)} and {len(y_true)}'
      )
    self.input = input
    self.y_true = y_true

  def __len__(self):
    """Return the number of samples."""
    return len(self.input)

  def __getitem__(self, index):
    """Return the `(sample, label)` pair stored at `index`."""
    return self.input[index], self.y_true[index]

torch.manual_seed(42)   # seed the global RNG used for the random data below
x = torch.rand(10, 3)                           # 10 samples with 3 features each
y = torch.randint(low=0, high=2, size=(10,))    # one 0/1 label per sample
dataset = CustomDataset(x, y)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# during training and testing, the batches are loaded by iterating over the dataloader

for batch_input, batch_y_true in dataloader:
  print(batch_input, batch_y_true, sep='\n')

tensor([[0.8694, 0.5677, 0.7411],
        [0.4294, 0.8854, 0.5739]])
tensor([1, 0])
tensor([[0.1332, 0.9346, 0.5936],
        [0.4414, 0.2969, 0.8317]])
tensor([1, 0])
tensor([[0.2566, 0.7936, 0.9408],
        [0.1994, 0.5472, 0.0062]])
tensor([1, 0])
tensor([[0.2666, 0.6274, 0.2696],
        [0.8823, 0.9150, 0.3829]])
tensor([1, 0])
tensor([[0.9593, 0.3904, 0.6009],
        [0.1053, 0.2695, 0.3588]])
tensor([0, 0])


# A complete Neural Network using NN Module (training and testing with Breast Cancer Dataset)

In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# load the breast-cancer CSV straight from GitHub and peek at its last rows
raw_data = pd.read_csv(
    'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
)
raw_data.tail()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
564,926424,M,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,
565,926682,M,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,
566,926954,M,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,
567,927241,M,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,
568,92751,B,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,


In [None]:
# remove the 'id' and 'Unnamed: 32' columns, which are not used as features;
# reassigning with errors='ignore' (instead of inplace=True) keeps this cell safe to re-run
raw_data = raw_data.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [None]:
# select the label by column name rather than by position, so the split does not
# silently depend on the column order left behind by earlier cells
x_train, x_test, y_train, y_test = train_test_split(
    raw_data.drop(columns='diagnosis'),   # features: every column except the label
    raw_data['diagnosis'],                # label column
    test_size=0.2,
    random_state=42,
)

In [None]:
# preprocessing steps
# neural networks, and even regular ML models, tend to work better with scaled inputs

scaler = StandardScaler().fit(x_train)    # learn the scaling from the training split only
x_train = scaler.transform(x_train)       # apply the learned scaling to the training split
x_test = scaler.transform(x_test)         # apply the same learned scaling to the test split

In [None]:
# inspect the training labels before encoding: they are still the strings 'B' and 'M'
y_train

Unnamed: 0,diagnosis
68,B
181,M
63,B
248,B
60,B
...,...
71,B
106,B
270,B
435,M


In [None]:
# encode the string labels as integer class ids

encoder = LabelEncoder().fit(y_train)     # learn the label -> integer mapping from the training labels
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)        # reuse the same mapping for the test labels

In [None]:
# convert to tensors: float32 features for the linear layers, int64 (long) class labels for the loss
x_train, x_test = (torch.tensor(features, dtype=torch.float32) for features in (x_train, x_test))
y_train, y_test = (torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test))

In [None]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
  """Map-style dataset that pairs each input sample with its label.

  Args:
    input: indexable collection of samples (e.g. a tensor indexed along its first dimension).
    y_true: indexable collection of labels, one per sample.

  Raises:
    ValueError: if `input` and `y_true` do not contain the same number of items.
  """

  def __init__(self, input, y_true):
    # fail early: a mismatch would otherwise only surface as an IndexError in the middle of an epoch
    if len(input) != len(y_true):
      raise ValueError(
          f'input and y_true must have the same length, got {len(input)} and {len(y_true)}'
      )
    self.input = input
    self.y_true = y_true

  def __len__(self):
    """Return the number of samples."""
    return len(self.input)

  def __getitem__(self, index):
    """Return the `(sample, label)` pair stored at `index`."""
    return self.input[index], self.y_true[index]


In [None]:
# wrap each split in a dataset object so that a DataLoader can batch it
train_dataset = CustomDataset(input=x_train, y_true=y_train)
test_dataset = CustomDataset(input=x_test, y_true=y_test)

In [None]:
# batches of 5 samples; only the training data is shuffled, the test data keeps its order
train_loader = DataLoader(dataset=train_dataset, batch_size=5, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=5, shuffle=False)

In [None]:
class Model(nn.Module):
  """Feed-forward classifier that returns raw logits: Linear -> ReLU -> Linear.

  There is no softmax layer here: the softmax activation is applied by the loss
  function used for training, so the network itself outputs unnormalised scores.

  Args:
    num_features: number of input features per sample.
    hidden_units: width of the hidden layer. Defaults to 3, the previously hard-coded value.
    num_classes: number of output logits. Defaults to 3, the previously hard-coded value;
      pass the real number of classes (e.g. 2 for a binary problem) to avoid unused outputs.
  """

  def __init__(self, num_features, hidden_units=3, num_classes=3):
    super().__init__()
    self.network = nn.Sequential(
        nn.Linear(num_features, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, num_classes),
    )

  def forward(self, input):
    """Return the logits of shape (batch, num_classes) for a batch `input`."""
    out = self.network(input)
    return out

In [None]:
learning_rate = 0.1   # step size passed to the optimizer
epochs = 100          # number of passes over the training data

In [None]:
# manual_seed must be *called*; the bare name `torch.manual_seed` is a no-op expression.
# Seeding here makes the weight initialisation reproducible.
torch.manual_seed(42)
model = Model(x_train.shape[1])
criterion = nn.CrossEntropyLoss()     # takes the raw model outputs together with integer class labels
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
model.train()   # make sure the model is in training mode before optimising

for epoch in range(epochs):
  epoch_loss = 0.0    # sum of the batch losses seen in this epoch
  for input_batch, y_true_batch in train_loader:  # loading batch-wise

    output = model(input_batch)   # forward pass; calling the module (rather than .forward) also runs any registered hooks

    batch_loss = criterion(output, y_true_batch)    # calculating loss, softmax is first applied while doing this

    optimizer.zero_grad()     # gradients accumulate across backward passes, so clear them before the next one
    batch_loss.backward()     # backward pass

    optimizer.step()          # update parameters

    # .item() extracts a plain Python float; adding the tensor itself would keep
    # every batch's autograd history alive for the whole epoch
    epoch_loss += batch_loss.item()

  if epoch % 10 == 0:   # report every 10th epoch
    print(f'Epoch: {epoch}, Loss: {epoch_loss}')



Epoch: 0, Loss: 22.431434631347656
Epoch: 10, Loss: 4.239330768585205
Epoch: 20, Loss: 3.2176060676574707
Epoch: 30, Loss: 2.5604028701782227
Epoch: 40, Loss: 2.1185994148254395
Epoch: 50, Loss: 2.18373441696167
Epoch: 60, Loss: 1.5750418901443481
Epoch: 70, Loss: 1.5251399278640747
Epoch: 80, Loss: 1.3654459714889526
Epoch: 90, Loss: 1.1393002271652222


In [32]:
# test
num_correct = 0
num_samples = 0

model.eval()    # switch layers that behave differently at inference time to evaluation mode

with torch.no_grad():     # gradient tracking is switched off during evaluation to save time and memory
  for input_batch, y_true_batch in test_loader:
    output = model(input_batch)
    y_pred = output.argmax(dim=1)     # predicted class = index of the largest output in each row
    num_correct += (y_pred == y_true_batch).sum().item()    # .item() keeps the counter a plain Python int
    num_samples += y_true_batch.shape[0]

print(f'Accuracy: {num_correct/num_samples}')


Accuracy: 0.9736841917037964
