![pic](https://i.pinimg.com/736x/ca/d6/38/cad6386be7f7a662ca9507a42bbaf02a.jpg)

> In this notebook, we learn how a simple PyTorch pipeline works


- Lecture: https://youtu.be/MKxEbbKpL5Q?si=RHXZRKLfZ6AswqON
- My PyTorch Repo: https://github.com/Rudra-G-23/deep-learning-using-pytorch

In [1]:
import numpy as np
import pandas as pd
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

import warnings
warnings.filterwarnings('ignore')

# Dataset

In [2]:
# Load the CSV straight from its raw GitHub URL (requires network access).
path = "https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv"
df = pd.read_csv(path)
# Preview the first rows to see which columns are present.
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
# (rows, columns) of the raw frame, before any columns are dropped.
df.shape

(569, 33)

In [4]:
# Remove the row identifier and the empty trailing 'Unnamed: 32' column; neither
# is a feature. Reassigning (instead of inplace=True) together with
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises a KeyError.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [5]:
# Confirm that 'id' and 'Unnamed: 32' are gone and 'diagnosis' is now the first column.
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# Train Test Split

In [6]:
# Hold out 20% of the rows for testing.
# Column 0 ('diagnosis') is the target; every remaining column is a feature.
# random_state pins the shuffle so the split -- and every metric computed
# from it further down -- is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 1:],
    df.iloc[:, 0],
    test_size=0.2,
    random_state=42,
)

# Scaling

In [7]:
# Standardise the features. The scaler is fitted on the training split only
# and then reused on the test split, so no test-set statistics leak into
# training. Note that X_train / X_test are rebound to the transformed arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Inspect the scaled training features (now an array rather than a DataFrame).
X_train

array([[ 1.75828210e+00,  1.10140787e+00,  2.16709854e+00, ...,
         2.06317228e+00,  4.08888040e+00,  8.95140759e-01],
       [-8.48136210e-01, -1.06644904e+00, -8.77679574e-01, ...,
        -1.51708327e+00, -1.67136988e-01, -5.52105746e-01],
       [-3.22766338e-01,  2.14636731e-03, -4.04880096e-01, ...,
        -1.60210536e+00, -7.76760811e-01, -1.19387374e+00],
       ...,
       [-4.39515198e-01, -2.66771682e-01, -4.01511978e-01, ...,
         3.79551488e-01,  5.05719094e-01,  1.10318955e+00],
       [ 3.33176872e-02,  7.66439771e-01,  6.49722798e-02, ...,
         3.10738649e-01, -3.56833762e-01, -2.17518615e-02],
       [-7.78086894e-01, -1.11126872e+00, -7.67794744e-01, ...,
        -6.35667262e-01,  3.71518470e-02,  4.49484334e-01]])

In [9]:
# Inspect the training labels: still the raw 'M' / 'B' strings at this point.
y_train

78     M
333    B
58     B
378    B
336    B
      ..
22     M
553    B
496    B
536    M
478    B
Name: diagnosis, Length: 455, dtype: object

# Label Encoding

In [11]:
# Turn the string labels into integer class ids. The encoder is fitted on the
# training labels and the same mapping is applied to the test labels.
# NOTE(review): with LabelEncoder's sorted class order this should give
# 'B' -> 0 and 'M' -> 1 -- confirm via encoder.classes_.
encoder = LabelEncoder()
# Cast both splits to str so they are encoded from the same dtype
y_train = encoder.fit_transform(y_train.astype(str))
y_test = encoder.transform(y_test.astype(str))

# NumPy arrays to PyTorch tensors

In [19]:
# Wrap the NumPy arrays as tensors. torch.from_numpy keeps the array's dtype,
# which is why the model below creates its parameters as torch.float64.
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)

# Encoded labels (integer class ids) as tensors.
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [20]:
# Sanity check: (n_train_samples, n_features).
X_train_tensor.shape

torch.Size([455, 30])

In [21]:
# Sanity check: one label per training sample.
y_train_tensor.shape

torch.Size([455])

# Defining the Model

- First, we create a class
- Initialize the weights and bias
- Then create the methods
    -  Forward calculation
    -  Loss calculation
    -  Update the weights (performed in the training loop)


In [36]:
class SimpleNN():
    """Single-layer logistic-regression model with manually managed parameters.

    Attributes:
        weights: 1-D float64 tensor with one entry per input feature,
            initialised uniformly in [0, 1) and tracked by autograd.
        bias: float64 tensor of shape (1,), initialised to zero and tracked
            by autograd.
    """

    def __init__(self, X):
        """Create the parameters, sized from the number of columns of ``X``.

        Args:
            X: 2-D tensor of shape (n_samples, n_features). Only
                ``X.shape[1]`` is read; the data itself is not stored.
        """
        self.weights = torch.rand(X.shape[1], dtype=torch.float64, requires_grad=True)
        self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

    def forward(self, X):
        """Return ``sigmoid(X @ weights + bias)``: one probability per row of ``X``.

        Args:
            X: float64 tensor of shape (n_samples, n_features).

        Returns:
            1-D tensor of shape (n_samples,) with values in (0, 1).
        """
        z = torch.matmul(X, self.weights) + self.bias
        y_pred = torch.sigmoid(z)

        return y_pred

    def loss_function(self, y_pred, y):
        """Mean binary cross-entropy between predictions and targets.

        Args:
            y_pred: predicted probabilities, shape (n_samples,).
            y: ground-truth 0/1 labels, same shape as ``y_pred``.

        Returns:
            Scalar tensor holding the loss averaged over all samples.
        """
        # Clamp predictions away from exactly 0 and 1 to avoid log(0)
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)

        # Use the targets that were passed in (not a notebook-level global) so
        # the loss is also correct for validation / test data, and cast them
        # to the prediction dtype so integer or boolean labels are accepted.
        y = y.to(y_pred.dtype)

        # Manually computed binary cross-entropy, averaged over the batch
        loss = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred)).mean()

        return loss

**Loss**

$$\mathcal{L} = - \frac{1}{N} \sum_{i=1}^{N} \Big[ y_{\text{target}}^{(i)} \log\big(y_{\text{pred}}^{(i)}\big) + \big(1 - y_{\text{target}}^{(i)}\big) \log\big(1 - y_{\text{pred}}^{(i)}\big) \Big]$$

# Important Parameters

In [29]:
LR = 0.1  # learning rate: factor applied to each gradient in the parameter update
EPOCHS = 25  # number of iterations of the training loop (each uses the full training set)

# Training Pipeline

> Inside the training pipeline
- In a loop
    - Forward pass
    - Loss calculation
    - Backward pass (compute the gradients)
    - Parameter update (gradient descent step)
    - Zero the gradients of the weights and bias before the next iteration


In [40]:
# Seed PyTorch's RNG so the random weight initialisation -- and therefore the
# loss curve printed below -- is identical every time this cell is run.
torch.manual_seed(42)

# create model
model = SimpleNN(X_train_tensor)

# Full-batch gradient descent: every epoch uses the whole training set once
for epoch in range(EPOCHS):

    # forward pass: predicted probabilities for all training samples
    y_pred = model.forward(X_train_tensor)

    # loss calculate
    loss = model.loss_function(y_pred, y_train_tensor)

    # backward pass: populates .grad on the weights and the bias
    loss.backward()

    # The update and the gradient reset are bookkeeping, not part of the
    # model's computation, so autograd tracking is switched off for both.
    with torch.no_grad():
        # parameters update (gradient descent step)
        model.weights -= LR * model.weights.grad
        model.bias -= LR * model.bias.grad

        # Zero gradients: backward() accumulates into .grad, so stale values
        # would otherwise be added to the next epoch's gradients
        model.weights.grad.zero_()
        model.bias.grad.zero_()

    print(f"Epoch: {epoch + 1} | Loss: {loss.item()} ")

Epoch: 1 | Loss: 0.5689408448358553 
Epoch: 2 | Loss: 0.5536276733824913 
Epoch: 3 | Loss: 0.5388093863844939 
Epoch: 4 | Loss: 0.5244793924089977 
Epoch: 5 | Loss: 0.5106289619107548 
Epoch: 6 | Loss: 0.4972473648935506 
Epoch: 7 | Loss: 0.48432209173543816 
Epoch: 8 | Loss: 0.4718391281299878 
Epoch: 9 | Loss: 0.45963587066339257 
Epoch: 10 | Loss: 0.4467971838334657 
Epoch: 11 | Loss: 0.43438168780119896 
Epoch: 12 | Loss: 0.42237166282084393 
Epoch: 13 | Loss: 0.4107494301704099 
Epoch: 14 | Loss: 0.3994977299747019 
Epoch: 15 | Loss: 0.3886000594379159 
Epoch: 16 | Loss: 0.3780409569976613 
Epoch: 17 | Loss: 0.3678062233119918 
Epoch: 18 | Loss: 0.35788307584942025 
Epoch: 19 | Loss: 0.34826023860481947 
Epoch: 20 | Loss: 0.3389279713154532 
Epoch: 21 | Loss: 0.32987804344605265 
Epoch: 22 | Loss: 0.32110365757881815 
Epoch: 23 | Loss: 0.31259932570713067 
Epoch: 24 | Loss: 0.30436070158888767 
Epoch: 25 | Loss: 0.29638437339399737 


# Model bias and weights

In [41]:
# Bias value after the training loop above.
model.bias

tensor([-0.0663], dtype=torch.float64, requires_grad=True)

In [42]:
# Weight vector after the training loop above (one entry per input feature).
model.weights

tensor([ 0.2160,  0.5827,  0.6129,  0.5237,  0.4711,  0.3250,  0.4762,  0.8541,
         0.0073,  0.4265,  0.8646,  0.4017,  0.8453,  0.8454,  0.6143,  0.6375,
         0.0135, -0.1149,  0.5482,  0.6252,  0.9366,  0.9323,  0.8441,  0.3226,
         0.8910,  0.8315,  0.4163,  0.0790,  0.1285, -0.1372],
       dtype=torch.float64, requires_grad=True)

# Evaluation

In [46]:
# Score the trained model on the held-out test split.
# Gradients are not needed for inference, so tracking is disabled.
with torch.no_grad():
    # Threshold the predicted probabilities at 0.5 to obtain hard 0/1 classes
    y_pred = model.forward(X_test_tensor).gt(0.5).float()

    # Accuracy = fraction of test samples whose predicted class equals the label
    accuracy = y_pred.eq(y_test_tensor).float().mean()
    print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.8947368264198303
