Creating basic pipeline with NN module

In [23]:
#create model class
import torch
import torch.nn as nn

class Model(nn.Module): #binary classifier: Linear -> ReLU -> Linear -> Sigmoid
    """Small feed-forward network producing one sigmoid-activated output per sample.

    Args:
        num_features: number of input features per sample (size of the last
            dimension of the tensor passed to ``forward``).
        hidden_features: width of the single hidden layer. Defaults to 3,
            which is the width this class used before it was configurable.
    """

    def __init__(self,num_features,hidden_features=3): #constructor
        super().__init__()  #call the constructor of the parent class
        #layers are created in the same order as before so that, under a fixed
        #seed, parameter initialisation is unchanged for the default width
        self.linear=nn.Linear(num_features,hidden_features) #input layer: num_features -> hidden_features
        self.relu=nn.ReLU() #non-linearity applied to the hidden layer
        self.linear2=nn.Linear(hidden_features,1) #output layer: hidden_features -> 1
        self.sigmoid=nn.Sigmoid() #squashes the single output into the range [0, 1]

    def forward(self,features): #define the forward pass
        """Run the network on ``features`` and return the sigmoid output.

        Args:
            features: tensor whose last dimension has size ``num_features``.

        Returns:
            Tensor with the same leading dimensions as ``features`` and a last
            dimension of size 1, with values produced by the sigmoid layer.
        """
        hidden=self.relu(self.linear(features)) #hidden representation
        return self.sigmoid(self.linear2(hidden)) #sigmoid-activated output
    

In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import torch


#dataset import
#read the CSV and drop the two columns that are used neither as label nor as
#feature; the drop is chained (not inplace) so `df` is built in one expression.
#(a bare `df.describe()` used to sit here, but in the middle of a cell its
#result is discarded and never displayed, so it was removed)
df = pd.read_csv(
    'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
).drop(columns=['id','Unnamed: 32'])

#dataset building
#after the drop, column 0 is treated as the label and all remaining columns as
#features; 20% of the rows are held out for testing with a fixed random_state
X_train,X_test,y_train,y_test = train_test_split(
    df.iloc[:,1:],
    df.iloc[:,0],
    test_size=0.2,
    random_state=42,
)

#scaling the features
#the scaler is fitted on the training split only and the same fitted
#statistics are then applied to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape,X_test.shape)

#encoding the y
#the encoder is fitted on the training labels and reused for the test labels
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
print(y_train.shape,y_test.shape)

#converting to float32 tensors
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()

#targets are converted to float (BCELoss expects float targets) and reshaped
#from (n_samples,) to (n_samples,1)
y_train = torch.from_numpy(y_train).float().view(-1, 1)
y_test = torch.from_numpy(y_test).float().view(-1, 1)

(455, 30) (114, 30)
(455,) (114,)


In [24]:
#import summary from torchinfo (the package must already be installed) to print a model summary
from torchinfo import summary   

#using our model


#seed the global torch RNG before the model is built so that the random weight
#initialisation (and therefore the printed losses) is the same on every run
torch.manual_seed(42)

num_features = X_train.shape[1] #number of input features (columns of X_train)
model = Model(num_features) #create an instance of the model
s=summary(model, input_size=(X_train.shape[0], X_train.shape[1])) #summary for an input shaped like X_train
print(s)

#loss and optimizer
criterion = nn.BCELoss() #binary cross entropy loss on the model's sigmoid output
optimizer = torch.optim.SGD(model.parameters(),lr=0.01) #SGD optimizer; each step below uses the whole training set

#training the model
num_epochs = 100
for epoch in range(num_epochs):
    #forward pass: call the module itself rather than model.forward(...) so
    #that nn.Module.__call__ runs (this is what triggers any registered hooks)
    y_train_pred = model(X_train)
    loss = criterion(y_train_pred,y_train)

    #backward pass and optimization
    optimizer.zero_grad() #clear gradients accumulated by the previous step
    loss.backward() #backpropagation
    optimizer.step() #update the weights

    #report the training loss every 10th epoch
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [455, 1]                  --
├─Linear: 1-1                            [455, 3]                  93
├─ReLU: 1-2                              [455, 3]                  --
├─Linear: 1-3                            [455, 1]                  4
├─Sigmoid: 1-4                           [455, 1]                  --
Total params: 97
Trainable params: 97
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.04
Input size (MB): 0.05
Forward/backward pass size (MB): 0.01
Params size (MB): 0.00
Estimated Total Size (MB): 0.07
Epoch [10/100], Loss: 0.6619
Epoch [20/100], Loss: 0.6544
Epoch [30/100], Loss: 0.6475
Epoch [40/100], Loss: 0.6409
Epoch [50/100], Loss: 0.6343
Epoch [60/100], Loss: 0.6273
Epoch [70/100], Loss: 0.6199
Epoch [80/100], Loss: 0.6119
Epoch [90/100], Loss: 0.6037
Epoch [100/100], Loss: 0.5953
