BINARY CLASSIFICATION OF BREAST CANCER with a Single-Neuron NN, built without the torch.nn Module



In [1]:
#basic imports
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

In [4]:
#dataset import
DATA_URL = 'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
df = pd.read_csv(DATA_URL)
# Non-mutating drop: these two columns are not used as features or as the label.
df = df.drop(columns=['id','Unnamed: 32'])

#dataset building
#splitting and keeping 20% data for testing , split based on first column (label) and rest as features
features = df.iloc[:, 1:]
labels = df.iloc[:, 0]
X_train,X_test,y_train,y_test = train_test_split(features,labels,test_size=0.2,random_state=42)

#scaling the features
# The scaler is fitted on the training split only, then applied unchanged to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape,X_test.shape)

#encoding the y
# LabelEncoder maps the label values to integer codes; le.classes_ lists the
# original values in code order.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
print(y_train.shape,y_test.shape)

#converting to tensors
# Features are cast to float32 once here (the scaler returns float64 arrays) so
# the model does not have to re-cast them on every forward pass.
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()

#reshaping y to be of shape (n_samples,1) and converting to float
# The model outputs predictions of shape (n_samples, 1). A flat (n_samples,)
# target would broadcast against that to (n_samples, n_samples) inside the
# element-wise cross-entropy, so the targets are given the same 2-D shape.
y_train = torch.from_numpy(y_train).view(-1, 1).float()
y_test = torch.from_numpy(y_test).view(-1, 1).float()

(455, 30) (114, 30)
(455,) (114,)


In [5]:
#Now PyTorch comes into the picture
import torch.nn as nn


#Defining the model
class MySimpleNN():
    """Single-neuron binary classifier (linear map followed by a sigmoid).

    The parameters are plain tensors tracked by autograd:
      weights -- shape (n_features, 1), drawn from a standard normal
      bias    -- shape (1,), drawn from a standard normal
    """

    def __init__(self,X):
        """Create random parameters sized from the number of columns of ``X``."""
        n_features = X.shape[1]
        self.weights = torch.randn(n_features, 1, requires_grad=True)
        self.bias = torch.randn(1, requires_grad=True)

    def forward(self,X):
        """Return ``sigmoid(X @ weights + bias)``; ``X`` is cast to float32 first."""
        logits = X.float() @ self.weights + self.bias
        return torch.sigmoid(logits)

    def compute_loss(self,y_pred,y):
        """Return the mean binary cross-entropy between ``y_pred`` and ``y``.

        Predictions are clamped to [1e-7, 1 - 1e-7] so that neither logarithm
        is ever evaluated at zero.
        """
        eps = 1e-7
        probs = torch.clamp(y_pred, eps, 1. - eps)

        targets = y.float()
        n_samples = y.shape[0]
        # Per-sample log-likelihood of the positive and the negative class.
        pos_term = targets * torch.log(probs)
        neg_term = (1 - targets) * torch.log(1 - probs)
        return - (1/n_samples) * torch.sum(pos_term + neg_term)

    def parameters_update(self,lr):
        """Take one gradient-descent step of size ``lr`` and reset the gradients.

        Must be called after ``backward()`` has populated ``.grad`` on both
        parameters.
        """
        params = (self.weights, self.bias)
        # The update itself must not be recorded in the autograd graph.
        with torch.no_grad():
            for param in params:
                param -= lr * param.grad

            # Gradients accumulate across backward() calls, so clear them
            # once the step has been applied.
            for param in params:
                param.grad.zero_()
    


#parameters
learning_rate=0.1
num_epochs=100
SEED=42  # fixes the random parameter initialisation so runs are repeatable

#creating model object
# Seed immediately before construction: the model draws its initial weights and
# bias from torch's global random generator.
torch.manual_seed(SEED)
model = MySimpleNN(X_train)
# One weight per feature column and a single bias term.
assert model.weights.shape == (X_train.shape[1], 1)
assert model.bias.shape == (1,)

#training loop
# Full-batch gradient descent: every epoch uses the whole training set once.
for epoch in range(num_epochs):
    #forward pass
    y_pred = model.forward(X_train)

    #loss computation
    loss = model.compute_loss(y_pred,y_train)

    #backward pass
    loss.backward()

    #parameters update
    # (this also zeroes the gradients, ready for the next epoch)
    model.parameters_update(learning_rate)

    if (epoch+1)%10==0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print("Training complete")
print(f'Final Loss: {loss.item():.4f}')
# detach() so the printed tensor shows only values, without autograd metadata.
print(f"Final first 5 weights: {model.weights.detach()[:5].T}")


Epoch [10/100], Loss: 0.7726
Epoch [20/100], Loss: 0.4659
Epoch [30/100], Loss: 0.3494
Epoch [40/100], Loss: 0.2961
Epoch [50/100], Loss: 0.2618
Epoch [60/100], Loss: 0.2367
Epoch [70/100], Loss: 0.2173
Epoch [80/100], Loss: 0.2016
Epoch [90/100], Loss: 0.1887
Epoch [100/100], Loss: 0.1776
Training complete
Final Loss: 0.1776
Final first 5 weights: tensor([[ 0.9617,  0.1089, -1.2443,  2.3398, -0.6122]],
       grad_fn=<PermuteBackward0>)


In [6]:
#Testing the model
# Evaluation needs no gradients, so the forward pass runs under no_grad.
with torch.no_grad():
    # Predicted probability of class 1 for every held-out sample.
    y_test_pred = model.forward(X_test)
    # Threshold at 0.5 to turn probabilities into hard 0/1 labels.
    y_test_pred_label = torch.ge(y_test_pred, 0.5).float()
    # Accuracy = number of matching labels / number of test samples.
    n_correct = torch.eq(y_test_pred_label, y_test).sum().item()
    n_total = y_test.shape[0]
    accuracy = n_correct / n_total
    print(f'Test Accuracy: {accuracy*100:.2f}%')

Test Accuracy: 93.86%
