In [2]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [4]:
df.shape

(569, 33)

In [5]:
df.drop(columns=['id','Unnamed: 32'],inplace=True)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:], df.iloc[:,0], random_state=42, test_size=0.2)
X_train.shape, X_test.shape

((455, 30), (114, 30))

### Scaling of inputs

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Label Encoding

In [8]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)

### All of these outputs are numpy array, we have to convert them into pytorch tensors

In [9]:
X_train_tensor, X_test_tensor = torch.from_numpy(X_train), torch.from_numpy(X_test)
y_train_tensor, y_test_tensor = torch.from_numpy(y_train), torch.from_numpy(y_test)

In [10]:
X_train_tensor

tensor([[-1.4408, -0.4353, -1.3621,  ...,  0.9320,  2.0972,  1.8865],
        [ 1.9741,  1.7330,  2.0917,  ...,  2.6989,  1.8912,  2.4978],
        [-1.4000, -1.2496, -1.3452,  ..., -0.9702,  0.5976,  0.0579],
        ...,
        [ 0.0488, -0.5550, -0.0651,  ..., -1.2390, -0.7086, -1.2715],
        [-0.0390,  0.1021, -0.0314,  ...,  1.0500,  0.4343,  1.2134],
        [-0.5486,  0.3133, -0.6035,  ..., -0.6110, -0.3345, -0.8463]],
       dtype=torch.float64)

### Defining the model

In [11]:
class TorchNeuralNetwork:
    
    def __init__(self,X) -> None:
        # training dataset has 30 columns(features) therefore 30 weights and 1 bias has to be constructed for the neural network
        self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True) #tensor of size (30,1) with autograd on
        self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True) # single bias of value 0 as neural network will contain a single neuron

    def forward(self,X):
        # z = wx + b
        z = torch.matmul(X, self.weights) + self.bias 
        y_pred = torch.sigmoid(z) #activation function
        return y_pred

    def loss_function(self, y_pred, y):
        #clamp prediction to avoid log(0)
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1-epsilon) # limits all the values inside the range of epsilon and 1-epsilon

        #calculate loss
        loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred) ).mean() 
        return loss

In [12]:
input = torch.tensor([1,2,3,4,5], dtype=torch.float64)
torch.clamp(input, 2,4)

tensor([2., 2., 3., 4., 4.], dtype=torch.float64)

### Formula for Binary cross Entropy :
![alt text](1_rdBw0E-My8Gu3f_BOB6GMA.webp)  
Here, pi is the probability of class 1, and (1-pi) is the probability of class 0.

### Important parameters

In [13]:
learning_rate = 0.1
epochs = 25

### Training pipeline

In [14]:
#creating model
model = TorchNeuralNetwork(X_train_tensor)

for epoch in range(epochs):
    # forward pass
    y_pred = model.forward(X_train_tensor)

    #loss calculation (binary cross entropy)
    loss = model.loss_function(y_pred, y_train_tensor)

    #backward pass / calculate derivates
    loss.backward()

    #parameters update
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad
    
    # zero gradients
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    print(f'Epoch {epoch}, Loss : {loss}')

# 

Epoch 0, Loss : 3.7280539969727213
Epoch 1, Loss : 3.6007978867126256
Epoch 2, Loss : 3.471191137068036
Epoch 3, Loss : 3.336495545157953
Epoch 4, Loss : 3.1974466525114553
Epoch 5, Loss : 3.0559122344816365
Epoch 6, Loss : 2.9109033577972356
Epoch 7, Loss : 2.7606421514906416
Epoch 8, Loss : 2.6013453932507606
Epoch 9, Loss : 2.435796147334183
Epoch 10, Loss : 2.2632078435585465
Epoch 11, Loss : 2.088540563988243
Epoch 12, Loss : 1.9191714725568934
Epoch 13, Loss : 1.7544051886784642
Epoch 14, Loss : 1.5943731811300685
Epoch 15, Loss : 1.4408998612517367
Epoch 16, Loss : 1.2932707468710714
Epoch 17, Loss : 1.1623860528996295
Epoch 18, Loss : 1.0507360679998468
Epoch 19, Loss : 0.9598536297766465
Epoch 20, Loss : 0.889681598450936
Epoch 21, Loss : 0.8383277735045899
Epoch 22, Loss : 0.8024477419463245
Epoch 23, Loss : 0.7781161657593761
Epoch 24, Loss : 0.761721155612807


### Evaluation

In [15]:
with torch.no_grad():
    y_pred = model.forward(X_test_tensor)
    y_pred = (y_pred > 0.5).float()

    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f'Accuracy : {accuracy.item()}')



Accuracy : 0.5193905830383301
