In [56]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

### **Exploring data**

In [57]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [58]:
df.shape

(569, 33)

In [59]:
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

In [60]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### **Train test split**

In [61]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:, 0], test_size=0.2)

### **Scaling**

In [62]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [63]:
X_train

array([[-1.20645505, -0.4716794 , -1.19423271, ..., -0.85850253,
         0.14769885, -0.54810793],
       [-0.34131697, -0.71930749, -0.37919883, ..., -0.82923841,
         1.13742155, -0.7254733 ],
       [-1.38214108, -1.49956958, -1.25412472, ..., -0.14793322,
         0.2265548 ,  2.31874126],
       ...,
       [-1.3373527 ,  2.00693761, -1.34752275, ..., -1.71021023,
        -0.67948403, -0.77734431],
       [ 0.26260374, -0.04417053,  0.23438012, ...,  0.55928291,
         0.10424761,  0.06095809],
       [ 0.38107622,  0.81084722,  0.36714771, ...,  0.53946866,
        -0.54913031, -0.85598745]])

In [64]:
y_train

Unnamed: 0,diagnosis
206,B
268,B
504,B
278,B
476,B
...,...
246,B
555,B
557,B
291,B


### **Label Encoding**

In [65]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [66]:
y_train

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,

### **Numpy arrays to PyTorch Tensors**

In [67]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [68]:
X_train_tensor.shape

torch.Size([455, 30])

In [69]:
y_train_tensor.shape

torch.Size([455])

### **Defining the model**

In [70]:
class MySimpleNN():
  def __init__(self, X):
    self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
    self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

  def forward(self, X):
    z = torch.matmul(X,self.weights) + self.bias
    y_pred = torch.sigmoid(z)
    return y_pred

  def loss_function(self, y_pred, y):
    # Clamp predictions to avoid log(0)
    epsilon = 1e-7
    y_pred = torch.clamp(y_pred, epsilon, 1-epsilon)

    # Calculate loss
    loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
    return loss

### **Important Parameters**

In [71]:
learning_rate = 0.1
epochs = 25

### **Training Pipeline**

In [72]:
# create model
model = MySimpleNN(X_train_tensor)

# define loop
for epoch in range(epochs):
  # forward pass
  y_pred = model.forward(X_train_tensor)

  # loss calculate
  loss = model.loss_function(y_pred, y_train_tensor)

  # backward pass
  loss.backward()

  # parameters update
  with torch.no_grad():
    model.weights -= learning_rate * model.weights.grad
    model.bias -= learning_rate * model.bias.grad

  # zero gradients
  model.weights.grad.zero_()
  model.bias.grad.zero_()

  # print loss in each epoch
  print(f'Epoch: {epoch+1}, Loss: {loss.item()}')

Epoch: 1, Loss: 3.3065040096953857
Epoch: 2, Loss: 3.1808896085998457
Epoch: 3, Loss: 3.0483668505888244
Epoch: 4, Loss: 2.9150556419284026
Epoch: 5, Loss: 2.777586382646196
Epoch: 6, Loss: 2.6343428026749134
Epoch: 7, Loss: 2.4859674581921825
Epoch: 8, Loss: 2.3302981091798802
Epoch: 9, Loss: 2.171123358033299
Epoch: 10, Loss: 2.016940440197314
Epoch: 11, Loss: 1.8656970668030224
Epoch: 12, Loss: 1.7200831433810118
Epoch: 13, Loss: 1.5779490891837802
Epoch: 14, Loss: 1.4470073469581153
Epoch: 15, Loss: 1.3257582802099535
Epoch: 16, Loss: 1.2189865708053544
Epoch: 17, Loss: 1.1273426687934864
Epoch: 18, Loss: 1.050837675335558
Epoch: 19, Loss: 0.9887805192007544
Epoch: 20, Loss: 0.9398102248069974
Epoch: 21, Loss: 0.9020063869125802
Epoch: 22, Loss: 0.8731082554577865
Epoch: 23, Loss: 0.8508538441493315
Epoch: 24, Loss: 0.8333094582509705
Epoch: 25, Loss: 0.8190220526957663


In [74]:
model.weights

tensor([[-0.3845],
        [ 0.4900],
        [ 0.1710],
        [-0.1396],
        [-0.1453],
        [-0.0917],
        [-0.1517],
        [ 0.0279],
        [-0.2154],
        [ 0.2736],
        [ 0.0555],
        [ 0.6888],
        [-0.2796],
        [ 0.4640],
        [-0.1317],
        [-0.2010],
        [-0.2765],
        [ 0.1964],
        [ 0.3638],
        [ 0.3122],
        [ 0.1943],
        [-0.3200],
        [-0.0244],
        [ 0.4468],
        [-0.1694],
        [-0.4029],
        [-0.1375],
        [ 0.3461],
        [-0.1331],
        [ 0.4396]], dtype=torch.float64, requires_grad=True)

In [75]:
model.bias

tensor([-0.1446], dtype=torch.float64, requires_grad=True)

### **Evaluation**

In [79]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred > 0.5).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
  print(f'Accuracy: {accuracy.item()}')

Accuracy: 0.5270852446556091
