In [66]:
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [67]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head(4)

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,


In [68]:
df.shape

(569, 33)

In [69]:
df.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [70]:
df.drop(['id', 'Unnamed: 32'], axis = 1, inplace = True)

In [71]:
df.shape

(569, 31)

In [72]:
df['diagnosis'].value_counts()

Unnamed: 0_level_0,count
diagnosis,Unnamed: 1_level_1
B,357
M,212



B = Benign → non-cancerous tumor

M = Malignant → cancerous tumor

# Train-Test Split

In [73]:
y = df['diagnosis']
x = df.iloc[:, 1:]

In [74]:
len(x.columns)

30

In [75]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 2)

In [76]:
print("shape of x_train:", x_train.shape)
print("shape of x_test:", x_test.shape)
print("shape of y_train:", y_train.shape)
print("shape of y_test:", y_test.shape)

shape of x_train: (455, 30)
shape of x_test: (114, 30)
shape of y_train: (455,)
shape of y_test: (114,)


# Scaling

In [77]:
sc = StandardScaler()
x_train_transform = sc.fit_transform(x_train)
x_test_transform = sc.fit_transform(x_test)

In [78]:
x_train_transform

array([[-0.01330339,  1.7757658 , -0.01491962, ..., -0.13236958,
        -1.08014517, -0.03527943],
       [-0.8448276 , -0.6284278 , -0.87702746, ..., -1.11552632,
        -0.85773964, -0.72098905],
       [ 1.44755936,  0.71180168,  1.47428816, ...,  0.87583964,
         0.4967602 ,  0.46321706],
       ...,
       [-0.46608541, -1.49375484, -0.53234924, ..., -1.32388956,
        -1.02997851, -0.75145272],
       [-0.50025764, -1.62161319, -0.527814  , ..., -0.0987626 ,
         0.35796577, -0.43906159],
       [ 0.96060511,  1.21181916,  1.00427242, ...,  0.8956983 ,
        -1.23064515,  0.50697397]])

# Label Encoder

In [79]:
le = LabelEncoder()
y_train_new = le.fit_transform(y_train)
y_test_new = le.fit_transform(y_test)

# Numpy array to Pytorch

In [80]:
x_train_tensor = torch.from_numpy(x_train_transform)
x_test_tensor = torch.from_numpy(x_test_transform)
y_train_tensor = torch.from_numpy(y_train_new)
y_test_tensor = torch.from_numpy(y_test_new)

In [81]:
y_train_tensor.dtype

torch.int64

In [82]:
x_train_tensor.dtype

torch.float64

In [83]:
x_train_tensor.shape

torch.Size([455, 30])

In [84]:
y_train_tensor.shape

torch.Size([455])

# Definig Model

In [91]:
class MySimpleNN():

  def __init__(self, x):

    self.weights = torch.rand(x_train_tensor.shape[1], 1, dtype=torch.float64, requires_grad = True)    # 1 because we are taking 1 neuron only
    self.bias = torch.zeros(1, dtype=torch.float64, requires_grad = True)                         # 1 because we are taking 1 neuron only


  def forward_pass(self, x):

    a = torch.matmul(x, self.weights) + self.bias
    y_pred = torch.sigmoid(a)
    return y_pred

  def loss(self, y_pred, y_act):

    e = 1e-7
    y_pred = torch.clamp(y_pred, e, 1-e)
    loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
    return loss

# Parameters

In [98]:
epochs = 25
learning_rate = 0.1

# Training Pipeline

In [99]:
# create model
model = MySimpleNN(x_train_tensor)

# starting loop
for i in range(epochs):

# forward pass
  y_pred = model.forward_pass(x_train_tensor)

# loss calculation
  loss = model.loss(y_pred, y_train_tensor)

# backward pass
  loss.backward()

# parameters update
  with torch.no_grad():
    model.weights -= learning_rate * model.weights.grad
    model.bias -= learning_rate * model.bias.grad

# gradients clean
  model.weights.grad.zero_()
  model.bias.grad.zero_()

# print loss function
  print(f"loss for epoch {i+1} : {loss.item()}")

loss for epoch 1 : 3.9646577324106294
loss for epoch 2 : 3.851823606760203
loss for epoch 3 : 3.737260208504904
loss for epoch 4 : 3.6198582629850184
loss for epoch 5 : 3.4963153243712126
loss for epoch 6 : 3.367694536102508
loss for epoch 7 : 3.235535941030096
loss for epoch 8 : 3.097813566880774
loss for epoch 9 : 2.9544218570971643
loss for epoch 10 : 2.8057266930328013
loss for epoch 11 : 2.6560920505951855
loss for epoch 12 : 2.4950144623302513
loss for epoch 13 : 2.329926851964995
loss for epoch 14 : 2.1619151616154113
loss for epoch 15 : 1.9970849660989232
loss for epoch 16 : 1.8368685911412976
loss for epoch 17 : 1.6853521149844346
loss for epoch 18 : 1.5404259258549586
loss for epoch 19 : 1.407821691404279
loss for epoch 20 : 1.2834826186902988
loss for epoch 21 : 1.1702578365705802
loss for epoch 22 : 1.075263187845973
loss for epoch 23 : 0.9983508913662745
loss for epoch 24 : 0.9382483712238946
loss for epoch 25 : 0.8927260781282053


In [100]:
model.bias

tensor([-0.0890], dtype=torch.float64, requires_grad=True)

In [101]:
model.weights

tensor([[ 0.4004],
        [ 0.5432],
        [ 0.1109],
        [ 0.4937],
        [ 0.3000],
        [-0.6644],
        [ 0.0755],
        [ 0.3052],
        [ 0.3749],
        [ 0.4461],
        [ 0.0947],
        [ 0.2127],
        [ 0.1266],
        [ 0.3994],
        [-0.0578],
        [ 0.3417],
        [-0.2911],
        [-0.2740],
        [ 0.3170],
        [ 0.2168],
        [-0.1333],
        [-0.1221],
        [-0.4174],
        [-0.1205],
        [-0.2357],
        [ 0.3482],
        [-0.2898],
        [-0.2626],
        [-0.0057],
        [-0.0279]], dtype=torch.float64, requires_grad=True)

# Model Evaluate

In [108]:
with torch.no_grad():
  y_pred1 = model.forward_pass(x_test_tensor)

In [112]:
y_pred1 = (y_pred1 > 0.5).float()

# Comparison(y_pred1 > 0.5) creates a boolean tensor(true or false) and .float() converts it into numerical class labels (0 and 1).

In [116]:
acc = (y_pred1 == y_test_tensor).float().mean()
print(f"Accuracy -> {acc}")

Accuracy -> 0.5221606492996216
