In [1]:
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [2]:
data = pd.read_csv("https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv")

## Basic Preprocessing

In [3]:
data.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [5]:
data.drop(columns=["id", "Unnamed: 32"], inplace=True)

In [6]:
data.shape

(569, 31)

In [7]:
X = data.iloc[:, 1:]
y = data.iloc[:, 0]

In [8]:
y.shape

(569,)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [10]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

In [11]:
X_test = scaler.transform(X_test)

In [12]:
X_train

array([[-0.57401813, -1.52483062, -0.61269691, ..., -0.67522058,
        -0.64567147, -0.3004602 ],
       [-0.25774847,  0.56314832, -0.25724572, ...,  0.7196263 ,
         1.32861929,  0.66863058],
       [-0.41445866,  1.06039578, -0.42585718, ...,  0.06114137,
        -0.12321284,  0.65003756],
       ...,
       [-0.6737428 , -0.50040876, -0.70218112, ..., -0.06377029,
        -0.54854775, -0.75176352],
       [-0.57116886, -1.36368561, -0.57168331, ..., -0.61847059,
        -0.19856745, -0.21369277],
       [-2.02116553, -1.36598768, -1.98188825, ..., -1.75855936,
         0.05428912,  0.53566231]])

In [13]:
encoder = LabelEncoder()

In [14]:
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

## Tensor Declaration

In [15]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)

In [16]:
X_train_tensor.shape

torch.Size([426, 30])

In [17]:
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [18]:
y_test_tensor

tensor([1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
        0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
        0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0,
        1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0,
        0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0])

In [19]:
X_test_tensor

tensor([[ 0.6255,  0.3099,  0.6326,  ...,  0.6826,  0.3406, -0.1303],
        [-0.3632, -0.4383, -0.3554,  ..., -0.4792, -1.5265, -0.7766],
        [-0.9045, -1.4742, -0.9503,  ..., -1.1490, -0.2706, -0.4205],
        ...,
        [-0.3432,  0.0406, -0.3712,  ..., -0.7741,  0.6923, -0.3923],
        [ 0.5799, -0.3024,  0.5581,  ...,  0.1167, -0.1801, -0.1540],
        [-1.0869, -1.0713, -1.0535,  ..., -1.1303, -0.7796,  0.0320]],
       dtype=torch.float64)

## Model Creation

In [20]:
class MyNeuralNetwork:

  def __init__(self, X):
    self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64,  requires_grad=True)
    self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)


  def forward_pass(self, X):
    z = torch.matmul(X, self.weights) + self.bias
    y_pred = torch.sigmoid(z)
    return y_pred

  def loss_function(self, y, y_pred):
    # Clamp predictions to avoid log(0)
    epsilon = 1e-7
    y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)

    # Calculate loss
    loss = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred)).mean()
    return loss

## Important Parameters

In [21]:
learning_rate = 0.1
epochs = 25

In [22]:
model = MyNeuralNetwork(X_train_tensor)

for e in range(epochs):
  #forward pass
  y_pred = model.forward_pass(X= X_train_tensor)

  loss = model.loss_function(y_train_tensor, y_pred)

  print(f"for every {e + 1} epochs the loss is {loss}")

  loss.backward()

  with torch.no_grad():
    model.weights -= learning_rate * model.weights.grad
    model.bias -= learning_rate * model.bias.grad

  model.weights.grad.zero_()
  model.bias.grad.zero_()

for every 1 epochs the loss is 3.373519500448187
for every 2 epochs the loss is 3.233473326290796
for every 3 epochs the loss is 3.091284106548139
for every 4 epochs the loss is 2.9404649182465006
for every 5 epochs the loss is 2.78527776184238
for every 6 epochs the loss is 2.629762415056396
for every 7 epochs the loss is 2.472339458856339
for every 8 epochs the loss is 2.3126357250121643
for every 9 epochs the loss is 2.1496950885691586
for every 10 epochs the loss is 1.988050865182038
for every 11 epochs the loss is 1.8299415183550998
for every 12 epochs the loss is 1.6807206186948582
for every 13 epochs the loss is 1.5386600885507857
for every 14 epochs the loss is 1.4058798309756002
for every 15 epochs the loss is 1.2872988444562503
for every 16 epochs the loss is 1.183681049886986
for every 17 epochs the loss is 1.0956275714460275
for every 18 epochs the loss is 1.0230386419670956
for every 19 epochs the loss is 0.9648269465041874
for every 20 epochs the loss is 0.919086217460987

In [23]:
model.weights

tensor([[-0.3261],
        [ 0.3816],
        [-0.2305],
        [ 0.3497],
        [ 0.2517],
        [-0.1928],
        [-0.2396],
        [-0.5630],
        [ 0.3436],
        [ 0.6013],
        [-0.2181],
        [ 0.4233],
        [-0.3056],
        [-0.0998],
        [-0.0241],
        [-0.1455],
        [-0.0039],
        [ 0.3935],
        [ 0.1738],
        [-0.2349],
        [ 0.0196],
        [-0.0711],
        [ 0.5540],
        [ 0.3628],
        [-0.1029],
        [ 0.2637],
        [-0.3488],
        [ 0.3687],
        [ 0.4076],
        [-0.2592]], dtype=torch.float64, requires_grad=True)

In [24]:
model.bias

tensor([-0.1727], dtype=torch.float64, requires_grad=True)

In [25]:
with torch.no_grad():
  y_pred = model.forward_pass(X_test_tensor)
  y_pred = (y_pred > 0.9).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
  print(accuracy)

tensor(0.6276)
