In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split # split data into training and testing sets.
from sklearn.preprocessing import StandardScaler # scale numerical data in same range
from sklearn.preprocessing import LabelEncoder # Converts text labels into numbers.


In [2]:
import torch
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using Device: ", device )

Using Device:  mps


In [3]:
# Reads a CSV file from the internet and loads it into a Pandas DataFrame called df
df = pd.read_csv(
    'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
)

In [4]:
# Shows the first 5 rows of the dataset.
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [5]:
df.shape

(569, 33)

In [6]:
# id and unnamed is not required
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

In [7]:
df.shape

(569, 31)

In [8]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# train test split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:, 0], test_size=0.2)

In [10]:
X_train

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
170,12.320,12.39,78.85,464.1,0.10280,0.06981,0.03987,0.037000,0.1959,0.05955,...,13.500,15.64,86.97,549.1,0.1385,0.1266,0.1242,0.09391,0.2827,0.06771
551,11.130,22.44,71.49,378.4,0.09566,0.08194,0.04824,0.022570,0.2030,0.06552,...,12.020,28.26,77.80,436.6,0.1087,0.1782,0.1564,0.06413,0.3169,0.08032
539,7.691,25.44,48.34,170.4,0.08668,0.11990,0.09252,0.013640,0.2037,0.07751,...,8.678,31.89,54.49,223.6,0.1596,0.3064,0.3393,0.05000,0.2790,0.10660
154,13.150,15.34,85.31,538.9,0.09384,0.08498,0.09293,0.034830,0.1822,0.06207,...,14.770,20.50,97.67,677.3,0.1478,0.2256,0.3009,0.09722,0.3849,0.08633
46,8.196,16.84,51.71,201.9,0.08600,0.05943,0.01588,0.005917,0.1769,0.06503,...,8.964,21.96,57.26,242.2,0.1297,0.1357,0.0688,0.02564,0.3105,0.07409
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
526,13.460,18.75,87.44,551.1,0.10750,0.11380,0.04201,0.031520,0.1723,0.06317,...,15.350,25.16,101.90,719.8,0.1624,0.3124,0.2654,0.14270,0.3518,0.08665
72,17.200,24.52,114.20,929.4,0.10710,0.18300,0.16920,0.079440,0.1927,0.06487,...,23.320,33.82,151.60,1681.0,0.1585,0.7394,0.6566,0.18990,0.3313,0.13390
6,18.250,19.98,119.60,1040.0,0.09463,0.10900,0.11270,0.074000,0.1794,0.05742,...,22.880,27.66,153.20,1606.0,0.1442,0.2576,0.3784,0.19320,0.3063,0.08368
472,14.920,14.93,96.45,686.9,0.08098,0.08549,0.05539,0.032210,0.1687,0.05669,...,17.180,18.22,112.00,906.6,0.1065,0.2791,0.3151,0.11470,0.2688,0.08273


In [11]:
X_train.shape

(455, 30)

In [12]:
y_train.shape

(455,)

In [13]:
X_test.shape

(114, 30)

In [14]:
# Scaling the data so that all data points comes under the same scale

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [15]:
X_train

array([[-0.48673608, -1.56547131, -0.51431208, ..., -0.29582422,
        -0.10609809, -0.85033362],
       [-0.83150341,  0.74172307, -0.82335632, ..., -0.75143264,
         0.44017211, -0.17065264],
       [-1.82785201,  1.43043781, -1.79541804, ..., -0.9676095 ,
        -0.1651975 ,  1.2458435 ],
       ...,
       [ 1.23130616,  0.17697698,  1.19676848, ...,  1.22322748,
         0.2708603 ,  0.01045189],
       [ 0.26653708, -0.9823595 ,  0.22470676, ...,  0.02224492,
        -0.3281202 , -0.04075326],
       [-0.12748273, -1.21881822, -0.10533233, ..., -0.34187464,
         0.27405486, -0.09249741]])

In [16]:
y_train

170    B
551    B
539    B
154    B
46     B
      ..
526    B
72     M
6      M
472    B
221    B
Name: diagnosis, Length: 455, dtype: object

In [17]:
# Using label encoder so that y_train data convert to number
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)

In [18]:
y_train

array([0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,

## Numpy arrays to PyTorch tensors

In [19]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [20]:
X_train_tensor

tensor([[-0.4867, -1.5655, -0.5143,  ..., -0.2958, -0.1061, -0.8503],
        [-0.8315,  0.7417, -0.8234,  ..., -0.7514,  0.4402, -0.1707],
        [-1.8279,  1.4304, -1.7954,  ..., -0.9676, -0.1652,  1.2458],
        ...,
        [ 1.2313,  0.1770,  1.1968,  ...,  1.2232,  0.2709,  0.0105],
        [ 0.2665, -0.9824,  0.2247,  ...,  0.0222, -0.3281, -0.0408],
        [-0.1275, -1.2188, -0.1053,  ..., -0.3419,  0.2741, -0.0925]],
       dtype=torch.float64)

In [21]:
y_train_tensor

tensor([0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1,
        1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
        0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,
        1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
        0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
        0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0,
        0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
        0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
        1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,

In [22]:
y_test_tensor.shape

torch.Size([114])

# Defining the Model

In [23]:
# class mySimpleNN():
#   def __init__(self, X):
#     self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
#     self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

#   def forward(self, X):
#     z = torch.chain_matmul(X, self.weights) + self.bias
#     y_pred = torch.sigmoid(z)
#     return y_pred

#   def binary_cross_entropy_loss(self, y, y_pred):
#     epsilon = 1e-7
#     y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
#     loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
#     return loss

import torch.nn as nn
class mySimpleNN(nn.Module):
  def __init__(self, X):

    super().__init__()
    self.linear = nn.Linear(X,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, X):
    out = self.linear(X)
    out = self.sigmoid(out)
    return out


In [24]:
learning_rate = 0.1
epochs = 30

In [25]:
loss_function = nn.BCELoss()

In [26]:

# Training pipeline
model = mySimpleNN(X_train_tensor.shape[1])


# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
  # forward pass
  # y_pred = model.forward(X_train_tensor)
  y_pred = model(X_train_tensor)

  # Loss
  # loss = model.binary_cross_entropy_loss(y_train_tensor, y_pred)
  loss = loss_function(y_pred, y_train_tensor.view(-1,1))

  # Backward Pass
  loss.backward()

  # update the parameters
  optimizer.step()
   # Zero gradients
  optimizer.zero_grad


  # do not track the graidents
  # with torch.no_grad():
    #  model.weights -= learning_rate * model.weights.grad
    #  model.bias -= learning_rate * model.bias.grad
    # model.linear.weight -= learning_rate * model.linear.weight.grad
    # model.linear.bias -= learning_rate * model.linear.bias.grad

  # Zero gradients
  # model.weights.grad.zero_()
  # model.bias.grad.zero_()
  # model.linear.weight.grad.zero_()
  # model.linear.bias.grad.zero_()

  # print loss in each epoch
  print(f"Epoch: {epoch + 1}, Loss = {loss.item()}")


RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float

In [None]:
#model.weights
model.linear.weight

Parameter containing:
tensor([[ 0.0755,  0.2240,  0.3962,  0.2182,  0.0282,  0.0216,  0.3050,  0.4369,
          0.1269,  0.0370,  0.3705, -0.0746,  0.0887,  0.0220, -0.0511,  0.0063,
         -0.0963,  0.0042, -0.1128,  0.0975,  0.4206,  0.2689,  0.3336,  0.0956,
          0.1018,  0.2345,  0.0820,  0.3734,  0.1180,  0.0329]],
       requires_grad=True)

In [None]:
# model.bias
model.linear.bias

Parameter containing:
tensor([-0.1865], requires_grad=True)

#Evaluation

In [None]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred >0.5).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
print(f"Accuracy: {accuracy.item()}")

#print(y_pred)
#print(y_train_tensor)



Accuracy: 0.5369344353675842
