In [None]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split # split data into training and testing sets.
from sklearn.preprocessing import StandardScaler # scale numerical data in same range
from sklearn.preprocessing import LabelEncoder # Converts text labels into numbers.


In [None]:
import torch
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using Device: ", device )

Using Device:  mps


In [None]:
# Reads a CSV file from the internet and loads it into a Pandas DataFrame called df
df = pd.read_csv(
    'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
)

In [None]:
# Shows the first 5 rows of the dataset.
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [None]:
df.shape

(569, 33)

In [None]:
# id and unnamed is not required
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

In [None]:
df.shape

(569, 31)

In [None]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# train test split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:, 0], test_size=0.2)

In [None]:
X_train

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
461,27.420,26.27,186.90,2501.0,0.10840,0.19880,0.363500,0.168900,0.2061,0.05623,...,36.04,31.37,251.20,4254.0,0.13570,0.42560,0.68330,0.26250,0.2641,0.07427
97,9.787,19.94,62.11,294.5,0.10240,0.05301,0.006829,0.007937,0.1350,0.06890,...,10.92,26.29,68.81,366.1,0.13160,0.09473,0.02049,0.02381,0.1934,0.08988
112,14.260,19.65,97.83,629.9,0.07837,0.22330,0.300300,0.077980,0.1704,0.07769,...,15.30,23.73,107.00,709.0,0.08949,0.41930,0.67830,0.15050,0.2398,0.10820
299,10.510,23.09,66.85,334.2,0.10150,0.06797,0.024950,0.018750,0.1695,0.06556,...,10.93,24.22,70.10,362.7,0.11430,0.08614,0.04158,0.03125,0.2227,0.06777
7,13.710,20.83,90.20,577.9,0.11890,0.16450,0.093660,0.059850,0.2196,0.07451,...,17.06,28.14,110.60,897.0,0.16540,0.36820,0.26780,0.15560,0.3196,0.11510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,12.900,15.92,83.74,512.2,0.08677,0.09509,0.048940,0.030880,0.1778,0.06235,...,14.48,21.82,97.17,643.8,0.13120,0.25480,0.20900,0.10120,0.3549,0.08118
153,11.150,13.08,70.87,381.9,0.09754,0.05113,0.019820,0.017860,0.1830,0.06105,...,11.99,16.30,76.25,440.8,0.13410,0.08971,0.07116,0.05506,0.2859,0.06772
470,9.667,18.49,61.49,289.1,0.08946,0.06258,0.029480,0.015140,0.2238,0.06413,...,11.14,25.62,70.88,385.2,0.12340,0.15420,0.12770,0.06560,0.3174,0.08524
430,14.900,22.53,102.10,685.0,0.09947,0.22250,0.273300,0.097110,0.2041,0.06898,...,16.35,27.57,125.40,832.7,0.14190,0.70900,0.90190,0.24750,0.2866,0.11550


In [None]:
X_train.shape

(455, 30)

In [None]:
y_train.shape

(455,)

In [None]:
X_test.shape

(114, 30)

In [None]:
# Scaling the data so that all data points comes under the same scale

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [None]:
X_train

array([[ 3.76240461,  1.55971872,  3.90561203, ...,  2.24857648,
        -0.37624051, -0.50687745],
       [-1.21932657,  0.14418522, -1.21835202, ..., -1.34551829,
        -1.53624061,  0.33545546],
       [ 0.0443996 ,  0.07933455,  0.24833598, ...,  0.56212705,
        -0.77493927,  1.32402299],
       ...,
       [-1.25322935, -0.18006811, -1.24380965, ..., -0.71626184,
         0.49827158,  0.08507592],
       [ 0.22521444,  0.72336875,  0.42366515, ...,  2.02271271,
        -0.007075  ,  1.71793908],
       [-0.54889907, -0.28293468, -0.55398999, ..., -0.84726283,
        -0.76181339, -0.3439149 ]])

In [None]:
y_train

Unnamed: 0,diagnosis
461,M
97,B
112,B
299,B
7,M
...,...
143,B
153,B
470,B
430,M


In [None]:
# Using label encoder so that y_train data convert to number
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)

In [None]:
y_train

array([1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,

## Numpy arrays to PyTorch tensors

In [None]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [None]:
X_train_tensor

tensor([[ 3.7624,  1.5597,  3.9056,  ...,  2.2486, -0.3762, -0.5069],
        [-1.2193,  0.1442, -1.2184,  ..., -1.3455, -1.5362,  0.3355],
        [ 0.0444,  0.0793,  0.2483,  ...,  0.5621, -0.7749,  1.3240],
        ...,
        [-1.2532, -0.1801, -1.2438,  ..., -0.7163,  0.4983,  0.0851],
        [ 0.2252,  0.7234,  0.4237,  ...,  2.0227, -0.0071,  1.7179],
        [-0.5489, -0.2829, -0.5540,  ..., -0.8473, -0.7618, -0.3439]],
       dtype=torch.float64)

In [None]:
y_train_tensor

tensor([1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
        0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
        1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
        1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
        1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
        0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,
        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1,
        0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,

In [None]:
y_test_tensor.shape

torch.Size([114])

# Defining the Model

In [None]:
# class mySimpleNN():
#   def __init__(self, X):
#     self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
#     self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

#   def forward(self, X):
#     z = torch.chain_matmul(X, self.weights) + self.bias
#     y_pred = torch.sigmoid(z)
#     return y_pred

#   def binary_cross_entropy_loss(self, y, y_pred):
#     epsilon = 1e-7
#     y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
#     loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
#     return loss

import torch.nn as nn
class mySimpleNN(nn.Module):
  def __init__(self, X):

    super().__init__()
    self.linear = nn.Linear(X,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, X):
    out = self.linear(X)
    out = self.sigmoid(out)
    return out


In [None]:
learning_rate = 0.1
epochs = 30

In [None]:
loss_function = nn.BCELoss()

In [86]:

# Training pipeline
model = mySimpleNN(X_train_tensor.shape[1])


# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
  # forward pass
  # y_pred = model.forward(X_train_tensor)
  y_pred = model(X_train_tensor)

  # Loss
  # loss = model.binary_cross_entropy_loss(y_train_tensor, y_pred)
  loss = loss_function(y_pred, y_train_tensor.view(-1,1))

  # Backward Pass
  loss.backward()

  # update the parameters
  optimizer.step()
   # Zero gradients
  optimizer.zero_grad


  # do not track the graidents
  # with torch.no_grad():
    #  model.weights -= learning_rate * model.weights.grad
    #  model.bias -= learning_rate * model.bias.grad
    # model.linear.weight -= learning_rate * model.linear.weight.grad
    # model.linear.bias -= learning_rate * model.linear.bias.grad

  # Zero gradients
  # model.weights.grad.zero_()
  # model.bias.grad.zero_()
  # model.linear.weight.grad.zero_()
  # model.linear.bias.grad.zero_()

  # print loss in each epoch
  print(f"Epoch: {epoch + 1}, Loss = {loss.item()}")


RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float

In [None]:
#model.weights
model.linear.weight

Parameter containing:
tensor([[ 0.0755,  0.2240,  0.3962,  0.2182,  0.0282,  0.0216,  0.3050,  0.4369,
          0.1269,  0.0370,  0.3705, -0.0746,  0.0887,  0.0220, -0.0511,  0.0063,
         -0.0963,  0.0042, -0.1128,  0.0975,  0.4206,  0.2689,  0.3336,  0.0956,
          0.1018,  0.2345,  0.0820,  0.3734,  0.1180,  0.0329]],
       requires_grad=True)

In [None]:
# model.bias
model.linear.bias

Parameter containing:
tensor([-0.1865], requires_grad=True)

#Evaluation

In [None]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred >0.5).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
print(f"Accuracy: {accuracy.item()}")

#print(y_pred)
#print(y_train_tensor)



Accuracy: 0.5369344353675842
