## Pytorch NN Module

In [1]:
# create model class 
import torch 
import torch.nn as nn 

In [2]:
class Model(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features, 3),
            nn.ReLU(),
            nn.Linear(3,1),
            nn.Sigmoid()
        )
    def forward(self, features):  ## Use __call__ 
        out = self.network(features)
        return out 


In [3]:
# create dataset 
features = torch.rand(10, 5)

#cerate model 
model = Model(features.shape[1])

#call model for forward pass
# model.forward(features)
model(features)

tensor([[0.3970],
        [0.3977],
        [0.3658],
        [0.3606],
        [0.3848],
        [0.3751],
        [0.3717],
        [0.4033],
        [0.3814],
        [0.3970]], grad_fn=<SigmoidBackward0>)

In [4]:
from torchinfo import summary

summary(model, input_size=(10,5))

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    [10, 1]                   --
├─Sequential: 1-1                        [10, 1]                   --
│    └─Linear: 2-1                       [10, 3]                   18
│    └─ReLU: 2-2                         [10, 3]                   --
│    └─Linear: 2-3                       [10, 1]                   4
│    └─Sigmoid: 2-4                      [10, 1]                   --
Total params: 22
Trainable params: 22
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

# Breast Cancer Detection

In [1]:
import numpy as np 
import pandas as pd 
import torch
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
df.shape

(569, 33)

In [4]:
df.drop(columns=['id', 'Unnamed: 32'], inplace=True) 

In [5]:
df.head() 

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


## Train Test Split 

In [6]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:], df.iloc[:,0], test_size=0.2)

## Scaling

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
X_train

array([[-0.90242968,  0.44738658, -0.8274596 , ...,  0.41979863,
         0.09218017,  0.67098202],
       [-1.19822563, -0.33849825, -1.12757776, ..., -0.23659635,
        -0.46149885,  1.77758979],
       [-0.38775603,  0.71492184, -0.42373901, ..., -0.32436833,
        -0.12138174, -0.17463335],
       ...,
       [-1.24403724, -0.08529524, -1.237347  , ..., -1.04321937,
         0.45919027, -0.20650189],
       [-1.48553795, -1.15543628, -1.36643892, ..., -1.01335567,
        -1.03099728,  1.3490128 ],
       [-0.59419107,  2.0693191 , -0.62395482, ..., -0.89799387,
        -0.79370627, -0.95925892]])

In [9]:
y_train

41     M
176    B
135    M
109    B
214    M
      ..
318    B
151    B
424    B
71     B
471    B
Name: diagnosis, Length: 455, dtype: object

### Label Encoding

In [10]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

### Numpy array to pytorch tensor

In [25]:
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))

In [26]:
X_train_tensor.shape

torch.Size([455, 30])

In [27]:
y_train_tensor.shape

torch.Size([455])

### Defining the model

In [28]:
import torch.nn as nn 
class MySimpleNN(nn.Module): 
    def __init__(self, num_features):
        super().__init__()
        self.linear = nn.Linear(num_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, features): 
        out = self.linear(features)
        out = self.sigmoid(out)

        return out

In [29]:
learning_rate = 0.1
epochs = 25 

In [30]:
loss_function = nn.BCELoss()

#### Training Pipeline

In [32]:
model = MySimpleNN(X_train_tensor.shape[1])

optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate)

for epoch in range(epochs):

    #Forward pass 
    y_pred = model.forward(X_train_tensor)

    # Loss calculate 
    loss = loss_function(y_pred, y_train_tensor.view(-1,1))

    # zero gradients 
    optimizer.zero_grad()

    #backward loss 
    loss.backward() 

    #Parameters update
    optimizer.step()



    # print loss in each epoch 
    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

Epoch: 1, Loss: 0.7450246214866638
Epoch: 2, Loss: 0.5636553764343262
Epoch: 3, Loss: 0.46629565954208374
Epoch: 4, Loss: 0.4061422049999237
Epoch: 5, Loss: 0.364722341299057
Epoch: 6, Loss: 0.33412498235702515
Epoch: 7, Loss: 0.3104093670845032
Epoch: 8, Loss: 0.29137665033340454
Epoch: 9, Loss: 0.2756941616535187
Epoch: 10, Loss: 0.262502521276474
Epoch: 11, Loss: 0.25121989846229553
Epoch: 12, Loss: 0.24143706262111664
Epoch: 13, Loss: 0.23285672068595886
Epoch: 14, Loss: 0.22525730729103088
Epoch: 15, Loss: 0.21846990287303925
Epoch: 16, Loss: 0.21236318349838257
Epoch: 17, Loss: 0.20683354139328003
Epoch: 18, Loss: 0.20179778337478638
Epoch: 19, Loss: 0.19718840718269348
Epoch: 20, Loss: 0.1929500252008438
Epoch: 21, Loss: 0.18903662264347076
Epoch: 22, Loss: 0.1854097843170166
Epoch: 23, Loss: 0.18203695118427277
Epoch: 24, Loss: 0.17889055609703064
Epoch: 25, Loss: 0.175946906208992


### Evaluation

In [34]:
# Model evaluation
with torch.no_grad():
    y_pred = model.forward(X_test_tensor)
    y_pred = (y_pred > 0.9).float() 
    accuracy = (y_pred == y_test_tensor).float().mean() 
    print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.6001846790313721
