### PyTorch Pipeline

In [1]:
import torch

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
device

'cuda'

In [4]:
print(f"The present GPU is {torch.cuda.get_device_name()}")

The present GPU is NVIDIA A100-SXM4-40GB MIG 1g.5gb


In [5]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [6]:
df = pd.read_csv('https://github.com/prashant-kikani/breast-cancer-detection/raw/master/breast-cancer-data.csv')

In [7]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [8]:
df.shape

(569, 33)

In [9]:
#id column and unnamed column are not required
df.drop(columns = ['id', 'Unnamed: 32'], inplace = True)

In [10]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [11]:
#train test split

X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:], df.iloc[:,0], test_size=0.2)

In [12]:
y_train.shape

(455,)

In [13]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [14]:
X_train

array([[-0.99295599,  0.19706141, -0.88267633, ...,  0.48459481,
        -0.48556611,  1.92443349],
       [-1.14724692,  0.44298663, -1.17126809, ..., -1.34011685,
        -0.87280827, -0.19755211],
       [-0.07006767, -0.8307204 , -0.04177964, ...,  0.36063958,
        -0.4980578 ,  1.00862351],
       ...,
       [-0.47008119, -0.39223109, -0.41330007, ..., -0.61588575,
        -0.54958599,  0.50169575],
       [ 1.75285105,  0.03929806,  1.7565285 , ...,  1.55484239,
         0.19366912, -0.19001015],
       [ 0.75567451,  0.19242131,  0.6494308 , ..., -0.40924332,
         2.76227134, -0.43350786]])

In [15]:
y_train

376    B
425    B
73     M
23     M
422    B
      ..
29     M
384    B
431    B
432    M
489    M
Name: diagnosis, Length: 455, dtype: object

In [16]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)

In [17]:
y_train

array([0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,

In [18]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [19]:
print(f"Data type of X_train_tensor is {X_train_tensor.dtype} and its shape is {X_train_tensor.shape}")
print(f"Data type of X_test_tensor is {X_test_tensor.dtype} and its shape is {X_test_tensor.shape}")
print(f"Data type of y_train_tensor is {y_train_tensor.dtype} and its shape is {y_train_tensor.shape}")
print(f"Data type of y_test_tensor is {y_test_tensor.dtype} and its shape is {y_test_tensor.shape}")

Data type of X_train_tensor is torch.float64 and its shape is torch.Size([455, 30])
Data type of X_test_tensor is torch.float64 and its shape is torch.Size([114, 30])
Data type of y_train_tensor is torch.int64 and its shape is torch.Size([455])
Data type of y_test_tensor is torch.int64 and its shape is torch.Size([114])


In [20]:
class SimpleNN():
    def __init__(self, X):
        
        self.weights = torch.rand(X.shape[1],1, dtype = torch.float64, requires_grad=True)
        self.bias = torch.zeros(1, dtype = torch.float64, requires_grad=True)
    def forward(self,X):
        z = torch.matmul(X,self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred
    def loss_function(self, y_pred,y):
        epsilon = 1e-8
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
        
        loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
        return loss
        

In [24]:
learning_rate = 0.1
epochs = 100

In [25]:
model = SimpleNN(X_train_tensor)\

for epoch in range(epochs):
    
#forward pass
    y_pred = model.forward(X_train_tensor)
    
#loss calculation
    loss = model.loss_function(y_pred, y_train_tensor)
    
#backward pass
    loss.backward()
#update params
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad
    
#zero the gradients
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

Epoch: 1, Loss: 3.9601487279847034
Epoch: 2, Loss: 3.8273976043394167
Epoch: 3, Loss: 3.6905851854022544
Epoch: 4, Loss: 3.5502677108832312
Epoch: 5, Loss: 3.405079578981897
Epoch: 6, Loss: 3.256969006315924
Epoch: 7, Loss: 3.1038276999631003
Epoch: 8, Loss: 2.9511455579399115
Epoch: 9, Loss: 2.7956401905360906
Epoch: 10, Loss: 2.6393285430517244
Epoch: 11, Loss: 2.4798606341696394
Epoch: 12, Loss: 2.3181397338108902
Epoch: 13, Loss: 2.1565380561861045
Epoch: 14, Loss: 1.9961846852613305
Epoch: 15, Loss: 1.841470794178553
Epoch: 16, Loss: 1.6924933237059914
Epoch: 17, Loss: 1.5493858206304283
Epoch: 18, Loss: 1.4181687187638206
Epoch: 19, Loss: 1.3003360935454367
Epoch: 20, Loss: 1.1970996112696455
Epoch: 21, Loss: 1.1090491320489726
Epoch: 22, Loss: 1.0358530126592846
Epoch: 23, Loss: 0.9762333807533763
Epoch: 24, Loss: 0.928284274025092
Epoch: 25, Loss: 0.8899128209846133
Epoch: 26, Loss: 0.8591602701763099
Epoch: 27, Loss: 0.8343499724310148
Epoch: 28, Loss: 0.8141235575347973
Epoch

In [28]:
#model evaluation

with torch.no_grad():
    y_pred = model.forward(X_test_tensor)
    y_pred = (y_pred > 0.9).float()
    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f"Accuracy is {accuracy.item()}")

Accuracy is 0.6315789222717285
