<a href="https://colab.research.google.com/github/Bisma-Shafiq/Deep-Learning_Pytorch/blob/main/Pytorch_Training_Pipeline_Dataset_Class_%26_DataLoader_07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
data.drop(columns=['id', 'Unnamed: 32'], inplace=True)

# Define features (X) and target variable (y)
X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Convert diagnosis to numerical values (0 for 'B', 1 for 'M')
y = y.map({'B': 0, 'M': 1})


# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Added random_state for reproducibility


# Scale the features using StandardScaler
st = StandardScaler()
x_train = st.fit_transform(x_train)
x_test = st.transform(x_test)


In [20]:
x_train

array([[-1.44075296, -0.43531947, -1.36208497, ...,  0.9320124 ,
         2.09724217,  1.88645014],
       [ 1.97409619,  1.73302577,  2.09167167, ...,  2.6989469 ,
         1.89116053,  2.49783848],
       [-1.39998202, -1.24962228, -1.34520926, ..., -0.97023893,
         0.59760192,  0.0578942 ],
       ...,
       [ 0.04880192, -0.55500086, -0.06512547, ..., -1.23903365,
        -0.70863864, -1.27145475],
       [-0.03896885,  0.10207345, -0.03137406, ...,  1.05001236,
         0.43432185,  1.21336207],
       [-0.54860557,  0.31327591, -0.60350155, ..., -0.61102866,
        -0.3345212 , -0.84628745]])

In [21]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [22]:
y_test

array([0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 1])

# convert array to tensor


In [23]:
import torch

In [24]:
x_train_tensor= torch.from_numpy(x_train)
x_test_tensor= torch.from_numpy(x_test)
y_train_tensor= torch.from_numpy(y_train)
y_test_tensor= torch.from_numpy(y_test)

In [25]:
x_train_tensor

tensor([[-1.4408, -0.4353, -1.3621,  ...,  0.9320,  2.0972,  1.8865],
        [ 1.9741,  1.7330,  2.0917,  ...,  2.6989,  1.8912,  2.4978],
        [-1.4000, -1.2496, -1.3452,  ..., -0.9702,  0.5976,  0.0579],
        ...,
        [ 0.0488, -0.5550, -0.0651,  ..., -1.2390, -0.7086, -1.2715],
        [-0.0390,  0.1021, -0.0314,  ...,  1.0500,  0.4343,  1.2134],
        [-0.5486,  0.3133, -0.6035,  ..., -0.6110, -0.3345, -0.8463]],
       dtype=torch.float64)

In [26]:
x_test_tensor.shape

torch.Size([114, 30])

# Data Class and Data Loader

In [38]:
from torch.utils.data import Dataset, DataLoader

class customdataset(Dataset):

  def __init__(self, features, labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):
    return self.features[index] , self.labels[index]

In [39]:
train_dataset = customdataset(x_train_tensor,y_train_tensor)
test_dataset = customdataset(x_test_tensor,y_test_tensor)

In [40]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# model selection

In [41]:
import torch.nn as nn
class myneuralnetwork(nn.Module):

  def __init__(self, num_features):
    super().__init__()
    self.linear = nn.Linear(num_features,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    out = self.linear(x)
    out = self.sigmoid(out)
    return out


In [42]:
# some important parameters

learning_rate = 0.1
epocs = 25


In [43]:
loss_function = nn.BCELoss()

# Training Pipeline

In [44]:
#create model
model = myneuralnetwork(x_train_tensor.shape[1])

x_train_tensor= x_train_tensor.type(torch.FloatTensor)
y_train_tensor= y_train_tensor.type(torch.FloatTensor)


# create optimizers
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)    # model.parameters is a function in pytorch that register all ur weight and bias , then iterate it

for epoch in range(epocs):
  for batch_features , batch_labels in train_dataloader:

    # forward
    y_pred = model(batch_features)
    # print(y_pred)

    # loss calculation
    loss = loss_function(y_pred, batch_labels.view(-1,1))
    # print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

    # clear gradients
    optimizer.zero_grad()

    # backward
    loss.backward()
    # update
    optimizer.step()

  print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

Epoch: 1, Loss: 0.6143571734428406
Epoch: 2, Loss: 0.3604610562324524
Epoch: 3, Loss: 0.1850954294204712
Epoch: 4, Loss: 0.21391774713993073
Epoch: 5, Loss: 0.0320395864546299
Epoch: 6, Loss: 0.03592661768198013
Epoch: 7, Loss: 0.5025883316993713
Epoch: 8, Loss: 0.06470581144094467
Epoch: 9, Loss: 0.052718162536621094
Epoch: 10, Loss: 0.05133407190442085
Epoch: 11, Loss: 0.02945670671761036
Epoch: 12, Loss: 0.032998181879520416
Epoch: 13, Loss: 0.013601471669971943
Epoch: 14, Loss: 0.10719441622495651
Epoch: 15, Loss: 0.05042911320924759
Epoch: 16, Loss: 0.33659276366233826
Epoch: 17, Loss: 0.10060317069292068
Epoch: 18, Loss: 0.014538951218128204
Epoch: 19, Loss: 0.012551157735288143
Epoch: 20, Loss: 0.03765958547592163
Epoch: 21, Loss: 0.015645580366253853
Epoch: 22, Loss: 0.04610741510987282
Epoch: 23, Loss: 0.03966598957777023
Epoch: 24, Loss: 0.13405568897724152
Epoch: 25, Loss: 0.048367615789175034


In [45]:
model.linear.weight

Parameter containing:
tensor([[ 0.3332,  0.5218,  0.5635,  0.4566,  0.2027, -0.1136,  0.4774,  0.6277,
          0.0337, -0.2480,  0.5796, -0.0215,  0.4035,  0.5588,  0.0623, -0.2843,
         -0.1400,  0.1179, -0.1105, -0.4586,  0.8057,  0.7979,  0.5500,  0.6178,
          0.4868,  0.1940,  0.4602,  0.5868,  0.5024,  0.2279]],
       requires_grad=True)

In [46]:
model.linear.bias

Parameter containing:
tensor([-0.5024], requires_grad=True)

# Evaluation

In [52]:
x_test_tensor= x_test_tensor.type(torch.FloatTensor)
y_test_tensor= y_test_tensor.type(torch.FloatTensor)
model.eval()
accuracy_list=[]

with torch.no_grad():
  for batch_features , batch_labels in test_dataloader:

    y_pred = model(batch_features)
    y_pred = (y_pred > 0.7).float()

    # calculate batch
    batch_accuracy = (y_pred == batch_labels.view(-1,1)).float().mean()
    accuracy_list.append(batch_accuracy)
    #accuracy
    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f"Accuracy: {accuracy.item()}")
    #print(y_pred)

Accuracy: 0.5383771657943726
Accuracy: 0.5537280440330505
Accuracy: 0.530701756477356
Accuracy: 0.5
