### Training Pipeline in pytorch using Dataset & DataLoader Pytorch Module.

## Important Library

In [16]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import torch
import torch.nn as nn
from torchinfo import summary
from torch.utils.data import Dataset,DataLoader

In [11]:
df=load_breast_cancer()
X=df.data
y=df.target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
sc=StandardScaler()
le=LabelEncoder()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)
y_train=le.fit_transform(y_train)
y_test=le.transform(y_test)

In [13]:
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))

## Create Dataset and DataLoader class and object.

In [18]:
class CustomDataset(Dataset):
    def __init__(self,features,labels):
        self.features=features
        self.labels=labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self,idx):
        return self.features[idx], self.labels[idx]


In [20]:
train_dataset=CustomDataset(X_train_tensor,y_train_tensor)
test_dataset=CustomDataset(X_test_tensor,y_test_tensor)

In [22]:
train_loader=DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader=DataLoader(test_dataset, batch_size=32, shuffle=True)

In [36]:
# Create Model

class MySimpleNN(nn.Module):
    def __init__(self,num_features):
        super().__init__()
        self.network=nn.Sequential(
            nn.Linear(num_features,20),
            nn.ReLU(),
            nn.Linear(20,15),
            nn.ReLU(),
            nn.Linear(15,10),
            nn.ReLU(),
            nn.Linear(10,1),
            nn.Sigmoid()
        )
    def forward(self,features):
        out=self.network(features)
        return out

In [38]:
epochs=25
learning_rate=0.001

In [40]:
model=MySimpleNN(X_train_tensor.shape[1])
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)
loss_func=nn.BCELoss()

## Training PipeLine

In [43]:
for epoch in range(epochs):
    for batch_features, batch_labels in train_loader:
        y_pred=model(batch_features)
        loss=loss_func(y_pred,batch_labels.view(-1,1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

Epoch: 1, Loss: 0.6812464594841003
Epoch: 2, Loss: 0.7002885937690735
Epoch: 3, Loss: 0.7105280756950378
Epoch: 4, Loss: 0.7306874394416809
Epoch: 5, Loss: 0.786434531211853
Epoch: 6, Loss: 0.755487322807312
Epoch: 7, Loss: 0.7551029324531555
Epoch: 8, Loss: 0.698119580745697
Epoch: 9, Loss: 0.7284613251686096
Epoch: 10, Loss: 0.7491809129714966
Epoch: 11, Loss: 0.7185091972351074
Epoch: 12, Loss: 0.697050929069519
Epoch: 13, Loss: 0.703363299369812
Epoch: 14, Loss: 0.6765782237052917
Epoch: 15, Loss: 0.7289671897888184
Epoch: 16, Loss: 0.7149478793144226
Epoch: 17, Loss: 0.7130299806594849
Epoch: 18, Loss: 0.716282844543457
Epoch: 19, Loss: 0.718336284160614
Epoch: 20, Loss: 0.7138637900352478
Epoch: 21, Loss: 0.6677461862564087
Epoch: 22, Loss: 0.660225510597229
Epoch: 23, Loss: 0.7257775068283081
Epoch: 24, Loss: 0.6927793622016907
Epoch: 25, Loss: 0.6800435185432434


## Evaluation

In [48]:
model.eval()  # Set the model to evaluation mode because on testing we have to not calculate any backpropagation so we have to model we are doing testing.
accuracy_list=[]
with torch.no_grad():
    for batch_features,batch_labels in test_loader:
        y_pred=model(batch_features)
        y_pred=(y_pred>0.4).float()
        batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

# Calculate overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f'Accuracy: {overall_accuracy:.4f}')


Accuracy: 0.6215
