In [101]:
from sklearn.datasets import make_classification
import torch

In [102]:
X, y = make_classification(
        n_samples=10,          # Number of samples
        n_features=2,          # Number of features
        n_informative=2,       # Number of informative features
        n_redundant=0,         # Number of redundant features
        n_classes=2,           # Number of classes
        random_state=42        # For reproducibility
)

In [103]:
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [104]:
X.shape

(10, 2)

In [105]:
y

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [106]:
y.shape

(10,)

In [107]:
# Convert the data into Pytorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [108]:
X

tensor([[ 1.0683, -0.9701],
        [-1.1402, -0.8388],
        [-2.8954,  1.9769],
        [-0.7206, -0.9606],
        [-1.9629, -0.9923],
        [-0.9382, -0.5430],
        [ 1.7273, -1.1858],
        [ 1.7774,  1.5116],
        [ 1.8997,  0.8344],
        [-0.5872, -1.9717]])

In [109]:
y

tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [110]:
from torch.utils.data import Dataset, DataLoader

In [111]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]

In [112]:
dataset = CustomDataset(X, y)

In [113]:
len(dataset)

10

In [114]:
dataset[0]

(tensor([ 1.0683, -0.9701]), tensor(1))

In [115]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [116]:
for batch_features, batch_labels in dataloader:
    print(batch_features)
    print(batch_labels)
    print("-"*50)

tensor([[ 1.0683, -0.9701],
        [-0.7206, -0.9606]])
tensor([1, 0])
--------------------------------------------------
tensor([[ 1.7774,  1.5116],
        [-0.9382, -0.5430]])
tensor([1, 1])
--------------------------------------------------
tensor([[-1.1402, -0.8388],
        [ 1.8997,  0.8344]])
tensor([0, 1])
--------------------------------------------------
tensor([[-2.8954,  1.9769],
        [-0.5872, -1.9717]])
tensor([0, 0])
--------------------------------------------------
tensor([[ 1.7273, -1.1858],
        [-1.9629, -0.9923]])
tensor([1, 0])
--------------------------------------------------


## Modifying The Previous Training Pipeline Using Dataset and DataLoader Class

In [117]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [118]:
csv_file_path = fr"C:\Users\sanje\Desktop\Python_Frameworks_For_Machine_Learning\datafiles\breast_cancer_dataset.csv"
df =pd.read_csv(csv_file_path)

In [119]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [120]:
df.drop(columns=['id'], inplace = True)

In [121]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2)

In [122]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [123]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.fit_transform(y_test)

In [124]:
X_train_tensor = torch.from_numpy(X_train).to(torch.float32)
X_test_tensor = torch.from_numpy(X_test).to(torch.float32)
y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
y_test_tensor = torch.from_numpy(y_test).to(torch.float32)

In [125]:
X_train_tensor.shape

torch.Size([455, 30])

In [126]:
X_train_tensor.dtype

torch.float32

In [127]:
y_train_tensor.shape

torch.Size([455])

In [128]:
y_train_tensor.dtype

torch.float32

In [129]:
class CustomDataset(Dataset):
    def __init__(self, features,labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]


In [130]:
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

In [131]:
train_dataset[10]

(tensor([-0.8498, -1.2328, -0.8556, -0.7771, -1.7095, -0.8316, -0.5545, -0.8788,
         -0.4841, -0.2956,  0.0762, -0.7048,  0.2584, -0.2621, -0.5090,  0.4313,
          0.3860,  0.4590,  1.2159,  0.3505, -0.8567, -1.6098, -0.8209, -0.7521,
         -1.9256, -0.8156, -0.7276, -1.1279, -0.5406, -0.7613]),
 tensor(0.))

In [132]:
len(train_dataset)

455

In [133]:
len(test_dataset)

114

In [134]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [135]:
class mySimpleNN(nn.Module):
    def __init__(self, num_features):

        super().__init__()
        self.linear = nn.Linear(num_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, features):
        out = self.linear(features)
        out = self.sigmoid(out)
        return out

In [136]:
learning_rate = 0.01
epochs = 2000

In [137]:
# define loss function
loss_function = nn.BCELoss()

# create model
model = mySimpleNN(X_train_tensor.shape[1])

# define optimizer
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

# define loop
for epoch in range(epochs):


   for batch_features, batch_labels in train_dataloader:
      # forward pass
      y_pred = model(batch_features)

      # loss calculate
      loss = loss_function(y_pred,batch_labels.view(-1,1))
      
      # zero gradients -> clearing gradients which prevents gradient accumulation
      optimizer.zero_grad()
      
      # backward pass
      loss.backward()

      # parameters update
      optimizer.step()


   # print loss in each epoch
   print(f"Epoch: {epoch+1}, Loss:{loss.item()}")


Epoch: 1, Loss:0.5618093013763428
Epoch: 2, Loss:0.32575589418411255
Epoch: 3, Loss:0.31943461298942566
Epoch: 4, Loss:0.22212421894073486
Epoch: 5, Loss:0.22797183692455292
Epoch: 6, Loss:0.26862049102783203
Epoch: 7, Loss:0.16609932482242584
Epoch: 8, Loss:0.25295767188072205
Epoch: 9, Loss:0.2573569715023041
Epoch: 10, Loss:0.17046192288398743
Epoch: 11, Loss:0.20934860408306122
Epoch: 12, Loss:0.16198956966400146
Epoch: 13, Loss:0.2777561843395233
Epoch: 14, Loss:0.08561241626739502
Epoch: 15, Loss:0.10907210409641266
Epoch: 16, Loss:0.37512531876564026
Epoch: 17, Loss:0.21618793904781342
Epoch: 18, Loss:0.244827538728714
Epoch: 19, Loss:0.05300159379839897
Epoch: 20, Loss:0.1166539266705513
Epoch: 21, Loss:0.11058545112609863
Epoch: 22, Loss:0.1451951116323471
Epoch: 23, Loss:0.06566278636455536
Epoch: 24, Loss:0.17509566247463226
Epoch: 25, Loss:0.15003164112567902
Epoch: 26, Loss:0.09101986140012741
Epoch: 27, Loss:0.14993976056575775
Epoch: 28, Loss:0.11423441022634506
Epoch: 2

In [138]:
# Model evaluation using test loader

model.eval()    # Set the model to evaluation mode
accuracy_list =[]

with torch.no_grad():
    for batch_features, batch_labels in test_dataloader:
        #Forward pass
        y_pred = model(batch_features)
        y_pred = (y_pred > 0.8).float()    #Convert probabilities to binary predictions

        #calculate accuracy for the current batch
        batch_accuracy =(y_pred.view(-1) == batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

#Calculate the overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f"Accuracy : {overall_accuracy:.4f}")

Accuracy : 0.9766
