<a href="https://colab.research.google.com/github/MLParas/Pytorch/blob/main/6_Dataset%26Loader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

Getting data and preprocessing

In [28]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.drop(["id","Unnamed: 32"],axis = 1,inplace=True)
X_train,X_test, y_train, y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size = 0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)


encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))

In [29]:
a = [1,23,25,65,14,2]

Dataset and Dataloaders

In [30]:
from sklearn.datasets import make_classification
import torch

In [31]:
# Step 1: Create a synthetic classification dataset using sklearn
X, y = make_classification(
    n_samples=10,       # Number of samples
    n_features=2,       # Number of features
    n_informative=2,    # Number of informative features
    n_redundant=0,      # Number of redundant features
    n_classes=2,        # Number of classes
    random_state=42     # For reproducibility
)

In [32]:
X = torch.tensor(X,dtype=torch.float32)
y = torch.tensor(y,dtype=torch.long)

In [33]:
from torch.utils.data import Dataset, DataLoader

In [34]:
class CustomDataset(Dataset):
  ## create 3 methods as in notes

  def __init__(self,features,labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self,index):
    return self.features[index], self.labels[index]

In [35]:
dataset = CustomDataset(X,y)

In [36]:
len(dataset)

10

In [37]:
dataset[0]

(tensor([ 1.0683, -0.9701]), tensor(1))

DataLoader

In [38]:
dataloader = DataLoader(dataset, batch_size=2, shuffle= True)

Getting batches from dataloader

In [39]:
for batch_features, batch_labels in dataloader:
  print(batch_features)
  print(batch_labels)
  print("-"*50)

tensor([[ 1.7774,  1.5116],
        [-2.8954,  1.9769]])
tensor([1, 0])
--------------------------------------------------
tensor([[-0.5872, -1.9717],
        [ 1.7273, -1.1858]])
tensor([0, 1])
--------------------------------------------------
tensor([[ 1.8997,  0.8344],
        [-1.1402, -0.8388]])
tensor([1, 0])
--------------------------------------------------
tensor([[-1.9629, -0.9923],
        [-0.7206, -0.9606]])
tensor([0, 0])
--------------------------------------------------
tensor([[-0.9382, -0.5430],
        [ 1.0683, -0.9701]])
tensor([1, 1])
--------------------------------------------------


In [50]:
len(test_dataset)

455

Breast Cancer Model

In [51]:
train_dataset = CustomDataset(X_train_tensor,y_train_tensor)
test_dataset = CustomDataset(X_test_tensor,y_test_tensor)

In [52]:
train_dataloader = DataLoader(train_dataset,batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=True)

In [53]:
## simple NN model

class MySimpleNN(nn.Module):
  def __init__(self,d):
    super().__init__()
    self.linear = nn.Linear(d,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self,X):
    out = self.linear(X)
    return self.sigmoid(out)

In [54]:
lr = 0.1
batch_size=32
epochs = 25

In [55]:
model = MySimpleNN(X_train_tensor.shape[1])
optimizer = torch.optim.SGD(model.parameters(),lr=lr)
loss_fxn = nn.BCELoss()

Train

In [56]:
for epoch in range(epochs):
  for batch_features,batch_labels in train_dataloader:
    y_pred = model(batch_features)
    loss = loss_fxn(y_pred,batch_labels.view(-1,1))
    optimizer.zero_grad()
    loss.backward()
    ## update model weights
    optimizer.step()
  print(f"Epoch : {epoch} loss = {loss}")

Epoch : 0 loss = 0.1765708178281784
Epoch : 1 loss = 0.12946994602680206
Epoch : 2 loss = 0.10069769620895386
Epoch : 3 loss = 0.19194647669792175
Epoch : 4 loss = 0.05333400145173073
Epoch : 5 loss = 0.06774754077196121
Epoch : 6 loss = 0.18883247673511505
Epoch : 7 loss = 0.009018120355904102
Epoch : 8 loss = 0.029693450778722763
Epoch : 9 loss = 0.12208467721939087
Epoch : 10 loss = 0.14008863270282745
Epoch : 11 loss = 0.05854693427681923
Epoch : 12 loss = 0.04677879810333252
Epoch : 13 loss = 0.06568264961242676
Epoch : 14 loss = 0.0909925177693367
Epoch : 15 loss = 0.03955954313278198
Epoch : 16 loss = 0.018876267597079277
Epoch : 17 loss = 0.06790444254875183
Epoch : 18 loss = 0.025875434279441833
Epoch : 19 loss = 0.053681034594774246
Epoch : 20 loss = 0.010050124488770962
Epoch : 21 loss = 0.10231798887252808
Epoch : 22 loss = 0.023840058594942093
Epoch : 23 loss = 0.027703197672963142
Epoch : 24 loss = 0.02359458990395069


Code for evaluation

In [61]:
model.eval()
ac_list = []

with torch.no_grad():
  for batch_features,batch_labels in test_dataloader:
    y_pred = model(batch_features)
    y_pred = (y_pred>0.7).float()
    batch_acc = (batch_labels.view(-1,1) == y_pred).float().mean().items()
    ac_list.append(batch_acc)



In [65]:
##Overall
acc = sum(ac_list)/len(ac_list)
print(acc)

tensor(0.9375)
