<a href="https://colab.research.google.com/github/Devansh-react/PyTorch_fundamentals/blob/main/Ch_05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.datasets import make_classification
import torch


In [2]:
# Step 1: Create a synthetic classification dataset using sklearn
X, y = make_classification(
    n_samples=10,       # Number of samples
    n_features=2,       # Number of features
    n_informative=2,    # Number of informative features
    n_redundant=0,      # Number of redundant features
    n_classes=2,        # Number of classes
    random_state=42     # For reproducibility
)

In [3]:
X.shape,X,y

((10, 2),
 array([[ 1.06833894, -0.97007347],
        [-1.14021544, -0.83879234],
        [-2.8953973 ,  1.97686236],
        [-0.72063436, -0.96059253],
        [-1.96287438, -0.99225135],
        [-0.9382051 , -0.54304815],
        [ 1.72725924, -1.18582677],
        [ 1.77736657,  1.51157598],
        [ 1.89969252,  0.83444483],
        [-0.58723065, -1.97171753]]),
 array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0]))

In [4]:
# Convert the data to PyTorch tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [25]:
from torch.utils.data import Dataset,DataLoader

class CustomDataset(Dataset):
  def __init__(self,feature,labels):
    self.feature = feature
    self.labels = labels


  def __len__(self):
    return  self.feature.shape[0]

  def __getitem__(self,index):
    return self.feature[index],self.labels[index]

In [23]:
dataset = CustomDataset(X,y)

In [24]:
len(dataset),dataset[0]

(10, (tensor([ 1.0683, -0.9701]), tensor(1)))

In [26]:
dataloader = DataLoader(dataset,batch_size=2,shuffle=True)

In [27]:
for batch_feature,batch_lable in dataloader:
  print(batch_feature)
  print(batch_lable)
  print("-"*50)

tensor([[-0.9382, -0.5430],
        [-2.8954,  1.9769]])
tensor([1, 0])
--------------------------------------------------
tensor([[-0.5872, -1.9717],
        [ 1.7774,  1.5116]])
tensor([0, 1])
--------------------------------------------------
tensor([[-1.1402, -0.8388],
        [ 1.7273, -1.1858]])
tensor([0, 1])
--------------------------------------------------
tensor([[ 1.0683, -0.9701],
        [-1.9629, -0.9923]])
tensor([1, 0])
--------------------------------------------------
tensor([[ 1.8997,  0.8344],
        [-0.7206, -0.9606]])
tensor([1, 0])
--------------------------------------------------


# **Trainig model pipeline**

In [34]:
from torch.utils.data import Dataset,DataLoader

class CustomDataset(Dataset):
  def __init__(self,feature,labels):
    self.feature = feature
    self.labels = labels


  def __len__(self):
    return  self.feature.shape[0]

  def __getitem__(self,index):
    return self.feature[index],self.labels[index]

In [28]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.drop(["Unnamed: 32","id"],axis=1,inplace=True)
X_train,X_test,Y_train,Y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
encoder = LabelEncoder()
Y_train = encoder.fit_transform(Y_train)
Y_test = encoder.transform(Y_test)
X_train_tensor = torch.from_numpy(X_train).float()
X_test_tensor = torch.from_numpy(X_test).float()
Y_train_tensor = torch.from_numpy(Y_train).float()
Y_test_tensor = torch.from_numpy(Y_test).float()
X_train_tensor.shape,X_test_tensor.shape,Y_train_tensor.shape,Y_test_tensor.shape



(torch.Size([455, 30]),
 torch.Size([114, 30]),
 torch.Size([455]),
 torch.Size([114]))

In [29]:
import torch.nn as nn

class Trainig_class(nn.Module):
  def __init__(self,num_features):
    super().__init__()
    self.network = nn.Sequential(
        nn.Linear(num_features,1),
        nn.Sigmoid()
    )

  def forward(self,X):
    out = self.network(X)
    return out

#  paramneters
learning_rate = 0.001;
epochs = 25

In [31]:
train_dataset = CustomDataset(X_train_tensor,Y_train_tensor)
test_dataset = CustomDataset(X_test_tensor,Y_test_tensor)

In [38]:
train_dataloder = DataLoader(train_dataset,batch_size=32,shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=True)

In [39]:
loss_fun = nn.BCELoss()

In [43]:
model  = Trainig_class(X_train_tensor.shape[1])

#optimiser
optimizer = torch.optim.SGD(model.parameters(),lr = learning_rate)

for epoch in range(epochs):

  for batch_features , batch_labels in train_dataloder:

    # forward-pass
    y_pred = model(batch_features) # Pass features to the model
    #  loss function
    loss = loss_fun(y_pred,batch_labels.view(-1,1)) # Use labels for loss calculation

    #clearing gradient
    optimizer.zero_grad()

    # backward_propogation
    loss.backward()

    # parameter_update (using in-place operations)
    optimizer.step()

    # print loss in each epoch
    print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.8332481980323792
Epoch: 1, Loss: 0.7689734697341919
Epoch: 1, Loss: 0.8905272483825684
Epoch: 1, Loss: 0.7646952271461487
Epoch: 1, Loss: 0.8160474896430969
Epoch: 1, Loss: 0.8368940949440002
Epoch: 1, Loss: 0.8075826168060303
Epoch: 1, Loss: 0.7816641926765442
Epoch: 1, Loss: 0.8987591862678528
Epoch: 1, Loss: 0.8681702017784119
Epoch: 1, Loss: 0.8116330504417419
Epoch: 1, Loss: 0.8293501138687134
Epoch: 1, Loss: 0.7759166955947876
Epoch: 1, Loss: 0.8470827341079712
Epoch: 1, Loss: 0.7564606070518494
Epoch: 2, Loss: 0.7850606441497803
Epoch: 2, Loss: 0.7861133813858032
Epoch: 2, Loss: 0.7423028349876404
Epoch: 2, Loss: 0.8014739155769348
Epoch: 2, Loss: 0.7855833768844604
Epoch: 2, Loss: 0.8257344365119934
Epoch: 2, Loss: 0.7504908442497253
Epoch: 2, Loss: 0.7931350469589233
Epoch: 2, Loss: 0.8231688737869263
Epoch: 2, Loss: 0.8098085522651672
Epoch: 2, Loss: 0.7593505382537842
Epoch: 2, Loss: 0.7563107013702393
Epoch: 2, Loss: 0.8165281414985657
Epoch: 2, Loss: 0.78

In [44]:
model.eval() # Set the model to evaluation mode
correct = 0
total = 0

with torch.no_grad(): # Disable gradient calculation during evaluation
    for batch_features, batch_labels in test_dataloader:
        outputs = model(batch_features)
        predicted = (outputs > 0.5).float() # Apply threshold to get binary predictions
        total += batch_labels.size(0)
        correct += (predicted.squeeze() == batch_labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the model on the test data: {accuracy:.2f}%')

Accuracy of the model on the test data: 93.86%
