In [2]:
import torch
from sklearn.datasets import make_classification

In [3]:
x, y = make_classification(
    n_samples = 10,       #no of samples
    n_features = 2,       #no. of features
    n_informative = 2,    #no. of informative features
    n_redundant = 0 ,     #no. of redundant features
    n_classes = 2,        #no. of classes
    random_state = 42     #for reproducibility
)

In [4]:
x

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [5]:
x.shape

(10, 2)

In [6]:
y

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [7]:
# convert the data to pytorhch tensors
x = torch.tensor(x, dtype = torch.float32)
y = torch.tensor(y, dtype = torch.long)

In [8]:
y

tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [9]:
from torch.utils.data import Dataset, DataLoader

In [16]:
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):   #we can do transformations at here
    return self.features[index], self.labels[index]


In [11]:
dataset = CustomDataset(x, y)

In [12]:
len(dataset)

10

In [13]:
dataset[3]

(tensor([-0.7206, -0.9606]), tensor(0))

In [14]:
dataloader = DataLoader(dataset, batch_size = 2, shuffle = True)

In [15]:
for batch_features, batch_label in dataloader:
  print(batch_features)
  print(batch_label)
  print("-"*50)

tensor([[-1.9629, -0.9923],
        [-1.1402, -0.8388]])
tensor([0, 0])
--------------------------------------------------
tensor([[ 1.0683, -0.9701],
        [ 1.8997,  0.8344]])
tensor([1, 1])
--------------------------------------------------
tensor([[-0.7206, -0.9606],
        [-0.9382, -0.5430]])
tensor([0, 1])
--------------------------------------------------
tensor([[-2.8954,  1.9769],
        [ 1.7273, -1.1858]])
tensor([0, 1])
--------------------------------------------------
tensor([[ 1.7774,  1.5116],
        [-0.5872, -1.9717]])
tensor([1, 0])
--------------------------------------------------


In [45]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
import torch.nn as nn

df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()


df.drop(columns = ['id', 'Unnamed: 32'], inplace = True)

x_train, x_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:, 0], test_size = 0.2)


scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

encoder = LabelEncoder()

y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

x_train_tensor = torch.from_numpy(x_train)
y_train_tensor = torch.from_numpy(y_train)
x_test_tensor = torch.from_numpy(x_test)
y_test_tensor = torch.from_numpy(y_test)


class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = features
    self.labels = labels
  def __len__(self):
    return len(self.features)

  def __getitem__(self, idx):
    return self.features[idx] ,self.labels[idx]


In [20]:
train_dataset = CustomDataset(x_train_tensor, y_train_tensor)
test_dataset = CustomDataset(x_test_tensor, y_test_tensor)

In [22]:
train_loader = DataLoader(train_dataset, batch_size= 32, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size= 32, shuffle = True)

In [50]:
train_loader


<torch.utils.data.dataloader.DataLoader at 0x7f12c66ae4e0>

In [33]:
class MySimpleNN(nn.Module):
  def __init__(self, num_features):
    super().__init__()
    self.linear = nn.Linear(num_features, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, features):
    out = self.linear(features)
    out = self.sigmoid(out)
    return out

In [34]:
learning_rate = 0.1
epochs = 25

In [38]:
# create model
model = MySimpleNN(x_train_tensor.shape[1])

# define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

# define loss function
loss_function = nn.BCELoss()


In [41]:
for epoch in range(epochs):
  for batch_features, batch_labels in train_loader:

    # forward pass
    y_pred = model(batch_features.float())

    # loss calculate
    loss = loss_function(y_pred, batch_labels.view(-1, 1).float())

    # clear gradients
    optimizer.zero_grad()

    # backward pass
    loss.backward()

    # parameter update
    optimizer.step()

  print(f'epoch : {epoch+1}, loss : {loss.item()}')

epoch : 1, loss : 0.009325219318270683
epoch : 2, loss : 0.044092629104852676
epoch : 3, loss : 0.04083958640694618
epoch : 4, loss : 0.11133676022291183
epoch : 5, loss : 0.056035157293081284
epoch : 6, loss : 0.008124392479658127
epoch : 7, loss : 0.004872833378612995
epoch : 8, loss : 0.052227672189474106
epoch : 9, loss : 0.010723303072154522
epoch : 10, loss : 0.005841082893311977
epoch : 11, loss : 0.0063381502404809
epoch : 12, loss : 0.06564582884311676
epoch : 13, loss : 0.01315861102193594
epoch : 14, loss : 0.021966038271784782
epoch : 15, loss : 0.11501730233430862
epoch : 16, loss : 0.030149143189191818
epoch : 17, loss : 0.06402388960123062
epoch : 18, loss : 0.007511109579354525
epoch : 19, loss : 0.07937686145305634
epoch : 20, loss : 0.014368508942425251
epoch : 21, loss : 0.18309460580348969
epoch : 22, loss : 0.08735482394695282
epoch : 23, loss : 0.049340907484292984
epoch : 24, loss : 0.030790889635682106
epoch : 25, loss : 0.04841925948858261


In [46]:
# Model evaluatin using test loader
model.eval()
accuracy_list = []

with torch.no_grad():
  for batch_features, batch_labels in test_loader:
    # forward pass
    y_pred = model(batch_features.float())
    y_pred = (y_pred > 0.5 ).float()

    # calculate accuracy for the current batch
    batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean().item()
    accuracy_list.append(batch_accuracy)


# claculate overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f'Accuracy: {overall_accuracy:.4f}')

Accuracy: 0.9783
