In [2]:
import numpy as np
import pandas as pd
import torch
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch.nn as nn
import torch.optim as optim

# -------------------------
# Fix Random Seeds
# -------------------------
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')

#**Dataset and DataLoader**

In [133]:
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

In [134]:
# Target (diagnosis column mapped to 1 and 0)
y = df['diagnosis'].map({'M': 1, 'B': 0})

# Features (all columns except 'diagnosis')
X = df.drop('diagnosis', axis=1)

In [135]:
X = X.to_numpy()

y = y.to_numpy()

In [138]:
from torch.utils.data import Dataset, DataLoader

In [139]:
class CustomDataset(Dataset):

  def __init__(self, features, labels):

    self.features = features
    self.labels = labels

  def __len__(self):

    return self.features.shape[0]

  def __getitem__(self, index):

    return self.features[index], self.labels[index]

#**NN Module with DataLoader**

In [140]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [141]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [142]:
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))
type(X_test_tensor)

torch.Tensor

In [143]:
train_dataset = CustomDataset(X_train_tensor, y_train_tensor)
test_dataset = CustomDataset(X_test_tensor, y_test_tensor)

In [144]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [146]:
# for batch_features, batch_labels in train_loader:

#   print(batch_features)
#   print(batch_labels)
#   print("-"*50)

In [147]:
class MySimpleNN(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()   # sigmoid is here
        )
    def forward(self, x):
        return self.model(x)

In [264]:
learning_rate = 00.1
epochs = 20

In [265]:
# create model
model = MySimpleNN(X_train.shape[1])

# define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# define loss function
loss_function = nn.BCELoss()

In [266]:
# define loop
for epoch in range(epochs):

  for batch_features, batch_labels in train_loader:

    # forward pass
    y_pred = model(batch_features)

    # loss calculate
    loss = loss_function(y_pred, batch_labels.view(-1, 1))

    # clear gradients
    optimizer.zero_grad()

    # backward pass
    loss.backward()

    # parameters update
    optimizer.step()

  # print loss in each epoch
  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.3941633999347687
Epoch: 2, Loss: 0.3343452513217926
Epoch: 3, Loss: 0.15552031993865967
Epoch: 4, Loss: 0.027479151263833046
Epoch: 5, Loss: 0.1294943392276764
Epoch: 6, Loss: 0.019718823954463005
Epoch: 7, Loss: 0.027159005403518677
Epoch: 8, Loss: 0.005329739768058062
Epoch: 9, Loss: 0.005607367493212223
Epoch: 10, Loss: 0.7068694829940796
Epoch: 11, Loss: 0.00854849349707365
Epoch: 12, Loss: 0.02888217195868492
Epoch: 13, Loss: 0.007039327640086412
Epoch: 14, Loss: 0.030208123847842216
Epoch: 15, Loss: 0.0010586768621578813
Epoch: 16, Loss: 0.06967346370220184
Epoch: 17, Loss: 0.0025749888736754656
Epoch: 18, Loss: 0.12373130768537521
Epoch: 19, Loss: 0.030956828966736794
Epoch: 20, Loss: 0.015688816085457802


In [267]:
np.random.seed(42)
torch.manual_seed(42)

# Model evaluation using test_loader
model.eval()  # Set the model to evaluation mode
accuracy_list = []

with torch.no_grad():
    for batch_features, batch_labels in test_loader:
        # Forward pass
        y_pred = model(batch_features)
        y_pred = (y_pred > 0.5).float()  # Convert probabilities to binary predictions

        # Calculate accuracy for the current batch
        batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

# Calculate overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f'Accuracy: {overall_accuracy:.4f}')
print(model)

Accuracy: 0.9783
MySimpleNN(
  (model): Sequential(
    (0): Linear(in_features=30, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=1, bias=True)
    (5): Sigmoid()
  )
)


In [268]:
new_data = X_test[0:1]

new_tensor = torch.tensor(new_data, dtype=torch.float32).view(1, -1)

model.eval()
with torch.no_grad():
    y_proba = model(new_tensor)
    y_class = (y_proba > 0.5).float()
    print("Predicted Probability:", y_proba.item())
    print("Predicted Class:", int(y_class.item()))

Predicted Probability: 0.059101320803165436
Predicted Class: 0
