### Import Packages

In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

### Define the Dataset Class

In [2]:
class CancerDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Both arrays are cast to ``float32`` on construction, so every item
    handed to a ``DataLoader`` is already single precision.
    """

    def __init__(self, X, y):
        """Store the features and labels as ``float32`` arrays.

        Args:
            X: Array exposing ``astype`` (e.g. ``np.ndarray``); indexed by
                sample along its first axis.
            y: Array exposing ``astype``; indexed with the same sample index
                as ``X``.
        """
        self.X = X.astype(np.float32)
        self.y = y.astype(np.float32)

    def __len__(self):
        """Return the number of samples, taken from the feature array."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        # Fixed: this previously read `selfs.y`, an undefined name, so every
        # lookup (and therefore every DataLoader batch) raised NameError.
        return self.X[index], self.y[index]


### Create an MLP

In [3]:
class MLPWithDropout(nn.Module):
    """Perceptron with one hidden layer and dropout on the hidden activations.

    Data flows through Linear -> ReLU -> Dropout -> Linear. The result of the
    second linear layer is returned directly, with no final activation.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_prob):
        """Build the two linear layers and the dropout module.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of the hidden layer.
            output_size: Number of output units.
            dropout_prob: Probability passed to ``nn.Dropout``.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, x):
        """Run ``x`` through the network and return the output of ``fc2``."""
        hidden = torch.relu(self.fc1(x))
        # Dropout acts on the hidden activations only, never on the output.
        hidden = self.dropout(hidden)
        return self.fc2(hidden)


### Load and preprocess the dataset


In [4]:
# Pull the feature matrix and the target vector out of the scikit-learn dataset.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics reach the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


### Create DataLoader objects for the training and test sets


In [5]:
# Wrap each split in a CancerDataset so a DataLoader can batch it.
train_dataset = CancerDataset(X_train, y_train)
test_dataset = CancerDataset(X_test, y_test)

# Training batches are shuffled; test batches keep the dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)


### Create the model, optimizer, and loss function


In [6]:
# Seed PyTorch's global RNG before any random operation. Weight initialisation
# here, and the DataLoader shuffle order and dropout masks drawn later from the
# same generator, then repeat on every Restart & Run All.
torch.manual_seed(42)

input_size = X_train.shape[1]  # one input unit per feature column
hidden_size = 64
output_size = 1  # a single logit per sample
dropout_prob = 0.5
model = MLPWithDropout(input_size, hidden_size, output_size, dropout_prob)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits.
criterion = nn.BCEWithLogitsLoss()

In [8]:
# Sanity check: display the number of feature columns fed to the model.
# X_train is a 2-D (samples, features) array, so the second axis is the count.
_, input_size = X_train.shape
input_size

30


### Train the model

In [9]:
epochs = 10
for epoch in range(epochs):
    model.train()  # training mode: dropout is active
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        # squeeze(1) removes only the output-unit axis, (batch, 1) -> (batch,).
        # A bare squeeze() would also collapse a batch of size 1 to a 0-dim
        # tensor, which no longer matches y_batch and makes the loss raise.
        logits = model(X_batch).squeeze(1)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch figure is a true per-sample average even when the last
        # batch is smaller.
        train_loss += loss.item() * X_batch.shape[0]
    train_loss /= len(train_dataset)
    print(f'Epoch {epoch + 1}, train loss: {train_loss:.4f}')

Epoch 1, train loss: 0.6505
Epoch 2, train loss: 0.4816
Epoch 3, train loss: 0.3613
Epoch 4, train loss: 0.2811
Epoch 5, train loss: 0.2339
Epoch 6, train loss: 0.2003
Epoch 7, train loss: 0.1752
Epoch 8, train loss: 0.1531
Epoch 9, train loss: 0.1340
Epoch 10, train loss: 0.1311


### Test the model

In [10]:

model.eval()  # evaluation mode: dropout is disabled
test_loss = 0.0
correct = 0
# Evaluation needs no gradients; no_grad() stops autograd from recording the
# forward pass, which saves memory and time.
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        # squeeze(1) removes only the output-unit axis, so a final batch of
        # size 1 keeps shape (1,) and still lines up with y_batch.
        logits = model(X_batch).squeeze(1)
        loss = criterion(logits, y_batch)
        # Weight the batch-mean loss by batch size for a per-sample average.
        test_loss += loss.item() * X_batch.shape[0]
        # A logit above 0 is the same as a sigmoid probability above 0.5.
        pred = (logits > 0).long()
        correct += (pred == y_batch).sum().item()
test_loss /= len(test_dataset)
accuracy = correct / len(test_dataset)
print(f'Test loss: {test_loss:.4f}, Accuracy: {accuracy * 100:.2f}%')

Test loss: 0.0889, Accuracy: 98.25%
