### Import Packages

In [8]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

### Define the Cancer Dataset wrapper

In [9]:
class CancerDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Both inputs are copied into ``float32`` NumPy arrays at construction
    time, so later changes to the caller's data do not affect the dataset.

    Args:
        X: Array-like of features; the first axis indexes samples.
        y: Array-like of labels, one entry per sample in ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` hold a different number of samples.
    """

    def __init__(self, X, y):
        # np.array (instead of .astype) accepts any array-like -- lists,
        # DataFrames, ndarrays -- and still always makes a copy.
        self.X = np.array(X, dtype=np.float32)
        self.y = np.array(y, dtype=np.float32)
        # Fail fast: a mismatch would otherwise surface as an IndexError in
        # the middle of an epoch, or silently ignore the extra labels.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must contain the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

### Regularized logistic regression model

In [10]:
class RegularizedLogisticRegression(nn.Module):
    """Logistic regression (one linear layer) with an optional weight penalty.

    The model outputs raw logits; apply a sigmoid, or use a loss such as
    ``nn.BCEWithLogitsLoss``, to turn them into probabilities.

    Args:
        input_size: Number of input features.
        reg_type: ``'l1'`` for a sum-of-absolute-values penalty, ``'l2'`` for
            a sum-of-squares penalty. Any other value disables the penalty.
        reg_factor: Multiplier applied to the penalty term.
    """

    def __init__(self, input_size, reg_type='l2', reg_factor=0.01):
        super().__init__()
        self.linear = nn.Linear(input_size, 1)
        self.reg_type = reg_type
        self.reg_factor = reg_factor

    def forward(self, x):
        """Return the logits produced by the linear layer for ``x``."""
        return self.linear(x)

    def calculate_regularization_loss(self):
        """Return the penalty on the linear layer's weights as a scalar tensor.

        Only the weight matrix is penalised; the bias is left out. When
        ``reg_type`` is neither ``'l1'`` nor ``'l2'`` a zero tensor is
        returned, so callers always receive a ``torch.Tensor``.
        """
        weight = self.linear.weight
        if self.reg_type == 'l1':
            return self.reg_factor * weight.abs().sum()
        if self.reg_type == 'l2':
            return self.reg_factor * weight.square().sum()
        # Zero scalar on the same device/dtype as the weights, so the return
        # type does not depend on the configured reg_type.
        return weight.new_zeros(())


### Load and preprocess the dataset

In [11]:
# Pull the feature matrix and target vector out of scikit-learn's bundled
# breast-cancer dataset.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so nothing from the test set leaks into
# preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### Create DataLoader objects for the training and test sets

In [12]:
# Wrap each split in a CancerDataset first ...
train_dataset = CancerDataset(X_train, y_train)
test_dataset = CancerDataset(X_test, y_test)

# ... then batch them. Only the training batches are shuffled; the test
# batches keep their original order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

### Create the model, optimizer, and loss function

In [16]:
# Seed PyTorch's global RNG (same value as the train/test split) so that the
# random weight initialisation below -- and the shuffling done by any
# DataLoader that has no generator of its own -- is repeatable across runs.
torch.manual_seed(42)

# One input per feature column of the scaled training matrix.
input_size = X_train.shape[1]
model = RegularizedLogisticRegression(input_size, reg_type='l1', reg_factor=0.01)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# The model emits raw logits, so use the loss that applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()


### Train the model

In [17]:
epochs = 10
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        logits = model(X_batch)
        # Drop only the trailing output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also remove the batch dimension when the
        # last batch holds a single sample, and the loss would then reject
        # the shape mismatch against y_batch.
        loss = criterion(logits.squeeze(-1), y_batch)
        # Add the weight penalty so it is part of the optimised objective.
        loss = loss + model.calculate_regularization_loss()
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch figure is a per-sample mean even
        # when the final batch is smaller than the rest.
        train_loss += loss.item() * X_batch.shape[0]
    train_loss /= len(train_dataset)
    print(f'Epoch {epoch + 1}, train loss: {train_loss:.4f}')

Epoch 1, train loss: 0.8063
Epoch 2, train loss: 0.7143
Epoch 3, train loss: 0.6377
Epoch 4, train loss: 0.5757
Epoch 5, train loss: 0.5254
Epoch 6, train loss: 0.4846
Epoch 7, train loss: 0.4511
Epoch 8, train loss: 0.4224
Epoch 9, train loss: 0.3985
Epoch 10, train loss: 0.3774


### Test the model

In [18]:
model.eval()
test_loss = 0
correct = 0
# No gradients are needed for evaluation; disabling autograd avoids building
# a computation graph for every test batch.
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        logits = model(X_batch)
        # Drop only the trailing output dimension so a final batch with a
        # single sample keeps its (1,) shape and still matches y_batch.
        loss = criterion(logits.squeeze(-1), y_batch)
        # Same objective as in training: data loss plus weight penalty.
        loss = loss + model.calculate_regularization_loss()
        test_loss += loss.item() * X_batch.shape[0]
        # A logit above 0 corresponds to a sigmoid probability above 0.5.
        pred = (logits > 0).squeeze(-1).long()
        correct += (pred == y_batch).sum().item()
test_loss /= len(test_dataset)
accuracy = correct / len(test_dataset)

print(f'Test set accuracy: {100*accuracy:.2f}%')


Test set accuracy: 94.74%
