In [13]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.datasets import load_breast_cancer
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold,train_test_split
import torch.optim as optim
from torchmetrics import Accuracy,F1Score

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [16]:
data = load_breast_cancer()
X = torch.tensor(data.data, dtype=torch.float32)
y = torch.tensor(data.target, dtype=torch.long)

In [20]:
print(X.shape,y.shape)

torch.Size([569, 30]) torch.Size([569])


In [21]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=22,shuffle=True,stratify=y)
print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)
print(torch.sum(y_train)/len(y_train),torch.sum(y_test)/len(y_test))

cv = StratifiedKFold(n_splits=10,shuffle=True,random_state=22)

torch.Size([455, 30]) torch.Size([455]) torch.Size([114, 30]) torch.Size([114])
tensor(0.6264) tensor(0.6316)


In [22]:
class Custom_Dataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [23]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(30,4),
            nn.ReLU(),
            nn.Linear(4, 1),
        )

    def forward(self, x):
        return self.layers(x)

In [24]:
epochs = 10
batch_size = 32

In [25]:
def evaluate_model(model,val_loader):
    model.eval()
    f1 = F1Score(task="binary",threshold=0.5).to(device)
    accuracy = Accuracy(task="binary",threshold=0.5).to(device)

    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = torch.sigmoid(model(batch_X))
            all_preds.append(outputs.squeeze(-1))
            all_labels.append(batch_y)
        all_preds = torch.cat(all_preds, dim=0)
        all_labels = torch.cat(all_labels, dim=0)
    return f1(all_preds,all_labels).item(),accuracy(all_preds,all_labels).item()

In [26]:
for train_idx,val_idx in cv.split(X_train,y_train):
    #print(train_idx.shape,val_idx.shape)

    train_x , train_y = X_train[train_idx], y_train[train_idx]
    val_x , val_y = X_train[val_idx], y_train[val_idx]

    train_dataset = Custom_Dataset(train_x,train_y)
    train_loader  = DataLoader(train_dataset,batch_size=batch_size,shuffle=True,pin_memory=True)
    val_dataset = Custom_Dataset(val_x,val_y)
    val_loader = DataLoader(val_dataset,batch_size=batch_size,shuffle=False,pin_memory=True)

    model = SimpleNN().to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y.unsqueeze(1).float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        f1_score,accuracy = evaluate_model(model,val_loader)
        print(f"Epoch {epoch+1}, Loss: {epoch_loss/len(train_loader):.4f}, Accuracy: {accuracy:.4f}, F1: {f1_score:.4f}")
    print("-"*25)

Epoch 1, Loss: 2.4350, Accuracy: 0.3696, F1: 0.0000
Epoch 2, Loss: 0.6949, Accuracy: 0.6304, F1: 0.7733
Epoch 3, Loss: 0.6864, Accuracy: 0.6304, F1: 0.7733
Epoch 4, Loss: 0.6785, Accuracy: 0.6304, F1: 0.7733
Epoch 5, Loss: 0.6715, Accuracy: 0.6304, F1: 0.7733
Epoch 6, Loss: 0.6685, Accuracy: 0.6304, F1: 0.7733
Epoch 7, Loss: 0.6641, Accuracy: 0.6304, F1: 0.7733
Epoch 8, Loss: 0.6642, Accuracy: 0.6304, F1: 0.7733
Epoch 9, Loss: 0.6619, Accuracy: 0.6304, F1: 0.7733
Epoch 10, Loss: 0.6604, Accuracy: 0.6304, F1: 0.7733
-------------------------
Epoch 1, Loss: 2.7256, Accuracy: 0.4565, F1: 0.2424
Epoch 2, Loss: 0.9557, Accuracy: 0.9565, F1: 0.9655
Epoch 3, Loss: 0.6352, Accuracy: 0.8696, F1: 0.9062
Epoch 4, Loss: 0.3029, Accuracy: 0.8913, F1: 0.9206
Epoch 5, Loss: 0.2519, Accuracy: 0.9783, F1: 0.9831
Epoch 6, Loss: 0.3062, Accuracy: 0.9130, F1: 0.9355
Epoch 7, Loss: 0.2410, Accuracy: 0.9130, F1: 0.9355
Epoch 8, Loss: 0.2190, Accuracy: 0.8478, F1: 0.8679
Epoch 9, Loss: 0.1973, Accuracy: 0.97

# Test

In [27]:
train_dataset = Custom_Dataset(X_train,y_train)
train_loader  = DataLoader(train_dataset,batch_size=32,shuffle=True,pin_memory=True)

model = SimpleNN().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(epochs):
    epoch_loss = 0.0

    for batch_X, batch_y in train_loader:
        model.train()
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y.unsqueeze(1).float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

In [28]:
test_dataset = Custom_Dataset(X_test, y_test)
test_loader  = DataLoader(test_dataset,batch_size=32,shuffle=True,pin_memory=True)

evaluate_model(model,test_loader) #f1, accuracy

(0.8776978254318237, 0.8508771657943726)