# Breast Cancer Wisconsin Classification (PyTorch)
Breast Cancer Wisconsin

We use PyTorch for the model, scikit-learn for normalization and evaluation, and we will export the model in ONNX format.


In [27]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib, random, os
from pathlib import Path


In [21]:
_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
_COLS = ["id", "clump_thickness", "uniformity_cell_size", "uniformity_cell_shape",
         "marginal_adhesion", "single_epithelial_cell_size", "bare_nuclei",
         "bland_chromatin", "normal_nucleoli", "mitoses", "class"]
_FEATURE_COLS = _COLS[1:-1]

def load_data_set():
    df = pd.read_csv(_URL, names=_COLS)
    df = df.replace("?", np.nan).dropna()
    df[_FEATURE_COLS] = df[_FEATURE_COLS].astype(float)
    df["label"] = df["class"].map({2: 0, 4: 1}).astype(int)
    return df

df = load_data_set()
print(df.shape)
df.head()


(683, 12)


Unnamed: 0,id,clump_thickness,uniformity_cell_size,uniformity_cell_shape,marginal_adhesion,single_epithelial_cell_size,bare_nuclei,bland_chromatin,normal_nucleoli,mitoses,class,label
0,1000025,5.0,1.0,1.0,1.0,2.0,1.0,3.0,1.0,1.0,2,0
1,1002945,5.0,4.0,4.0,5.0,7.0,10.0,3.0,2.0,1.0,2,0
2,1015425,3.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2,0
3,1016277,6.0,8.0,8.0,1.0,3.0,4.0,3.0,7.0,1.0,2,0
4,1017023,4.0,1.0,1.0,3.0,2.0,1.0,3.0,1.0,1.0,2,0


In [22]:
X = df[_FEATURE_COLS].to_numpy(dtype=np.float32)
y = df["label"].to_numpy(dtype=np.int64)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape, X_test.shape)


(546, 9) (137, 9)


In [13]:
# exporting the scaler
Path("artifacts").mkdir(exist_ok=True)
joblib.dump(scaler, "artifacts/breast_minmax_scaler.joblib")

['artifacts/breast_minmax_scaler.joblib']

In [23]:
class WBCBinaryClassifier(nn.Module):
    def __init__(self, input_dim=9, hidden=32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
            nn.Linear(hidden, 2)
        )

    def forward(self, x):
        return self.net(x)
device='cpu'
model = WBCBinaryClassifier().to(device)
print(model)


WBCBinaryClassifier(
  (net): Sequential(
    (0): Linear(in_features=9, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=2, bias=True)
  )
)


In [24]:
def train_model(model, X_train, y_train, X_val, y_val,
                num_epochs=50, lr=1e-3, weight_decay=1e-4, batch_size=32):

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    train_ds = torch.utils.data.TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
    loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)

    best_state = None
    best_val_loss = float('inf')

    for epoch in range(1, num_epochs + 1):
        model.train()
        total_loss = 0
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            loss = F.cross_entropy(model(xb), yb)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        with torch.no_grad():
            val_loss = F.cross_entropy(model(torch.tensor(X_val).to(device)),
                                       torch.tensor(y_val).to(device)).item()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()

        if epoch % 10 == 0:
            print(f"Epoch {epoch:03d} | Loss: {total_loss/len(loader):.4f} | Val loss: {val_loss:.4f}")

    model.load_state_dict(best_state)
    return model

model = train_model(model, X_train, y_train, X_test, y_test, num_epochs=100)


Epoch 010 | Loss: 0.0727 | Val loss: 0.1261
Epoch 020 | Loss: 0.0572 | Val loss: 0.1230
Epoch 030 | Loss: 0.0537 | Val loss: 0.1249
Epoch 040 | Loss: 0.0515 | Val loss: 0.1240
Epoch 050 | Loss: 0.0480 | Val loss: 0.1279
Epoch 060 | Loss: 0.0446 | Val loss: 0.1269
Epoch 070 | Loss: 0.0547 | Val loss: 0.1276
Epoch 080 | Loss: 0.0394 | Val loss: 0.1271
Epoch 090 | Loss: 0.0433 | Val loss: 0.1192
Epoch 100 | Loss: 0.0345 | Val loss: 0.1265


In [25]:
model.eval()
X_test_t = torch.tensor(X_test).to(device)
y_pred = model(X_test_t).argmax(dim=1).cpu().numpy()

print(classification_report(y_test, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))
with torch.no_grad():
    y_proba = F.softmax(model(X_test_t), dim=1)[:, 1].cpu().numpy()


print("ROC AUC score:", roc_auc_score(y_test, y_proba))


              precision    recall  f1-score   support

           0       0.99      0.96      0.97        89
           1       0.92      0.98      0.95        48

    accuracy                           0.96       137
   macro avg       0.95      0.97      0.96       137
weighted avg       0.96      0.96      0.96       137

Confusion matrix:
 [[85  4]
 [ 1 47]]
ROC AUC score: 0.9906367041198502


In [28]:
Path("artifacts").mkdir(exist_ok=True)
joblib.dump(scaler, "artifacts/breast_minmax_scaler.joblib")
torch.save(model.state_dict(), "artifacts/wbc_mlp.pt")
import torch.onnx

model = WBCBinaryClassifier()
model.load_state_dict(torch.load("artifacts/wbc_mlp.pt", map_location="cpu"))
model.eval()

dummy_input = torch.randn(1, 9)

torch.onnx.export(
    model,
    dummy_input,
    "artifacts/wbc_mlp.onnx",
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}},
    opset_version=11,
)
print("Exported to 'artifacts/wbc_mlp.onnx'")


  torch.onnx.export(


OnnxExporterError: Module onnx is not installed!