In [216]:
import matplotlib as plt
!pip install graphviz
from torchview import draw_graph
from torch import nn
from typing import Tuple
!pip install torch
import torch
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report





[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [217]:
data = pd.read_csv('bcancer_data.csv')
data

Unnamed: 0,Cl.thickness,Cell.size,Cell.shape,Marg.adhesion,Epith.c.size,Bare.nuclei,Bl.cromatin,Normal.nucleoli,Mitoses,Class
0,1,1,1,1,2,1,2,1,1,0
1,5,1,1,1,1,1,3,1,1,0
2,4,1,1,1,2,1,1,1,1,0
3,2,1,1,1,2,1,2,1,1,0
4,1,2,3,1,2,1,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...
535,1,1,1,1,2,1,1,1,1,0
536,3,1,1,1,2,1,2,3,1,0
537,4,1,1,1,2,1,1,1,1,0
538,2,1,1,1,2,1,1,1,1,0


In [218]:
# Task-1: Divide data into 2 datasets
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(data, test_size=0.2)

In [219]:
# Task-2: Build classification Model
train_set.shape, test_set.shape

((432, 10), (108, 10))

In [220]:
def create_dataset(data):
    features = torch.tensor(
        data[['Cl.thickness', 'Cell.size', 'Cell.shape', 'Marg.adhesion']].to_numpy(),
        dtype = torch.float
    )
    labels = torch.tensor(data['Class'].to_numpy(), dtype = torch.float)
    return features, labels

In [221]:
X_train, y_train = create_dataset(train_set)
X_test, y_test = create_dataset(test_set)

In [222]:
class PenguinClassifier(nn.Module):

    def __init__(self, n_features: int, n_classes: int):
        super().__init__()
        self.linear_layer_1 = nn.Linear(n_features, 10)
        self.linear_layer_2 = nn.Linear(10, n_classes)
    
    def forward(self, features):
        x = torch.relu(self.linear_layer_1(features))
        return self.linear_layer_2(x)

In [223]:
model = PenguinClassifier(n_features = 4, n_classes= 1)
model

PenguinClassifier(
  (linear_layer_1): Linear(in_features=4, out_features=10, bias=True)
  (linear_layer_2): Linear(in_features=10, out_features=1, bias=True)
)

In [224]:
untrained_preds = model(X_test)
print(f"Length of predictions: {len(untrained_preds)}, Shape: {untrained_preds.shape}")
print(f"Length of test samples: {len(y_test)}, Shape: {y_test.shape}")
print(f"\nFirst 10 predictions:\n{untrained_preds[:10]}")
print(f"\nFirst 10 test labels:\n{y_test[:10]}")

Length of predictions: 108, Shape: torch.Size([108, 1])
Length of test samples: 108, Shape: torch.Size([108])

First 10 predictions:
tensor([[-1.3941],
        [-1.2120],
        [-1.6865],
        [-0.1157],
        [-0.0805],
        [-0.0453],
        [-0.0629],
        [-0.1418],
        [-0.0453],
        [-2.0719]], grad_fn=<SliceBackward0>)

First 10 test labels:
tensor([1., 1., 1., 0., 0., 0., 0., 0., 0., 1.])


In [225]:
# Loss function
loss_fn = nn.BCEWithLogitsLoss()

In [226]:
# Optimizer
optimizer = torch.optim.SGD(params=model.parameters(),
                            lr=0.1)

In [227]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100
    return acc

In [228]:
# View the frist 5 outputs of the forward pass on the test data
y_logits = model(X_test)[:5]
y_logits

tensor([[-1.3941],
        [-1.2120],
        [-1.6865],
        [-0.1157],
        [-0.0805]], grad_fn=<SliceBackward0>)

In [229]:
# Use ReLu function
y_pred_probs = torch.relu(y_logits)
y_pred_probs

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], grad_fn=<ReluBackward0>)

In [230]:
# Find the predicted labels (round the prediction probabilities)
y_preds = torch.round(y_pred_probs)

# In full
y_pred_labels = torch.round(torch.relu(model(X_test)[:5]))

# Check for equality
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

# Get rid of extra dimension
y_preds.squeeze()

tensor([True, True, True, True, True])


tensor([0., 0., 0., 0., 0.], grad_fn=<SqueezeBackward0>)

In [231]:
y_test[:5]

tensor([1., 1., 1., 0., 0.])

In [232]:
# Set the number of epochs
torch.manual_seed(42)
epochs = 800

X_train, y_train = X_train, y_train
X_test, y_test = X_test, y_test

In [233]:
# Build training and evaluation loop
for epoch in range(epochs):
    ### Training
    model.train()

In [234]:
# 1. Forward pass (model outputs raw logits)
y_logits = model(X_train).squeeze() # squeeze to remove extra `1` dimensions
y_pred = torch.round(torch.sigmoid(y_logits)) # turn logits -> pred probs -> pred labls

In [235]:
# 2. Calculate loss/accuracy
loss = loss_fn(y_logits, # Using nn.BCEWithLogitsLoss works with raw logits since it has built-in sigmoid
                y_train)
acc = accuracy_fn(y_true=y_train,
                    y_pred=y_pred)

In [236]:
# 3. Optimizer zero grad
optimizer.zero_grad()

# 4. Loss backwards
loss.backward()

# 5. Optimizer step
optimizer.step()


In [237]:
### Testing
model.eval()
with torch.inference_mode():
    # 1. Forward pass
    test_logits = model(X_test).squeeze()
    test_pred = torch.round(torch.sigmoid(test_logits))
    # 2. Caculate loss/accuracy
    test_loss = loss_fn(test_logits,
                        y_test)
    test_acc = accuracy_fn(y_true=y_test,
                            y_pred=test_pred)

In [238]:
# Print out what's happening every 100 epochs
if epoch % 100 == 0:
    print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%")

In [239]:
!pip install torchvision
import requests
from pathlib import Path

# Download helper functions from Learn PyTorch repo (if not already downloaded)
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  print("Downloading helper_functions.py")
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

from helper_functions import plot_predictions, plot_decision_boundary

helper_functions.py already exists, skipping download



[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [243]:
from sklearn.inspection import DecisionBoundaryDisplay
# Plot decision boundaries for training and test sets
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("Train")
plot_decision_boundary(model, X_train, y_train)
plt.subplot(1, 2, 2)
plt.title("Test")
plot_decision_boundary(model, X_test, y_test)

TypeError: 'module' object is not callable