In [None]:
from sklearn.datasets import make_circles, make_blobs
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from pathlib import Path
from helper_functions import plot_decision_boundary

# Directory that trained model weights are written to; created here if missing
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(exist_ok=True, parents=True)

# Use the GPU when PyTorch can see one, otherwise stay on the CPU
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

print(torch.__version__)
print(f"Using device: {device}")

# Make classification data

In [None]:
# How many points to generate for the circles dataset
NUM_SAMPLES = 1000

# A fixed random_state keeps the generated points identical between runs
X, y = make_circles(n_samples=NUM_SAMPLES, noise=0.03, random_state=42)
(len(X), len(y))

In [None]:
# Peek at the first five feature rows and the first five labels
for name, values in (("X", X), ("y", y)):
    print(f"First five samples of {name}:\n {values[:5]}")

### Make DataFrame

In [None]:
# Tabular view of the data: one column per coordinate, plus the class label
circles = pd.DataFrame(X, columns=["X1", "X2"]).assign(label=y)
circles.head(10)

In [None]:
# Plot the two coordinates against each other, coloured by label
plt.scatter(*X.T, c=y, cmap=plt.colormaps['RdYlBu']);

### Convert to Tensors

In [None]:
# Convert features and labels to float32 tensors on the selected device.
# torch.as_tensor accepts both NumPy arrays and tensors, so re-running this
# cell after X and y have already been converted leaves them unchanged instead
# of raising the TypeError that torch.from_numpy throws for non-ndarray input.
X = torch.as_tensor(X, dtype=torch.float, device=device)
y = torch.as_tensor(y, dtype=torch.float, device=device)

### Split into training and test sets

In [None]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

tuple(len(part) for part in (X_train, X_test, y_train, y_test))

# Building a model

In [None]:
class CircleModelV0(nn.Module):
    """Purely linear two-layer model: 2 inputs -> 5 hidden features -> 1 output."""

    def __init__(self):
        super().__init__()

        # Hidden layer: maps each 2-value input row to 5 features
        self.layer_1 = nn.Linear(in_features=2, out_features=5)

        # Output layer: maps the 5 hidden features to a single value per row
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    def forward(self, x):
        """Apply layer_1 and then layer_2 to ``x`` and return the result."""
        hidden = self.layer_1(x)
        return self.layer_2(hidden)

In [None]:
# Build the class-based model first and show its layout
model_0 = CircleModelV0().to(device)
print(model_0)

# Rebind model_0 to an nn.Sequential stack of linear layers. Note that this
# stack (2 -> 10 -> 10 -> 1) is wider and deeper than CircleModelV0
# (2 -> 5 -> 1), so it is not an exact replica of the class above.
# The seed is set immediately before construction so the initial weights repeat.
torch.manual_seed(42)
linear_stack = [
    nn.Linear(in_features=2, out_features=10),
    nn.Linear(in_features=10, out_features=10),
    nn.Linear(in_features=10, out_features=1),
]
model_0 = nn.Sequential(*linear_stack).to(device)
print(model_0)

model_0.state_dict()

In [None]:
# Forward pass of the untrained model on the test features, inside inference mode
with torch.inference_mode():
    preds = model_0(X_test)

# Compare lengths and shapes of the predictions and the labels
print(f"Length of predictions: {len(preds)}, Shape: {preds.shape}")
print(f"Length of test labels: {len(y_test)}, Shape: {y_test.shape}")

first_ten_preds = preds[:10].round()
print(f"\nFirst 10 predictions:\n{first_ten_preds}")
print(f"\nFirst 10 test labels:\n{y_test[:10]}")


## Setup loss function and optimizer

In [None]:
# Loss: binary cross-entropy that takes raw logits as input
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer: SGD over all parameters of model_0, used to lower the loss
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Both tensors are flattened before they are compared. Without this,
    labels of shape ``(N,)`` and predictions of shape ``(N, 1)`` broadcast to
    an ``(N, N)`` comparison and silently produce a meaningless score that can
    exceed 100%.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels holding the same number of elements.

    Returns:
        The accuracy as a Python float, in percent.

    Raises:
        ValueError: If the two tensors hold a different number of elements.
    """
    y_true = y_true.reshape(-1)
    y_pred = y_pred.reshape(-1)

    # A size mismatch would otherwise broadcast (size 1) or fail with an
    # unrelated-looking error, so reject it explicitly.
    if y_true.numel() != y_pred.numel():
        raise ValueError(
            f"y_true has {y_true.numel()} elements but y_pred has {y_pred.numel()}"
        )

    correct = torch.eq(y_true, y_pred).sum().item()
    accuracy = (correct / y_pred.numel()) * 100
    return accuracy

In [None]:
# Turn raw model outputs into hard labels for the first five test samples:
# logits -> sigmoid -> prediction probabilities -> round -> labels
with torch.inference_mode():
    first_five_logits = model_0(X_test)[:5]
    y_pred_labels = first_five_logits.sigmoid().round()

# Drop the size-1 dimension so the predictions line up with y_test[:5]
flat_labels = y_pred_labels.squeeze()
flat_labels, y_test[:5], torch.eq(flat_labels, y_test[:5])

# Training and Testing Loops

In [None]:
epochs = 1000

for epoch in range(epochs):
    ### Training ###
    model_0.train()

    # Forward pass: logits -> probabilities -> hard labels
    y_logits = model_0(X_train).squeeze()
    y_pred = y_logits.sigmoid().round()

    # The loss is computed on the logits, the accuracy on the hard labels
    loss = loss_fn(y_logits, y_train)
    accuracy = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # Reset gradients, backpropagate the loss, then update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    ### Testing ###
    model_0.eval()

    with torch.inference_mode():
        # Same forward pass on the held-out data
        test_logits = model_0(X_test).squeeze()
        test_pred = test_logits.sigmoid().round()

        test_loss = loss_fn(test_logits, y_test)
        test_accuracy = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Only report every 10th epoch
    if epoch % 10 != 0:
        continue
    print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {accuracy:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_accuracy:.2f}% ")

### The model is trying to separate circular data with a straight line, so it cannot fit the data

In [None]:
# One 12x6 figure with two side-by-side panels for model_0:
# the training split on the left, the test split on the right.
# NOTE(review): plot_decision_boundary comes from helper_functions; its exact
# drawing behaviour is not visible in this notebook.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)  # left panel
plt.title("Train")
plot_decision_boundary(model_0, X_train, y_train)
plt.subplot(1, 2, 2)  # right panel
plt.title("Test")
plot_decision_boundary(model_0, X_test, y_test)

In [None]:
class CircleModelV1(nn.Module):
    """Three linear layers (2 -> 10 -> 10 -> 1) with a ReLU after the first two."""

    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        self.layer_2 = nn.Linear(in_features=10, out_features=10)
        self.layer_3 = nn.Linear(in_features=10, out_features=1)
        # A single ReLU module, applied after layer_1 and again after layer_2
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the output of layer_3 for the input ``x``."""
        hidden = self.relu(self.layer_1(x))
        hidden = self.relu(self.layer_2(hidden))
        return self.layer_3(hidden)
    
# Seed the RNG right before construction so the initial weights are the same
# on every run, as is already done for the nn.Sequential model_0.
torch.manual_seed(42)
model_1 = CircleModelV1().to(device)

In [None]:
# Same loss as before; the optimizer now updates model_1's parameters
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

In [None]:
epochs = 3000

for epoch in range(epochs):
    ### Training ###
    model_1.train()

    # Forward pass: logits -> probabilities -> hard labels
    y_logits = model_1(X_train).squeeze()
    y_pred = y_logits.sigmoid().round()

    # The loss is computed on the logits, the accuracy on the hard labels
    loss = loss_fn(y_logits, y_train)
    accuracy = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # Reset gradients, backpropagate the loss, then update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    ### Testing ###
    model_1.eval()

    with torch.inference_mode():
        # Same forward pass on the held-out data
        test_logits = model_1(X_test).squeeze()
        test_pred = test_logits.sigmoid().round()

        test_loss = loss_fn(test_logits, y_test)
        test_accuracy = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Only report every 300th epoch
    if epoch % 300 != 0:
        continue
    print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {accuracy:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_accuracy:.2f}%")
    

In [None]:
# Compare the two binary models on the test split in one 12x6 figure:
# model_0 (linear layers only) on the left, model_1 (linear layers + ReLU) on the right.
# NOTE(review): plot_decision_boundary comes from helper_functions; its exact
# drawing behaviour is not visible in this notebook.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)  # left panel
plt.title("Without Non-Linear Activation")
plot_decision_boundary(model_0, X_test, y_test)
plt.subplot(1, 2, 2)  # right panel
plt.title("With Non-Linear Activation")
plot_decision_boundary(model_1, X_test, y_test)

### Save binary classification model

In [None]:
# File name and full destination path for model_1's weights
MODEL_NAME = "binary_classification_model.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

print(f"Saving model to: {MODEL_SAVE_PATH}")
# Only the state_dict is written, not the module object itself
torch.save(model_1.state_dict(), MODEL_SAVE_PATH)

# Replicating Non-Linear Activation Functions

In [None]:
def relu(x):
    """Return the element-wise maximum of 0 and the tensor ``x``."""
    zero = torch.tensor(0)
    return torch.maximum(zero, x)

def sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x)) of ``x``."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

In [None]:
# Evenly spaced inputs from -10 up to (not including) 10, used to visualise the activations
A = torch.arange(start=-10, end=10, step=1, dtype=torch.float)
plt.plot(A)
A

In [None]:
# Apply the hand-written ReLU once, then plot and display the result
relu_of_A = relu(A)
plt.plot(relu_of_A)
relu_of_A

In [None]:
# Apply the hand-written sigmoid once, then plot and display the result
sigmoid_of_A = sigmoid(A)
plt.plot(sigmoid_of_A)
sigmoid_of_A

# Multi-class model

### Creating multi-class classification data

In [None]:
# Dataset settings
NUM_CLASSES = 4
NUM_FEATURES = 2
RANDOM_SEED = 42

# Generate 1000 points grouped around NUM_CLASSES centres
X, y = make_blobs(
    n_samples=1000,
    n_features=NUM_FEATURES,
    centers=NUM_CLASSES,
    cluster_std=1.5,
    random_state=RANDOM_SEED,
)

# Features become float tensors and labels become long (integer) tensors, both on `device`
X = torch.from_numpy(X).to(device=device, dtype=torch.float)
y = torch.from_numpy(y).to(device=device, dtype=torch.long)

# 80/20 train/test split with the same seed as the data generation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED
)

# Scatter plot of the full dataset; tensors are copied to the CPU for matplotlib
plt.figure(figsize=(10, 7))
features_cpu = X.cpu()
plt.scatter(features_cpu[:, 0], features_cpu[:, 1], c=y.cpu(), cmap=plt.colormaps['RdYlBu']);

### Build model

In [None]:
class BlobModel(nn.Module):
    """Three linear layers with a ReLU between each pair of them.

    Args:
        input_features: Number of values in each input row.
        output_features: Number of values produced for each input row.
        hidden_units: Width of the two hidden layers (default 8).
    """

    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        layers = [
            nn.Linear(input_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_features),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return its raw output."""
        return self.layer_stack(x)
    
# Seed the RNG right before construction so the initial weights are the same
# on every run; RANDOM_SEED is the seed already used for the data and the split.
torch.manual_seed(RANDOM_SEED)
model_2 = BlobModel(input_features=NUM_FEATURES, output_features=NUM_CLASSES).to(device)

### Create loss function and optimizer

In [None]:
# Cross-entropy loss for the multi-class model; SGD updates model_2's parameters
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_2.parameters(), lr=0.1)

### Training and testing loop

In [None]:
epochs = 3000

for epoch in range(epochs):
    ### Training ###
    model_2.train()

    # Forward pass: logits -> prediction probabilities -> prediction labels
    y_logits = model_2(X_train)
    y_pred = y_logits.softmax(dim=1).argmax(dim=1)

    # The loss is computed on the logits, the accuracy on the predicted labels
    loss = loss_fn(y_logits, y_train)
    accuracy = accuracy_fn(y_true=y_train, y_pred=y_pred)

    # Reset gradients, backpropagate the loss, then update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    ### Testing ###
    model_2.eval()

    with torch.inference_mode():
        # Same forward pass on the held-out data
        test_logits = model_2(X_test)
        test_pred = test_logits.softmax(dim=1).argmax(dim=1)

        test_loss = loss_fn(test_logits, y_test)
        test_accuracy = accuracy_fn(y_true=y_test, y_pred=test_pred)

    # Only report every 250th epoch
    if epoch % 250 != 0:
        continue
    print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {accuracy:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_accuracy:.2f}%")
    

In [None]:
# Single 12x6 figure for model_2 on the test split.
# NOTE(review): plot_decision_boundary comes from helper_functions; its exact
# drawing behaviour is not visible in this notebook.
plt.figure(figsize=(12, 6))
plot_decision_boundary(model_2, X_test, y_test)

### Saving multi-class classification model

In [None]:
# File name and full destination path for model_2's weights
MODEL_NAME = "multi_class_classification_model.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

print(f"Saving model to: {MODEL_SAVE_PATH}")
# Only the state_dict is written, not the module object itself
torch.save(model_2.state_dict(), MODEL_SAVE_PATH)