# Setup

In [None]:
import sys
import platform

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk
import torch

# Detect the usable accelerators once; everything below derives from these flags.
has_gpu = torch.cuda.is_available()
# is_available() confirms that an MPS device can actually be used on this machine.
# is_built() would only report that this PyTorch binary was compiled with MPS
# support, which can be true on machines where selecting "mps" still fails.
has_mps = torch.backends.mps.is_available()
# Preference order: Apple MPS, then NVIDIA CUDA, then CPU as the fallback.
custom_device = "mps" if has_mps else "cuda" if has_gpu else "cpu"

# Report the runtime environment: platform and PyTorch first, then the library versions.
print(f"Python Platform: {platform.platform()}")
print(f"PyTorch Version: {torch.__version__}\n")
print(f"Python {sys.version}")
for lib_label, lib_module in (("Pandas", pd), ("Numpy", np), ("Scikit-Learn", sk)):
    print(f"{lib_label} {lib_module.__version__}")

# Accelerator availability, as detected above.
cuda_status = "available" if has_gpu else "NOT AVAILABLE"
mps_status = "AVAILABLE" if has_mps else "NOT AVAILABLE"
print("NVIDIA/CUDA GPU is", cuda_status)
print("MPS (Apple Metal) is", mps_status)

print(f"\nCustome Device:\t{custom_device}")

# Two options on a Mac with Apple Silicon:
#   1. force the CPU:        torch.set_default_device("cpu")
#   2. use the detected one: torch.set_default_device(custom_device)  <- active
torch.set_default_device(custom_device)
print(f"Active device:\t{torch.get_default_device()}")

# Smoke test: a freshly created layer and the tensor it produces should both
# live on the default device that was just configured.
print("\nRun test:")
layer = torch.nn.Linear(in_features=20, out_features=30)
print(f"\tLayer weights are on device: {layer.weight.device}")
smoke_output = layer(torch.randn(128, 20))
print(f"\tLayer creating data on device: {smoke_output.device}")

# Classification (of images)

## Creating random data for Classification

In [None]:
from sklearn.datasets import make_circles

# Number of points to generate
n_samples = 1000

# Generate the two-circle toy dataset: `noise` controls how much the points are
# scattered, `random_state` fixes the seed so the data is reproducible.
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)

In [None]:
# Sanity check: X and y should contain the same number of entries
len(X), len(y)

In [None]:
# Peek at the first five entries of X and the matching entries of y
X[:5], y[:5]

## Visualizing the created data

In [None]:
# One column per coordinate of X plus the label, so the data can be inspected with pandas.
data = {
    "X1": X[:, 0],
    "X2": X[:, 1],
    "label": y,
}
circles = pd.DataFrame(data=data)

In [None]:
# Preview the first rows of the DataFrame
circles.head()

In [None]:
# Count how many samples carry each label value (checks the class balance)
circles["label"].value_counts()

In [None]:
# Scatter the two coordinates against each other, coloured by label.
# The trailing semicolon suppresses the notebook's repr of the returned artist.
plt.figure(figsize=(5, 5))
plt.scatter(
    x=circles["X1"],
    y=circles["X2"],
    c=y,
    cmap=plt.cm.RdYlBu,
);

## Transforming ndarray into Torch tensors

In [None]:
# Convert both arrays to float32 tensors first, then move them to the selected device.
X, y = (
    torch.from_numpy(array).float().to(custom_device)
    for array in (X, y)
)

In [None]:
# Inspect the Python types, dtypes and devices of X and y
type(X), type(y), X.dtype, y.dtype, X.device, y.device

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Summarise the sizes of both splits (adjacent literals form one string, printed once).
print(
    "Training Data:\n"
    f"\tNum of Features\t\t{len(X_train)}\n"
    f"\tNum of Labels\t\t{len(y_train)}\n"
    "\n"
    "Testing Data:\n"
    f"\tNum of Features\t\t{len(X_test)}\n"
    f"\tNum of Labels\t\t{len(y_test)}"
)

## Model Building

In [None]:
class ClassificationModel(torch.nn.Module):
    """Small fully connected network for binary classification of 2-D points.

    Architecture: Linear(2 -> 64) -> ReLU -> Linear(64 -> 64) -> ReLU -> Linear(64 -> 1),
    i.e. the same structure as the ``torch.nn.Sequential`` model built in this file.
    The single output value per sample is a raw logit (no sigmoid is applied here).
    """

    def __init__(self):
        super().__init__()
        self.layer_1 = torch.nn.Linear(in_features=2, out_features=64)
        self.layer_2 = torch.nn.Linear(in_features=64, out_features=64)
        self.layer_3 = torch.nn.Linear(in_features=64, out_features=1)
        # ReLU has no parameters, so a single instance is shared by both hidden layers.
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return one raw logit per input row.

        Args:
            x: tensor whose last dimension has size 2 (the two input features).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last dimension of 1.
        """
        # Structure: x -> layer_1 -> ReLU -> layer_2 -> ReLU -> layer_3 -> output
        # Without the activations the three linear layers would collapse into a
        # single linear map, which cannot separate the two circles.
        hidden = self.relu(self.layer_1(x))
        hidden = self.relu(self.layer_2(hidden))
        return self.layer_3(hidden)

In [None]:
# Create the model
model_0 = ClassificationModel()

# Print its summary in three parts: parameters, architecture, parameter device.
for summary_part in (
    model_0.state_dict(),
    f"\nModel structure:\n{model_0}",
    f"\nDevice:\t{next(model_0.parameters()).device}",
):
    print(summary_part)

## Model replication with torch.nn.Sequential()

In [None]:
model_0 = torch.nn.Sequential(
    torch.nn.Linear(in_features=2, out_features=64),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=64, out_features=64),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=64, out_features=1)
)

In [None]:
# Summary of the Sequential model: architecture and the device of its first parameter.
# (Uncomment the next line to also dump all weights and biases.)
#print(model_0.state_dict())
print(f"\nModel structure:\n{model_0}")
print(f"\nDevice:\t{next(model_0.parameters()).device}")

In [None]:
# Baseline: run the still-untrained model on the test set.
# eval() switches the model to evaluation mode; inference_mode() disables gradient tracking.
model_0.eval()
with torch.inference_mode():
    untrained_preds = model_0(X_test)

# Compare the number and shape of predictions with the number and shape of test samples
print(f"Length of preds: {len(untrained_preds)}, Shape: {untrained_preds.shape}")
print(f"Length of samples: {len(X_test)}, Shape: {X_test.shape}")
print(f"\nFirst 5 preds:\n{untrained_preds[:5]}")

## Loss Function & Optimizer

Classification problems are broadly divided into binary and multi-class classification problems. <p>Hence, the logical choice of **Loss Function** is either `torch.nn.BCEWithLogitsLoss` (with built-in *sigmoid* activation function) or `torch.nn.BCELoss` for the former, and `torch.nn.CrossEntropyLoss` for the latter. <p> Either "SGD" (`torch.optim.SGD()`) or "Adam" (`torch.optim.Adam()`) is typically used as **Optimizer**.

Official PyTorch Documentation
* [Loss functions overview](https://docs.pytorch.org/docs/stable/nn.html#loss-functions)
* [Optimizer overview](https://docs.pytorch.org/docs/stable/optim.html)

In [None]:
# Binary cross-entropy that takes raw logits (the sigmoid is applied inside the loss)
loss_function = torch.nn.BCEWithLogitsLoss()

# Adam over all parameters of model_0 with a learning rate of 0.001
optimizer = torch.optim.Adam(model_0.parameters(), lr=1e-3)

## Accuracy Function

In [None]:
def accuracy_function(y_true, y_preds):
    """Return the percentage of predictions that equal the true labels.

    Args:
        y_true: tensor of ground-truth labels.
        y_preds: tensor of predicted labels with the same shape as ``y_true``.

    Returns:
        Accuracy as a float between 0 and 100; 0.0 when the tensors are empty.

    Raises:
        ValueError: if the two tensors do not have the same shape.
    """
    # torch.eq broadcasts silently: comparing shapes (N,) and (N, 1) would build
    # an (N, N) matrix and yield a meaningless accuracy, so reject it up front.
    if y_true.shape != y_preds.shape:
        raise ValueError(
            f"Shape mismatch: y_true {tuple(y_true.shape)} vs y_preds {tuple(y_preds.shape)}"
        )

    # numel() counts every compared element, so the denominator is also
    # correct for tensors with more than one dimension.
    total = y_preds.numel()
    if total == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0

    correct = torch.eq(y_true, y_preds).sum().item()
    return (correct / total) * 100

## Training Model

### Peeking at the raw logits

Why inspect logits?
	•	Debugging scale and distribution: You might want to make sure your scores aren’t exploding (e.g. all in the thousands) or collapsing to a very narrow band near zero.
	•	Margin analysis: In binary classification, a logit near 0 means the model is “unsure,” whereas a large-magnitude logit (positive or negative) shows strong confidence.
	•	Relative ordering: For multi-class, the largest logit determines the predicted class even before softmax, so you can check whether the model’s top-k order makes sense.

In [None]:
# Evaluation mode plus inference_mode(): no gradients are tracked while peeking at the outputs
model_0.eval()
with torch.inference_mode():
    # logits (y_logits) are the raw, unnormalized outputs of a model before activation functions like sigmoid or softmax are applied
    # only the first five test outputs are kept for inspection
    y_logits = model_0(X_test)[:5]

# last expression of the cell, shown as the cell output
y_logits

In [None]:
# logits -> probabilities (sigmoid) -> hard 0/1 labels (round), for the first five test samples
y_pred_labels = model_0(X_test[:5]).sigmoid().round()

### Training & Test Loop

In [None]:
# Seed the random number generators of all devices rather than MPS only, so
# the cell behaves the same on whichever device `custom_device` resolved to.
torch.manual_seed(42)

epochs = 100
# Report about ten times per run. Integer arithmetic avoids a float modulus
# and max(1, ...) keeps the interval valid when epochs < 10.
log_interval = max(1, epochs // 10)

for epoch in range(epochs):
    model_0.train()

    # FORWARD PROPAGATION
    # squeeze(dim=1) drops only the trailing size-1 feature dimension, so a
    # batch containing a single sample keeps its batch dimension.
    y_logits = model_0(X_train).squeeze(dim=1)
    # Hard 0/1 predictions are only used for the accuracy metric, so they are
    # detached from the autograd graph.
    y_preds = torch.round(torch.sigmoid(y_logits.detach()))

    # LOSS (loss function requires raw logits as input)
    loss = loss_function(y_logits, y_train)
    accuracy = accuracy_function(y_true=y_train, y_preds=y_preds)

    # OPTIMIZER: clear the gradients left over from the previous step
    optimizer.zero_grad()

    # BACKPROPAGATION
    loss.backward()

    # GRADIENT DESCENT
    optimizer.step()

    # TESTING
    model_0.eval()
    with torch.inference_mode():
        # FORWARD PROPAGATION
        test_logits = model_0(X_test).squeeze(dim=1)
        test_preds = torch.round(torch.sigmoid(test_logits))

        # LOSS / ACCURACY CALCULATION
        test_loss = loss_function(test_logits, y_test)
        test_accuracy = accuracy_function(y_true=y_test, y_preds=test_preds)

    if (epoch + 1) % log_interval == 0:
        print(f"Epoch {epoch+1}/{epochs} | Loss: {loss.item():.4f} | Accuracy: {accuracy:.2f}% | Test Loss: {test_loss.item():.4f} | Test Accuracy: {test_accuracy:.2f}%")
 

In [None]:
import requests
from pathlib import Path 

# Download helper functions from Learn PyTorch repo (if not already downloaded)
# NOTE(review): this fetches Python code from the network and the cell imports
# it right afterwards - only run it if the source repository is trusted.
helper_path = Path("helper_functions.py")
if helper_path.is_file():
    print("helper_functions.py already exists, skipping download")
else:
    print("Downloading helper_functions.py")
    # The timeout keeps the cell from hanging forever on a stalled connection.
    request = requests.get(
        "https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py",
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of saving an error page as a Python module.
    request.raise_for_status()
    # The file is written only after the download succeeded.
    helper_path.write_bytes(request.content)

from helper_functions import plot_predictions, plot_decision_boundary

In [None]:
# Side-by-side decision boundaries: training split on the left, test split on the right
plt.figure(figsize=(12, 5))
panels = (("Train", X_train, y_train), ("Test", X_test, y_test))
for panel_index, (panel_title, panel_X, panel_y) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.title(panel_title)
    plot_decision_boundary(model_0, panel_X, panel_y)