# Neural Network Classification 

In [None]:
import sklearn
from sklearn.datasets import make_circles
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn.model_selection import train_test_split
from torch import nn
import requests
import numpy as np
from sklearn.datasets import make_blobs
import torchmetrics

## create classification data

In [None]:
# Number of points to generate for the circles dataset.
n_samples = 1_000

In [None]:
# create circles
from torch import rand  # NOTE(review): `rand` appears unused in the visible notebook


# Features `x` and labels `y`; the fixed random_state keeps the data reproducible.
x, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)

In [None]:
# Sanity check: number of feature rows and number of labels.
len(x), len(y)

In [None]:
# Tabular view of the circle data: the two feature columns plus the label.
circles = pd.DataFrame(x, columns=["x1", "x2"]).assign(label=y)
circles.head(10)

In [None]:
# Scatter the first feature against the second, coloured by label.
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.RdYlBu)

## check input and output

In [None]:
# Shapes of the raw features and labels before they are converted to tensors.
x.shape, y.shape

In [None]:
# Look at the first feature row together with its label.
x_sample, y_sample = x[0], y[0]

print(f"{x_sample}, {y_sample}")

## turn data into tensors

In [None]:
# Convert the NumPy arrays to float32 tensors.
X = torch.from_numpy(x).float()
Y = torch.from_numpy(y).float()

X[:5], Y[:5]

In [None]:
# Shapes of the feature and label tensors after conversion.
X.shape, Y.shape

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

len(X_train), len(X_test)

## build model

In [None]:
class CircleModelV0(nn.Module):
    """Two stacked linear layers mapping 2 input features to 1 output value."""

    def __init__(self):
        super().__init__()

        # 2 input features -> 5 hidden units
        self.layer_1 = nn.Linear(2, 5)
        # 5 hidden units -> 1 output (raw logit, no activation applied)
        self.layer_2 = nn.Linear(5, 1)

    def forward(self, x):
        """Pass ``x`` through layer_1 and then layer_2 and return the result."""
        hidden = self.layer_1(x)
        return self.layer_2(hidden)

In [None]:
# instantiate the model; the bare name on the last line displays it in the notebook
model_1 = CircleModelV0()
model_1

In [None]:
# Same layer sizes as CircleModelV0 (2 -> 5 -> 1), built with nn.Sequential.
model_2 = nn.Sequential(
    nn.Linear(2, 5),
    nn.Linear(5, 1),
)

model_2

In [None]:
# Inspect the model's current weights and biases.
model_2.state_dict()

In [None]:
# Outputs of the still-untrained model, printed next to the true test labels.
with torch.inference_mode():
    untrained_pred = model_2(X_test)
    rounded_pred = untrained_pred.round().abs()
    print(f"len = {len(rounded_pred)}, shape = {untrained_pred.shape}")
    print(f"len = {len(X_test)}, shape = {X_test.shape}")
    print(rounded_pred[:10])
    print(Y_test[:10])

## setup loss function and optimizer

In [None]:
# For binary classification, binary cross-entropy (BCE) is a common loss function;
# SGD and Adam are common optimizers.
# BCEWithLogitsLoss takes raw logits (it applies the sigmoid internally).
loss_fn = nn.BCEWithLogitsLoss()

In [None]:
# Plain SGD over all of model_2's parameters.
optimizer = torch.optim.SGD(model_2.parameters(), lr=0.1)

In [None]:
# Accuracy: percentage of predictions that equal the true labels.
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of entries in ``y_pred`` equal to ``y_true``."""
    matches = y_true == y_pred
    n_correct = matches.sum().item()
    return (n_correct / len(y_pred)) * 100

## train model

In [None]:
# View the raw model outputs (logits) for the first 5 test samples.
model_2.eval()  # evaluation mode before predicting
with torch.inference_mode():  # no gradient tracking needed here
    y_logits = model_2(X_test[:5])
y_logits

In [None]:
# Squash the logits into the (0, 1) range with the sigmoid function.
y_pred_probs = y_logits.sigmoid()
y_pred_probs

In [None]:
# Round the probabilities to hard 0/1 labels.
y_preds = y_pred_probs.round()

# Same pipeline in one expression: logits -> sigmoid -> round.
y_pred_labels = model_2(X_test[:5]).sigmoid().round()

# Both routes should agree element-wise.
print(torch.eq(y_preds.squeeze(), y_pred_labels.squeeze()))

y_preds.squeeze()

In [None]:
# True labels of the first 5 test samples, for comparison with the predictions.
Y_test[:5]

## build train and test loop

In [None]:
torch.manual_seed(1)

epochs = 1000

for epoch in range(epochs):

    # --- training step on the full training set ---
    model_2.train()

    y_logits = model_2(X_train).squeeze()
    loss = loss_fn(y_logits, Y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Training accuracy is based on the logits computed before this epoch's update.
    y_pred = y_logits.sigmoid().round()
    acc = accuracy_fn(y_true=Y_train, y_pred=y_pred)

    # --- evaluation on the test split ---
    model_2.eval()

    with torch.inference_mode():
        test_logits = model_2(X_test).squeeze()
        test_loss = loss_fn(test_logits, Y_test)

        test_pred = test_logits.sigmoid().round()
        test_acc = accuracy_fn(y_true=Y_test, y_pred=test_pred)

    # Report progress every 10th epoch.
    if epoch % 10 == 0:
        print(f"epoch: {epoch} | loss: {loss:.5f} | acc: {acc:.2f}% | test_loss: {test_loss:.5f} | test_acc: {test_acc:.2f}%")

## visualize

In [None]:
def plot_decision_boundary(model: torch.nn.Module, X: torch.Tensor, y: torch.Tensor):
    """Draw the model's predicted regions over a grid, with the samples (X, y) on top.

    The grid spans the first two columns of ``X`` padded by 0.1 on each side.
    The model is moved to the CPU and switched to eval mode as a side effect.

    Source - https://madewithml.com/courses/foundations/neural-networks/ (with modifications)
    """
    # NumPy and Matplotlib operate on CPU data.
    model.to("cpu")
    X = X.to("cpu")
    y = y.to("cpu")

    # 101 x 101 grid covering the range of the two plotted features.
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    grid_x = np.linspace(x_min, x_max, 101)
    grid_y = np.linspace(y_min, y_max, 101)
    xx, yy = np.meshgrid(grid_x, grid_y)

    # One (x, y) row per grid point, as a float tensor the model can consume.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    X_to_pred_on = torch.from_numpy(grid_points).float()

    model.eval()
    with torch.inference_mode():
        y_logits = model(X_to_pred_on)

    # More than two distinct labels -> multi-class (softmax + argmax);
    # otherwise binary (sigmoid + round).
    is_multiclass = len(torch.unique(y)) > 2
    if is_multiclass:
        y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1)
    else:
        y_pred = torch.round(torch.sigmoid(y_logits))

    # Back to the grid's shape so contourf can colour each cell.
    y_pred = y_pred.reshape(xx.shape).detach().numpy()
    plt.contourf(xx, yy, y_pred, cmap=plt.cm.RdYlBu, alpha=0.7)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.RdYlBu)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

In [None]:
# Plot the decision boundary of the model trained above (model_2) on the
# train (left) and test (right) splits.
# model_3 is only created in the later "improve model" section, so it must not
# be referenced here: a top-to-bottom run would raise NameError.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title("train")
plot_decision_boundary(model_2, X_train, Y_train)
plt.subplot(1, 2, 2)
plt.title("test")
plot_decision_boundary(model_2, X_test, Y_test)


## improve model

Possible ways to improve the model:

* add more layers
* add more hidden units
* learn for longer
* change the activation function
* change the learning rate
* change the loss function

In [None]:
class CircleModelV1(nn.Module):
    """Three stacked linear layers (2 -> 10 -> 10 -> 1) with no activations."""

    def __init__(self):
        super().__init__()

        self.layer_1 = nn.Linear(2, 10)
        self.layer_2 = nn.Linear(10, 10)
        self.layer_3 = nn.Linear(10, 1)

    def forward(self, x):
        """Apply layer_1, layer_2 and layer_3 in sequence and return the result."""
        out = self.layer_1(x)
        out = self.layer_2(out)
        return self.layer_3(out)


model_3 = CircleModelV1()

In [None]:
# Loss on raw logits, and an SGD optimizer bound to model_3's parameters.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model_3.parameters(), lr=0.1)

In [None]:
torch.manual_seed(1)

epochs = 1000

for epoch in range(epochs):

    # --- training step on the full training set ---
    model_3.train()

    y_logits = model_3(X_train).squeeze()
    loss = loss_fn(y_logits, Y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Training accuracy is based on the logits computed before this epoch's update.
    y_pred = y_logits.sigmoid().round()
    acc = accuracy_fn(y_true=Y_train, y_pred=y_pred)

    # --- evaluation on the test split ---
    model_3.eval()

    with torch.inference_mode():
        test_logits = model_3(X_test).squeeze()
        test_loss = loss_fn(test_logits, Y_test)

        test_pred = test_logits.sigmoid().round()
        test_acc = accuracy_fn(y_true=Y_test, y_pred=test_pred)

    # Report progress every 100th epoch.
    if epoch % 100 == 0:
        print(f"epoch: {epoch} | loss: {loss:.5f} | acc: {acc:.2f}% | test_loss: {test_loss:.5f} | test_acc: {test_acc:.2f}%")

## test if model can learn linear relationships

In [None]:
# Parameters of the straight line used to generate the data.
weight, bias = 0.7, 0.3

# Evenly spaced inputs in [start, end), stored as a column vector.
start, end, step = 0, 1, 0.01
X_regression = torch.arange(start, end, step).unsqueeze(dim=1)

# Targets follow y = weight * x + bias.
y_regression = weight * X_regression + bias

# check data
print(len(X_regression))


In [None]:
# train and test split: the first 80% of the samples train, the remainder tests

train_split = int(0.8 * len(X_regression))
X_train_r, X_test_r = X_regression[:train_split], X_regression[train_split:]
y_train_r, y_test_r = y_regression[:train_split], y_regression[train_split:]

len(X_train_r), len(X_test_r)

In [None]:
# Plot the linear training and test data, plus predictions when given.
def plot_predictions(
    train_data, train_labels, test_data, test_labels, predictions=None
):
    """Scatter-plot training data (blue) and test data (green) on a new figure.

    When ``predictions`` is not None it is plotted in red against ``test_data``.
    """
    plt.figure(figsize=(10, 7))

    # (x values, y values, colour, legend label) for each series, in draw order.
    series = [
        (train_data, train_labels, "b", "Training data"),
        (test_data, test_labels, "g", "Testing data"),
    ]
    if predictions is not None:
        series.append((test_data, predictions, "r", "Predictions"))

    for xs, ys, colour, label in series:
        plt.scatter(xs, ys, c=colour, s=4, label=label)

    plt.legend(prop={"size": 14})

In [None]:
# Show the regression train/test split (no predictions yet).
plot_predictions(X_train_r, y_train_r, X_test_r, y_test_r)

In [None]:
class CircleModelV1_1(nn.Module):
    """Three stacked linear layers (1 -> 10 -> 10 -> 1) with no activations."""

    def __init__(self):
        super().__init__()

        self.layer_1 = nn.Linear(1, 10)
        self.layer_2 = nn.Linear(10, 10)
        self.layer_3 = nn.Linear(10, 1)

    def forward(self, x):
        """Apply layer_1, layer_2 and layer_3 in sequence and return the result."""
        out = self.layer_1(x)
        out = self.layer_2(out)
        return self.layer_3(out)


model_3_1 = CircleModelV1_1()

In [None]:
# L1 (mean absolute error) loss, and an SGD optimizer bound to model_3_1's parameters.
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(model_3_1.parameters(), lr=0.01)

In [None]:
torch.manual_seed(1)

epochs = 1000

for epoch in range(epochs):

    # Switch back to training mode each epoch: the evaluation step below puts
    # the model into eval mode, and it would otherwise stay there.
    model_3_1.train()

    y_pred = model_3_1(X_train_r)

    loss = loss_fn(y_pred,
                   y_train_r)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    # Evaluate on the test split without tracking gradients.
    model_3_1.eval()

    with torch.inference_mode():

        test_pred = model_3_1(X_test_r)

        test_loss = loss_fn(test_pred, y_test_r)

        # Report progress every 100th epoch.
        if epoch % 100 == 0:
            print(f"epoch: {epoch} | loss: {loss:.5f} | test_loss: {test_loss:.5f}")

In [None]:
# Predict on the regression test inputs and plot the result next to the data.
model_3_1.eval()

with torch.inference_mode():
    y_preds = model_3_1(X_test_r)

plot_predictions(X_train_r, y_train_r, X_test_r, y_test_r, predictions=y_preds)

## non-linear functions

In [None]:
class CircleModelV2(nn.Module):
    """Three linear layers (2 -> 10 -> 10 -> 1) with ReLU after the first two."""

    def __init__(self):
        super().__init__()

        self.layer_1 = nn.Linear(2, 10)
        self.layer_2 = nn.Linear(10, 10)
        self.layer_3 = nn.Linear(10, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return layer_3(relu(layer_2(relu(layer_1(x)))))."""
        out = self.relu(self.layer_1(x))
        out = self.relu(self.layer_2(out))
        return self.layer_3(out)


model_4 = CircleModelV2()

In [None]:
# Loss on raw logits, and an SGD optimizer bound to model_4's parameters.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model_4.parameters(), lr=0.5)

In [None]:
torch.manual_seed(1)

epochs = 1000

for epoch in range(epochs):

    # --- training step on the full training set ---
    model_4.train()

    y_logits = model_4(X_train).squeeze()
    loss = loss_fn(y_logits, Y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Training accuracy is based on the logits computed before this epoch's update.
    y_pred = y_logits.sigmoid().round()
    acc = accuracy_fn(y_true=Y_train, y_pred=y_pred)

    # --- evaluation on the test split ---
    model_4.eval()

    with torch.inference_mode():
        test_logits = model_4(X_test).squeeze()
        test_loss = loss_fn(test_logits, Y_test)

        test_pred = test_logits.sigmoid().round()
        test_acc = accuracy_fn(y_true=Y_test, y_pred=test_pred)

    # Report progress every 100th epoch.
    if epoch % 100 == 0:
        print(f"epoch: {epoch} | loss: {loss:.5f} | acc: {acc:.2f}% | test_loss: {test_loss:.5f} | test_acc: {test_acc:.2f}%")

In [None]:
# Decision boundary of model_4 on the train (left) and test (right) splits.
plt.figure(figsize=(12, 6))

plt.subplot(121)
plt.title("train")
plot_decision_boundary(model_4, X_train, Y_train)

plt.subplot(122)
plt.title("test")
plot_decision_boundary(model_4, X_test, Y_test)

## replicate non-linear activation functions

In [None]:
# Float values -10, -9, ..., 9 used to try out the activation functions.
A = torch.arange(-10.0, 10.0, 1.0)
A.dtype

In [None]:
# Plot the raw values of A.
plt.plot(A)

In [None]:
# Plot A after PyTorch's built-in ReLU.
plt.plot(torch.relu(A))

In [None]:
def relu(x: torch.Tensor) -> torch.Tensor:
    """Return the element-wise maximum of ``x`` and 0."""
    zero = torch.tensor(0)
    return torch.maximum(zero, x)

# Plot A after the hand-written relu defined above.
plt.plot(relu(A))

In [None]:
# Plot A after PyTorch's built-in sigmoid.
plt.plot(torch.sigmoid(A))

In [None]:
def sigmoid(x):
    """Return 1 / (1 + exp(-x)), computed element-wise."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

# Plot A after the hand-written sigmoid defined above.
plt.plot(sigmoid(A))

## multi-class classification

In [None]:
# Settings for the blob dataset: number of classes, features per sample, and the seed.
NUM_CLASSES, NUM_FEATURES, RANDOM_SEED = 4, 2, 42

In [None]:
# Generate 1000 blob samples with NUM_FEATURES features around NUM_CLASSES centres.
x_blob, y_blob = make_blobs(
    n_samples=1000,
    n_features=NUM_FEATURES,
    centers=NUM_CLASSES,
    cluster_std=1.5,
    random_state=RANDOM_SEED,
)

In [None]:
# Features become float32 tensors; labels become int64 (long) tensors.
x_blob = torch.from_numpy(x_blob).float()
y_blob = torch.from_numpy(y_blob).long()

In [None]:
# Hold out 20% of the blob samples for testing, with a reproducible split.
x_blob_t, x_blob_test, y_blob_t, y_blob_test = train_test_split(
    x_blob,
    y_blob,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [None]:
# Scatter the first two blob features, coloured by class.
plt.figure(figsize=(10, 7))
plt.scatter(x=x_blob[:, 0], y=x_blob[:, 1], c=y_blob, cmap=plt.cm.RdYlBu)

In [None]:
# Multi-class model: three linear layers with ReLU between them.


class BlobModelV0(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        input_features: number of input features per sample.
        output_features: number of output values (one logit per class).
        hidden_units: width of the two hidden layers (default 8).
    """

    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        layers = [
            nn.Linear(input_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_features),
        ]
        self.linear_layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.linear_layer_stack(x)


model_5 = BlobModelV0(input_features=2, output_features=4)

In [None]:
# Cross-entropy loss for multi-class logits, and SGD over model_5's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_5.parameters(), lr=0.1)

In [None]:
# Outputs of the untrained model: logits -> softmax probabilities -> argmax class.
model_5.eval()

with torch.inference_mode():
    y_logits = model_5(x_blob_test)
    y_pred_probs = y_logits.softmax(dim=1)
    y_preds = y_pred_probs.argmax(dim=1)
    print(y_preds[:10])




In [None]:
# training loop for model_5 on the blob data
epochs = 5000

for epoch in range(epochs):
    model_5.train()

    # Forward pass: logits, then predicted class = argmax of the softmax probabilities.
    y_logits = model_5(x_blob_t)
    y_preds = torch.softmax(y_logits, dim=1).argmax(dim=1)

    # CrossEntropyLoss is given the raw logits and the integer class labels.
    loss = loss_fn(y_logits, y_blob_t)

    acc = accuracy_fn(y_true=y_blob_t, y_pred=y_preds)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    # Evaluate on the test split without tracking gradients.
    model_5.eval()
    with torch.inference_mode():
        test_logits = model_5(x_blob_test)
        test_preds = torch.softmax(test_logits, dim=1).argmax(dim=1)

        test_loss = loss_fn(test_logits, y_blob_test)

        test_acc = accuracy_fn(y_true=y_blob_test, y_pred=test_preds)

        # Report progress every 100th epoch.
        if epoch % 100 == 0:
            print(f"epoch = {epoch}")
            # ".4f" / ".2f" set the number of decimals (a bare "4f" would only
            # set a minimum field width and still print six decimals).
            print(f"loss = {loss:.4f} | acc = {acc:.2f} | test loss = {test_loss:.4f} | test acc = {test_acc:.2f}")


In [None]:
# visualize: predict on the test split, then draw model_5's decision boundaries

model_5.eval()
with torch.inference_mode():
    y_logits = model_5(x_blob_test)
    y_preds = y_logits.softmax(dim=1).argmax(dim=1)

plt.figure(figsize=(12, 6))

plt.subplot(121)
plt.title("train")
plot_decision_boundary(model_5, x_blob_t, y_blob_t)

plt.subplot(122)
plt.title("test")
plot_decision_boundary(model_5, x_blob_test, y_blob_test)

## Another more advanced model

In [None]:
# Settings for the larger blob dataset: more classes and one extra feature.
NUM_CLASSES, NUM_FEATURES, RANDOM_SEED = 20, 3, 42

In [None]:
# Generate 1000 blob samples with NUM_FEATURES features around NUM_CLASSES centres.
x_blob, y_blob = make_blobs(
    n_samples=1000,
    n_features=NUM_FEATURES,
    centers=NUM_CLASSES,
    cluster_std=1,
    random_state=RANDOM_SEED,
)

In [None]:
# Features become float32 tensors; labels become int64 (long) tensors.
x_blob = torch.from_numpy(x_blob).float()
y_blob = torch.from_numpy(y_blob).long()

In [None]:
# Hold out 20% of the blob samples for testing, with a reproducible split.
x_blob_t, x_blob_test, y_blob_t, y_blob_test = train_test_split(
    x_blob,
    y_blob,
    test_size=0.2,
    random_state=RANDOM_SEED,
)

In [None]:
# 3-D scatter of the first three blob features, coloured by class.
fig = plt.figure(figsize=(10, 7))
plot = fig.add_subplot(projection="3d")
plot.scatter(
    xs=x_blob[:, 0],
    ys=x_blob[:, 1],
    zs=x_blob[:, 2],
    c=y_blob,
    cmap=plt.cm.RdYlBu,
)

In [None]:
#build model


class BlobModelV1(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        input_features: number of input features per sample.
        output_features: number of output values (one logit per class).
        hidden_units: width of the two hidden layers (default 8).
    """

    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_features)
        )

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.linear_layer_stack(x)

# Instantiate the class defined in this cell (not the earlier BlobModelV0):
# 3 input features, 20 classes, 64 hidden units.
model_6 = BlobModelV1(input_features=3, output_features=20, hidden_units=64)

In [None]:
# Cross-entropy loss for multi-class logits, and SGD over model_6's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_6.parameters(), lr=0.1)

In [None]:
# training loop for model_6 on the 20-class blob data
epochs = 5000

for epoch in range(epochs):
    # Put the model being optimised (model_6, not model_5) into training mode.
    model_6.train()

    # Forward pass: logits, then predicted class = argmax of the softmax probabilities.
    y_logits = model_6(x_blob_t)
    y_preds = torch.softmax(y_logits, dim=1).argmax(dim=1)

    # CrossEntropyLoss is given the raw logits and the integer class labels.
    loss = loss_fn(y_logits, y_blob_t)

    acc = accuracy_fn(y_true=y_blob_t, y_pred=y_preds)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    # Evaluate on the test split without tracking gradients.
    model_6.eval()
    with torch.inference_mode():
        test_logits = model_6(x_blob_test)
        test_preds = torch.softmax(test_logits, dim=1).argmax(dim=1)

        test_loss = loss_fn(test_logits, y_blob_test)

        test_acc = accuracy_fn(y_true=y_blob_test, y_pred=test_preds)

        # Report progress every 100th epoch.
        if epoch % 100 == 0:
            print(f"epoch = {epoch}")
            # ".4f" / ".2f" set the number of decimals (a bare "4f" would only
            # set a minimum field width and still print six decimals).
            print(f"loss = {loss:.4f} | acc = {acc:.2f} | test loss = {test_loss:.4f} | test acc = {test_acc:.2f}")


## optimize model

In [None]:
#build model


class BlobModelV2(nn.Module):
    """Deeper feed-forward classifier with a Sigmoid before the output layer.

    Layer order: Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid -> Linear.

    Args:
        input_features: number of input features per sample.
        output_features: number of output values (one logit per class).
        hidden_units: width of the three hidden layers (default 8).
    """

    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.Sigmoid(),
            nn.Linear(in_features=hidden_units, out_features=output_features)
        )

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.linear_layer_stack(x)

# Instantiate the class defined in this cell; using BlobModelV0 here would
# leave the extra layer and Sigmoid untrained and unused.
model_7 = BlobModelV2(input_features=3, output_features=20, hidden_units=128)

In [None]:
# Cross-entropy loss for multi-class logits, and SGD over model_7's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_7.parameters(), lr=0.1)

In [None]:
# training loop for model_7 on the 20-class blob data
epochs = 5000

for epoch in range(epochs):
    model_7.train()

    # Forward pass: logits, then predicted class = argmax of the softmax probabilities.
    y_logits = model_7(x_blob_t)
    y_preds = torch.softmax(y_logits, dim=1).argmax(dim=1)

    # CrossEntropyLoss is given the raw logits and the integer class labels.
    loss = loss_fn(y_logits, y_blob_t)

    acc = accuracy_fn(y_true=y_blob_t, y_pred=y_preds)

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

    # Put the model being evaluated (model_7, not model_6) into eval mode.
    model_7.eval()
    with torch.inference_mode():
        test_logits = model_7(x_blob_test)
        test_preds = torch.softmax(test_logits, dim=1).argmax(dim=1)

        test_loss = loss_fn(test_logits, y_blob_test)

        test_acc = accuracy_fn(y_true=y_blob_test, y_pred=test_preds)

        # Report progress every 250th epoch.
        if epoch % 250 == 0:
            print(f"epoch = {epoch}")
            # ".4f" / ".2f" set the number of decimals (a bare "4f" would only
            # set a minimum field width and still print six decimals).
            print(f"loss = {loss:.4f} | acc = {acc:.2f} | test loss = {test_loss:.4f} | test acc = {test_acc:.2f}")
