In [None]:
import os, sys

#Make the parent of the current working directory importable.
#Guarded so that re-running this cell does not keep appending duplicate entries to sys.path.
parent_dir = os.path.abspath("..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)


In [None]:
import torch
from torch import nn
import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Classification

## The Objectives

There are 3 types of classification:
1. Binary classification (Yes or No)
2. Multi-class classification (Multiple Choice Question)
3. Multi-label classification (Multiple Answers for a Question)

The example here uses scikit-learn to create a dataset of samples, each of which lies on either circle 1 or circle 2.  
Given a sample, the model we build needs to predict whether it lies on circle 1 or circle 2.

## The Data

### Making Data

In [None]:
#Build a toy dataset with scikit-learn: every data point lies on circle 1 or circle 2
#x = the data points, y = the label saying which circle each point belongs to
from sklearn.datasets import make_circles

number_of_samples = 1000
x, y = make_circles(number_of_samples, noise=0.03, random_state=42)

In [None]:
#Collect the samples in a pandas DataFrame so they are easy to inspect
#x[:, 0] is numpy slicing: a bare ":" (no "start_index : end_index : step") keeps every row,
#and the 0 selects the element at index 0 of each row. x[:, 1] does the same for index 1.

circles = pd.DataFrame(dict(x1=x[:, 0], x2=x[:, 1], label=y))

In [None]:
#Scatter plot of the samples with matplotlib: first column against second column, coloured by label
plt.scatter(x[:, 0], x[:, 1], c=y, cmap="RdYlBu")

### Formatting Data

In [None]:
#Using pytorch, turning data into float32 tensors
#torch.as_tensor accepts numpy arrays as well as tensors, so this cell can be re-run safely
#(torch.from_numpy would raise once x and y have already been converted to tensors)
x = torch.as_tensor(x, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

In [None]:
#Randomly split the samples with scikit-learn: 20% go to the test set, the rest to the training set
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
#Number of samples in each split, in the order (x_train, x_test, y_train, y_test)
tuple(len(split) for split in (x_train, x_test, y_train, y_test))

## Lv1 Network

### The Network

1. Set up device-agnostic code so the model runs on the GPU when one is available
2. Make a model class by subclassing `nn.Module`
3. Define the loss function and optimizer

In [None]:
#1. Device Agnostic Code: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
#2. Class by nn.Module
class Classification_lv1(nn.Module):
    """Binary classifier made of two stacked linear layers: 2 inputs -> 5 units -> 1 output.

    No activation function is applied between the layers, so the model as a
    whole can only represent a linear function of its input.
    """

    #network layers
    def __init__(self):
        super().__init__()

        #Each input sample (x) has 2 features, each output (y) is a single value
        #So we have made a network of [2, 5, 1] neurons per layer
        self.layer_1 = nn.Linear(in_features=2, out_features=5)
        self.layer_2 = nn.Linear(in_features=5, out_features=1)

    #forward propagation
    #nn.Module does not provide a usable default forward(); every subclass has to define its own
    def forward(self, x):
        """Run ``x`` through both layers and return the raw (un-activated) output.

        Args:
            x: Tensor whose last dimension has size 2 (required by ``layer_1``).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1. No sigmoid is applied here.
        """
        return self.layer_2(self.layer_1(x)) #x -> layer_1 -> layer_2

    #Gives back accuracy of predictions against their labels
    def accuracy(self, predictions, labeled_data):
        """Return the percentage (0-100) of predictions that equal their label.

        Args:
            predictions: Tensor of predicted labels.
            labeled_data: Tensor of true labels. Should have the same shape as
                ``predictions``; ``torch.eq`` broadcasts mismatched shapes,
                which would make the count meaningless.

        Returns:
            ``correct / len(labeled_data) * 100`` as a float, or ``0.0`` when
            ``labeled_data`` is empty (instead of dividing by zero).
        """
        total_samples = len(labeled_data)
        if total_samples == 0:
            return 0.0

        correct = torch.eq(predictions, labeled_data).sum().item()
        percentage_correct = (correct/total_samples) * 100
        return percentage_correct
    


#Create the Lv1 model, then move it to the target device
Model_Classification = Classification_lv1()
Model_Classification = Model_Classification.to(device)

In [None]:
#3. Define the loss function and optimizer

#BCEWithLogitsLoss = sigmoid activation + binary cross entropy in a single step
loss_function = nn.BCEWithLogitsLoss()

#Good old stochastic gradient descent over the Lv1 model's parameters
optimizer = torch.optim.SGD(Model_Classification.parameters(), lr=0.01)

### The Training/Testing

#### Loop

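The model outputs raw logits. To turn them into class predictions we apply a sigmoid activation to squish the outputs between 0 and 1, then a round function to turn each output into either 1 or 0 (on this circle or that circle). The loss function is given the raw logits directly, because `BCEWithLogitsLoss` applies the sigmoid itself.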

In [None]:
#NOTE: the model was already created (and its weights initialised) in an earlier cell,
#so seeding here does not affect the initial weights
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 200

#Sending the data to the same device as the model
x_test, x_train, y_test, y_train = x_test.to(device), x_train.to(device), y_test.to(device), y_train.to(device)


for epoch in range(epochs):


    #Training
    Model_Classification.train()

    #1. Forward pass: raw outputs first, then sigmoid + round to get 0/1 predictions
    #squeeze(dim=1) removes only the size-1 output dimension; a bare squeeze() would also
    #remove the batch dimension if the batch ever held a single sample
    y_predictions_s = Model_Classification(x_train).squeeze(dim=1)
    y_predictions = torch.round(torch.sigmoid(y_predictions_s))

    #2. The loss is computed on the raw outputs (BCEWithLogitsLoss applies the sigmoid itself)
    #We also calculate something additional, the percentage of correct predictions
    train_loss = loss_function(y_predictions_s, y_train)
    percentage_correct = Model_Classification.accuracy(y_predictions, y_train)

    #3. Zero Grad, Back Propagation, Gradient Descent
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()



    #Testing
    Model_Classification.eval()

    with torch.inference_mode():
        y_test_predictions_s = Model_Classification(x_test).squeeze(dim=1)
        y_test_predictions = torch.round(torch.sigmoid(y_test_predictions_s))

        test_loss = loss_function(y_test_predictions_s, y_test)
        test_percentage_correct = Model_Classification.accuracy(y_test_predictions, y_test)



    #Feedback: report the test metrics too (they were computed above but never shown),
    #using the same format as the Lv2 training loop

    if epoch % 10 == 0:
        print(f"Epoch: {epoch} | Train Loss: {train_loss:.5f}, Train Accuracy: {percentage_correct:.5f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_percentage_correct:.5f}%")


#### Visualization

Don't worry about what this code is doing, just know it helps us visualize why learning sucks right now

In [None]:
import requests
from pathlib import Path 

# Download helper functions from Learn PyTorch repo (if not already downloaded)
# NOTE(review): this fetches code from the moving `main` branch and then imports it;
# consider pinning the URL to a specific commit.
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  print("Downloading helper_functions.py")
  # timeout: do not hang forever if the server is unreachable
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py", timeout=30)
  # Fail loudly on an HTTP error instead of saving an error page as helper_functions.py
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

from helper_functions import plot_decision_boundary

In [None]:
#Side-by-side decision boundaries of the Lv1 model: training data on the left, test data on the right
plt.figure(figsize=(12, 6))
panels = [("Train", x_train, y_train), ("Test", x_test, y_test)]
for position, (title, features, labels) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(title)
    plot_decision_boundary(Model_Classification, features, labels)

## The Improving

So what can we do when our model isn't performing how we like it?

1. More Training
2. More hidden layers & neurons
3. Change Optimizers
4. Change Activation Functions
5. Alter Hyper parameters (learning rate, regularization rate etc)

A neural network can approximate any continuous function, in any number of dimensions, as long as it has enough neurons and uses non-linear activation functions

But why mimic functions? Because it is a practical way of finding patterns that solve problems, even when we cannot describe those patterns ourselves<br>
A neural network combines linear functions and non-linear functions

For more please refer to http://neuralnetworksanddeeplearning.com/chap4.html

# Lv2 Network

## The Network

The following changes will be made for improving performance on the data<br>
The addition of RELU will allow the model to handle non linear data

1. More Neurons Per Hidden Layer `5 -> 10`
2. More Linear Layers `2 -> 3` (one hidden layer becomes two)
3. Add a `ReLU` activation after every hidden layer

In [None]:
#Device agnostic code: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
class Classification_lv2(nn.Module):
    """Binary classifier with three linear layers (2 -> 10 -> 10 -> 1) and ReLU in between.

    ReLU is applied after ``layer_1`` and after ``layer_2``; the output of
    ``layer_3`` is returned without any activation.
    """

    def __init__(self):
        super().__init__()

        self.layer_1 = nn.Linear(in_features=2, out_features=10)
        self.layer_2 = nn.Linear(in_features=10, out_features=10)
        self.layer_3 = nn.Linear(in_features=10, out_features=1)

        #One ReLU module is reused after both hidden layers
        self.ReLU = nn.ReLU()

    #Later down the road, we can throw all this in a for loop, with all the layers in an iterable list
    #Because right now manually doing a forward pass, and defining what layers use which activation functions... kinda dumb
    def forward(self, x):
        """Return the raw (un-activated) output of the network for ``x``.

        Args:
            x: Tensor whose last dimension has size 2 (required by ``layer_1``).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1. No sigmoid is applied here.
        """
        #x -> layer_1 -> ReLU -> layer_2 -> ReLU -> layer_3
        return self.layer_3(self.ReLU(self.layer_2(self.ReLU(self.layer_1(x)))))

    def accuracy(self, predictions, labeled_data):
        """Return the percentage (0-100) of predictions that equal their label.

        Args:
            predictions: Tensor of predicted labels.
            labeled_data: Tensor of true labels. Should have the same shape as
                ``predictions``; ``torch.eq`` broadcasts mismatched shapes,
                which would make the count meaningless.

        Returns:
            ``correct / len(labeled_data) * 100`` as a float, or ``0.0`` when
            ``labeled_data`` is empty (instead of dividing by zero).
        """
        total_samples = len(labeled_data)
        if total_samples == 0:
            return 0.0

        correct = torch.eq(predictions, labeled_data).sum().item()
        percentage_correct = (correct/total_samples) * 100
        return percentage_correct
    


#Create the Lv2 model, then move it to the target device
Model_Classification_Upgrade = Classification_lv2()
Model_Classification_Upgrade = Model_Classification_Upgrade.to(device)

In [None]:
#Loss function: sigmoid activation + binary cross entropy in a single step
loss_function = nn.BCEWithLogitsLoss()

#Optimizer: stochastic gradient descent over the Lv2 model's parameters
optimizer = torch.optim.SGD(Model_Classification_Upgrade.parameters(), lr=0.1)

## The Training/Testing

The loop is the same code as before, except we train for 2000 epochs instead of 200 (the optimizer above also uses a learning rate of 0.1 instead of 0.01)

In [None]:
#NOTE: the model was already created (and its weights initialised) in an earlier cell,
#so seeding here does not affect the initial weights
torch.manual_seed(42)
torch.cuda.manual_seed(42)

epochs = 2000

#Sending the data to the same device as the model
x_test, x_train, y_test, y_train = x_test.to(device), x_train.to(device), y_test.to(device), y_train.to(device)


for epoch in range(epochs):


    #Training
    Model_Classification_Upgrade.train()

    #1. Forward Pass: raw outputs first, then sigmoid + round to get 0/1 predictions
    #squeeze(dim=1) removes only the size-1 output dimension; a bare squeeze() would also
    #remove the batch dimension if the batch ever held a single sample
    y_predictions_s = Model_Classification_Upgrade(x_train).squeeze(dim=1)
    y_predictions = torch.round(torch.sigmoid(y_predictions_s))

    #2. Calculate the loss on the raw outputs (BCEWithLogitsLoss applies the sigmoid itself),
    #with something additional, the percentage of correct predictions
    train_loss = loss_function(y_predictions_s, y_train)
    percentage_correct = Model_Classification_Upgrade.accuracy(y_predictions, y_train)

    #3. Zero Grad, Back Propagation, Gradient Descent
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()



    #Testing
    Model_Classification_Upgrade.eval()

    with torch.inference_mode():
        y_test_predictions_s = Model_Classification_Upgrade(x_test).squeeze(dim=1)
        y_test_predictions = torch.round(torch.sigmoid(y_test_predictions_s))

        test_loss = loss_function(y_test_predictions_s, y_test)
        test_percentage_correct = Model_Classification_Upgrade.accuracy(y_test_predictions, y_test)



    #Feedback every 100 epochs

    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train Loss: {train_loss:.5f}, Train Accuracy: {percentage_correct:.5f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_percentage_correct:.5f}%")

## Visualization

Don't worry about what this code is doing, just know it helps us visualize the decision boundary the upgraded model has learned

In [None]:
import requests
from pathlib import Path 

# Download helper functions from Learn PyTorch repo (if not already downloaded)
# NOTE(review): this fetches code from the moving `main` branch and then imports it;
# consider pinning the URL to a specific commit.
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  print("Downloading helper_functions.py")
  # timeout: do not hang forever if the server is unreachable
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py", timeout=30)
  # Fail loudly on an HTTP error instead of saving an error page as helper_functions.py
  request.raise_for_status()
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

from helper_functions import plot_decision_boundary

In [None]:
#Side-by-side decision boundaries of the Lv2 model: training data on the left, test data on the right
plt.figure(figsize=(12, 6))
panels = [("Train", x_train, y_train), ("Test", x_test, y_test)]
for position, (title, features, labels) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(title)
    plot_decision_boundary(Model_Classification_Upgrade, features, labels)

## The Save/Load

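Saving and loading follow the same pattern: we save the trained model's `state_dict()` to disk, then load it into a fresh instance of the same class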

In [None]:
#Saving

from pathlib import Path


#Create directory (no error if it already exists)
model_path = Path("models")
model_path.mkdir(parents=True, exist_ok=True)

#Create saving path, usually pytorch files are called "pth"
model_name = "pytorch_classification_model_lv2.pth"
model_save_path = model_path / model_name

#Saving the state dict of the trained model *instance*
#(state_dict() is an instance method: calling it on the Classification_lv2 class itself raises a TypeError)
torch.save(obj=Model_Classification_Upgrade.state_dict(),
           f=model_save_path)

In [None]:
#Loading

#Read the saved state_dict back from disk with torch.load()
saved_state = torch.load(f=model_save_path)

#Create a new model and load the saved state_dict into it
cooler_model = Classification_lv2()
cooler_model.load_state_dict(saved_state)