<a href="https://colab.research.google.com/github/MatthewHawksbyGithub/pytorch-deep-learning/blob/main/01_Notebook_three.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import torch
from torch import nn #nn contains all of PyTorch's neural networks components.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

#Acquire data.
#Convert into tensors.
#Build a model, or pick a pretrained model.
#Fit the model to the data and make a prediction.
#evaluate the model.
#Improving through experimentation.
#Save and reload your trained model.

#Exploring an example PyTorch end-to-end workflow.

#Let's recreate some known data using a linear regression formula.
#Making a straight line with known parameters. y = a +bx b
# is the weight and a is the bias.

weight = 0.7
bias = 0.3

#create
start = 0
end = 1
step = 0.02
X = torch.arange(start,end,step).unsqueeze(dim=1)
y = weight * X + bias

X[:10], y[:10], len(X), len(y)

### Splitting data into training and test sets
#Generalization is the ability for a machine learning model to perform
#well on data that it hasn't seen before.

#Training set, validation set and testing set.
#Training is 60-80
#Validation is 10-20, test set is 10-20.

#80 20 is the most common split.

train_split = int(0.8 * len(X))
x_train, y_train = X[:train_split], y[:train_split]
x_test, y_test = X[train_split:], y[train_split:]

#scikit learn has a train test split that includes randomness.

len(x_train), len(y_train), len(x_test), len(y_test)

#Let's visualize the data with matplotlib:

def plot_predictions(train_data=x_train,train_labels=y_train,test_data=x_test,
                     test_labels=y_test,predictions=None):
  plt.figure.Figure(figsize=(10,7))
  plt.scatter(train_data,train_labels,c="b",s=4,label="Training Data")
  plt.scatter(test_data,test_labels,c="g",s=4,label="Testing Data")
  if predictions is not None:
    plt.scatter(test_data,predictions,c="r",s=4,label="Predictions")
  plt.legend(prop={"size":14});

#plot_predictions()

#First PyTorch Model:
#Using a linear regression model.
#Create a linear regression model class.

class LinearRegressionModel (nn.Module):
  #<-- Almost everything inherits from nn.Module.
  def __init__(self):
    super().__init__()
    self.weights = nn.Parameter(torch.randn(1,
                                            requires_grad=True, #
                                            dtype=torch.float))
    self.bias = nn.Parameter(torch.randn(1,
                                         requires_grad=True,
                                         dtype=torch.float))
  def forward(self, x: torch.Tensor) -> torch.Tensor: #X is the input data.
      return self.weights * x + self.bias #The goal of this model is that
      #the model will start with random values for the weights and biases
      #and iteratively approach the known values for the weights and biases.
      #An algorithm called 'gradient descent'.

  #Two main algorithms: Gradient descent and backpropagation.

  #requires_Grad -> These algorithms are already implemented
  #torch.autograd

  #FORWARD DEFINES THE COMPUTATION MADE AT EVERY CALL!
  #IF YOU'RE GOING TO SUBCLASS nn.Module, you MUST HAVE
  #A FORWARD METHOD.

  ###PyTorch Model building essentials;

  #torch.nn - contains all the building blocks for computational graphs

  #torch.nn.Parameter - What parameters our model will try and learn.

  #torch.nn.Module - the base class for all neural network modules.

  #torch.optim - optimizers are here.
  #torchvision.models <=premade models for computer vision

  #torchmetrics
  #torch.utils.tensorboard

  #Create a random seed:
  torch.manual_seed(42)

  #Create an instance of the model.
model_0 = LinearRegressionModel()
print(model_0.state_dict())

#weights and bias are just random unaltered numbers right now.
#Predictive power is nil.

with torch.inference_mode():#Inference mode turns off gradient training.
  y_pred = model_0(x_test) #predictions are faster without inference mode.

#y_pred
#plot_predictions(predictions = y_pred)

## The whole idea of train is for a model to move some 'unknown' parameters
#(These may be random) to some 'known' parameters.

#Or from a poor-representation of the data, to a better representation of the data.

#How to measure how good or poor the representation is of the data?
#Use a loss function. "Minimizing the loss function"
#PyTorch has many loss functions built in.
#a loss function may also be called cost function or criterion.

#Mean absolute error.....
#Optimizer -> takes into account the loss of the model and adjusts the model's
#parameters(e.g. weights and biases)

#Setting up a loss function and optimizer in PyTorch

#for PyTorch we need a training loop and a testing loop.

#L1Loss
#SETTING UP A LOSS FUNCTION:

loss_fn = nn.L1Loss()

#Setup an optimizer i.e. SGD Stochastic Gradient Descent
#Tries random numbers repeatedly to minimize the loss function.

optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr = 0.01)


#A hyperparameter is a value that you would set yourself.
#Learning rate here affects the 'step' of the optimizer; smaller learning rates
#give a smaller step rate. Parameter will change faster with a higher lr.

### Building a training loop in PyTorch

#A couple of things we need in a training loop:
#0. Loop through the data.
#1. Forward Pass, uses the Forward() function in the model. "forward propagation"
#2. Calculate the loss. (compare forward pass predictions to ground truth labels)
#3. Optimizer zero-grad
#4. Loss Backwards - move backwards through the network to calculate the gradients
#of the parameters of our model with respect to the loss (BACKPROPAGATION)
#5. Optimizer step - use the optimizer to adjust our model's parameters to
#try and improve the loss. (GRADIENT DESCENT)
#WHAT IS GRADIENT? GRADIENT IS A SLOPE.

#A epoch is one loop through the data.
epochs = 100

epoch_count = []
loss_values = []
test_loss_values = []

#0.
for epoch in range(epochs): #epochs are a hyperparameter.
  model_0.train() #Must set the model to training mode. The other is evaluation mode.
  #1. Forward pass
  y_pred = model_0(x_train)
  #2. Recalculate the loss.
  loss = loss_fn(y_pred, y_train)
  #3. optimizer zero grad
  optimizer.zero_grad()
  #Backpropagation
  loss.backward()
  #Step the optimizer
  optimizer.step() #by default, how the optimizer changes will
  #accumulate through the loop. So the changes must be zeroed during
  #each loop iteration.
  model_0.eval()
  with torch.inference_mode():
    test_pred = model_0(x_test)
    test_loss = loss_fn(test_pred, y_test)
    if (epoch%10 == 0):
      epoch_count.append(epoch)
      loss_values.append(np.array(loss))
      test_loss_values.append(np.array(test_loss))
      #print(f"Epoch: {epoch} | Loss: {loss} | Test loss: {test_loss}")

#x test should predict y test.

#plot the loss curves

#plt.plot(epoch_count, loss_values, label = "Train loss")
#plt.plot(epoch_count, test_loss_values, label= "Test loss")
#plt.title("Training and test loss curves")
 #Can be automated later on.
 #Reducing loss

#Three methods of saving and loading a pytorch object;
#torch.save() uses Python's 'pickle' format.
#torch.load() loads a saved PyTorch object
#torch.nn.Module.load_state_dict() loads the model's saved state dictionary.

#state dict is likely less useful to look at
# when you have millions of parameters.

#The optimizer also has a state dict.

from pathlib import Path

#Can save from google colab to google drive.

MODEL_PATH = Path("Models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

MODEL_SAVE_PATH

#print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(obj=model_0.state_dict(), f=MODEL_SAVE_PATH)

#load the state dict into a newly instantiated instance of our model class.

loaded_model_0 = LinearRegressionModel()

loaded_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))
#will print with <All keys matched successfully>  ....

loaded_model_0.state_dict()

loaded_model_0.eval()
with torch.inference_mode():
  loaded_model_preds = loaded_model_0(x_test)

loaded_model_preds

import torch
from torch import nn
import matplotlib as plt
import numpy as np
import pandas as pd

device = "cuda" if torch.cuda.is_available() else "cpu"

weight = 0.8
bias = 0.2

#Trying to build a model that can estimate the above values.

start = 0
end = 1
step = 0.02

X = torch.arange(start, end, step).unsqueeze(dim=1)
y = weight * X + bias

#split data.

train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]

len(X_train), len(y_train), len(X_test), len(y_test)

class LinearRegressionModelV2(nn.Module):
  def __init__(self):
    super().__init__()
    #Using nn.Linear() to create the model's parameters.
    #called: linear transform or probing layer, dense layer etc.
    self.linear_layer = nn.Linear(in_features = 1,
                                  out_features = 1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    return self.linear_layer(x)

torch.manual_seed(42)
model_1 = LinearRegressionModelV2()
model_1, model_1.state_dict()

#PyTorch has many built-in mathematical transofrmations.

#Set the model to use the target device.

next(model_1.parameters()).device


model_1.to(device)
next(model_1.parameters()).device

###
#Training code.

#For training we need:
#Loss function
#Optimizer
#Training loop
#Testing loop
#


loss_fn = nn.L1Loss()

optimizer = torch.optim.SGD(params= model_1.parameters(), lr=0.01)


torch.manual_seed(42)

epochs = 200

#Put data on the target device( DEVICE AGNOSTIC CODE FOR DATA.)
#ALL OF THE THINGS YOU ARE COMPUTING WITH SHOULD BE ON THE SAME DEVICE.

x_train = x_train.to(device)
y_train = y_train.to(device)
x_test = x_test.to(device)
y_test = y_test.to(device)

for epoch in range(epochs):
  model_1.train()

  #1.FORWARD PASS
  y_pred = model_1(x_train)

  #2. Calculate the loss
  loss = loss_fn(y_pred, y_train)

  #3. Optimizer zero grad OTHERWISE THE OPTIMIZER WILL ACCUMULATE GRADIENTS.
  optimizer.zero_grad()

  #4. Perform backpropagation
  loss.backward()

  #5. Optimizer step
  optimizer.step()

  ### Testing:
  model_1.eval()
  with torch.inference_mode():
    test_pred = model_1(x_test)

    test_loss = loss_fn(test_pred, y_test)

  #Print out what's happening.

  if epoch % 10 == 0:
    print(f"Epoch: {epoch} | Loss: {loss} | Test loss: {test_loss}")
model_1.eval()
with torch.inference_mode():
  y_preds = model_1(x_test)
y_preds

plot_predictions(predictions=y_preds)


OrderedDict([('weights', tensor([0.3367])), ('bias', tensor([0.1288]))])
Epoch: 0 | Loss: 0.6161779165267944 | Test loss: 0.584976315498352
Epoch: 10 | Loss: 0.5009680986404419 | Test loss: 0.45026636123657227
Epoch: 20 | Loss: 0.3857582211494446 | Test loss: 0.3155565857887268
Epoch: 30 | Loss: 0.2705483138561249 | Test loss: 0.18084672093391418
Epoch: 40 | Loss: 0.15533843636512756 | Test loss: 0.04613689333200455
Epoch: 50 | Loss: 0.05875825136899948 | Test loss: 0.06886560469865799
Epoch: 60 | Loss: 0.04580377787351608 | Test loss: 0.0947304219007492
Epoch: 70 | Loss: 0.04181947931647301 | Test loss: 0.09405827522277832
Epoch: 80 | Loss: 0.03831038624048233 | Test loss: 0.08853326737880707
Epoch: 90 | Loss: 0.034879542887210846 | Test loss: 0.08094760030508041
Epoch: 100 | Loss: 0.03144557401537895 | Test loss: 0.07267506420612335
Epoch: 110 | Loss: 0.028006773442029953 | Test loss: 0.0644025206565857
Epoch: 120 | Loss: 0.024577608332037926 | Test loss: 0.056816864758729935
Epoch: 

AttributeError: ignored

In [23]:
import torch
from torch import nn #nn contains all of PyTorch's neural networks components.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

def plot_predictions(train_data=x_train,train_labels=y_train,test_data=x_test,
                     test_labels=y_test,predictions=None):
  plt.figure(figsize=(10,7))
  plt.scatter(train_data,train_labels,c="b",s=4,label="Training Data")
  plt.scatter(test_data,test_labels,c="g",s=4,label="Testing Data")
  if predictions is not None:
    plt.scatter(test_data,predictions,c="r",s=4,label="Predictions")
  plt.legend(prop={"size":14});



model_1.eval()
with torch.inference_mode():
  y_preds = model_1(x_test)
y_preds

#SAVING AND LOADING A TRAINED MODEL.

from pathlib import Path

MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

MODEL_NAME = "01_pytorch_workflow_model_1.pth"
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

#Hyperparameters, epochs etc often written in all-caps

print(f"Saving model to: {MODEL_SAVE_PATH}")

torch.save(obj=model_1.state_dict(),
           f=MODEL_SAVE_PATH)





Saving model to: models/01_bytorch_workflow_model_1.pth


In [28]:
loaded_model_1 = LinearRegressionModelV2()

loaded_model_1.load_state_dict(torch.load(MODEL_SAVE_PATH))

loaded_model_1.to(device) #device agnostic code

next(loaded_model_1.parameters()).device

loaded_model_1.eval()
with torch.inference_mode():
  loaded_model_1_preds = loaded_model_1(x_test)

#ExtraCurriculars:
#01 extracurriculars.
#"What is backpropagation really doing by 3blue1brown"
#gradient descent vids by robert kwi and 3b1b
#what is torch.nn really?


tensor([[True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True]])

In [None]:
#"What is a classification problem?"
#"Spam or not spam" (Binary classification)
#imagenet dataset for computer vision
#imagenet 1k
#multi-label classification : "Categories on a wikipedia page"
#multi-class classification : "One of many different things"
# Architecture of a neural network classification model...
#Input shapes and output shapes of a classification model (FEATURES AND LABELS)
#Creating custom data to view fit on and predict on
#Steps in modelling:
  #Creating a model, setting a loss function and optimiser, creating a training
  #loop, evaluating the model.
#Saving and loading models
#"Harnessing the power of nonlinearity"
#Different Classification evaluation methods

#Classification INPUTS and OUTPUTS.

#224x224x3 WIDTH HEIGHT COLOR

#Above a certain probability: generate a label.
#Usually also batch_size.
#Different orderings may occur.
#Width and height are usually together.
#Batch_size is often 32. "minibatches" batch_size, colour_channels, width, height
#Looks at 32 images at a time.

#Architecture of a classification model:
#Input layer shape(in features) -> This is the same as the number of features.
#eg. age, sex, height, weight, etc.
#Minimum of one hidden layer.
#Neurons per hidden layer, Usually min 10 or max 512.
#Behind the scenes, PyTorch creates each node.
#One output node per class being detected.
#Hidden layer activation: ReLU is common; Rectified linear Unit.....
#Output activation; Sigmoid.
#loss function->depends on what type of classification task you are trying to do
# CODE ----->

#Neural network classification with PyTorch.
#All code is also recorded under Github Repo.

#

