In [None]:
import pickle,gzip,math,os,time,shutil,torch,matplotlib as mpl, numpy as np
from pathlib import Path
from torch import tensor
from fastcore.test import test_close
# Seed PyTorch's RNG so randomly drawn tensors are reproducible across runs.
torch.manual_seed(42)

# Display settings: grayscale images, compact numeric printing.
mpl.rcParams['image.cmap'] = 'gray'
np.set_printoptions(precision=2, linewidth=125)
torch.set_printoptions(precision=2, linewidth=125, sci_mode=False)

path_data = Path('data')
path_gz = path_data/'mnist.pkl.gz'
# NOTE(review): pickle.load can execute arbitrary code from the file; this
# presumes data/mnist.pkl.gz comes from a trusted source -- confirm.
with gzip.open(path_gz, 'rb') as f:
    # The pickle holds three items; only the first two (x, y) pairs are kept.
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')
# Convert each loaded array into a torch tensor.
x_train, y_train, x_valid, y_valid = [tensor(a) for a in (x_train, y_train, x_valid, y_valid)]

### Cell 1: Importing Libraries and Loading Data
This cell performs several key tasks:
1. **Imports Libraries:** Libraries such as `pickle`, `gzip`, `torch` (PyTorch), `matplotlib`, and `numpy` are imported for data processing, machine learning, and plotting.
2. **Setting Up Environment:** Sets the random seed for PyTorch for reproducibility and configures matplotlib and print options for better data visualization.
3. **Loading Data:** Loads the MNIST dataset of handwritten digit images from a gzipped pickle file and unpacks its training and validation sets; the third item stored in the pickle is discarded.
4. **Converting to Tensors:** Converts the data into PyTorch tensors, which are multi-dimensional arrays optimized for machine learning operations.



In [None]:
# Unpack the two dimensions of x_train (the unpacking requires it to be 2-D).
n,m = x_train.shape
# One more than the largest label value; this is a 0-d tensor, not a Python int.
c = y_train.max()+1
# Trailing expression: display all three values as the cell output.
n,m,c

### Cell 2: Defining Dimensions
This cell extracts dimensions of the dataset:
1. **Shape of Training Data:** Retrieves the number of samples (`n`) and features (`m`) in the training data.
2. **Number of Classes:** Calculates the number of output classes (`c`) based on the maximum label in `y_train`.



In [None]:
# num hidden: width of the hidden layer (second dim of w1, length of b1, first dim of w2)
nh = 50

### Cell 3: Setting the Number of Hidden Units
This cell sets the number of neurons (`nh`) in the hidden layer of the neural network. These neurons are intermediate processing units that help the network learn complex patterns.



In [None]:
# Weights are drawn from a standard normal; w1 is drawn before w2 so the
# seeded RNG hands out the same values in the same order.
w1 = torch.randn(m, nh)   # layer 1: m inputs -> nh hidden units
w2 = torch.randn(nh, 1)   # layer 2: nh hidden units -> 1 output
# Biases start at zero (no random draw involved).
b1, b2 = torch.zeros(nh), torch.zeros(1)

### Cell 4: Initializing Weights and Biases
This cell initializes the weights and biases for two layers of a neural network:
1. **Weights and Biases for Layers:** Initializes weights (`w1`, `w2`) and biases (`b1`, `b2`) for the layers. Weights are randomly initialized, while biases are set to zero.
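2. **Dimensions:** `w1` has shape `(m, nh)` and `b1` has shape `(nh,)` for the hidden layer; `w2` has shape `(nh, 1)` and `b2` has shape `(1,)`, so the network produces a single output value per sample.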



In [None]:
def lin(x, w, b):
    """Affine layer: matrix-multiply `x` by `w`, then add the bias `b`."""
    projected = x @ w
    return projected + b

### Cell 5: Defining a Linear Function
This cell defines a linear function `lin` used in the neural network. It performs a matrix multiplication of the input `x` with the weights `w` and adds the bias `b`. This represents a linear transformation in the network.



In [None]:
# Run the first linear layer on the validation inputs.
t = lin(x_valid, w1, b1)
# Trailing expression: display the shape of the layer output.
t.shape

### Cell 6: Applying the Linear Function
This cell applies the linear transformation to the validation data:
1. **Linear Transformation:** Uses the `lin` function with the validation data `x_valid`, weights `w1`, and bias `b1`. This is the output of the first layer of the neural network.
2. **Output Shape:** Checks the shape of the transformed data `t` to ensure the operation was successful.



In [None]:
def relu(x):
    """Rectified linear unit: replace every value below zero with zero."""
    floor = 0.
    return x.clamp_min(floor)

### Cell 7: Defining the ReLU Activation Function
This cell defines the ReLU (Rectified Linear Unit) activation function:
1. **ReLU Function:** ReLU is defined as `f(x) = max(0, x)`. It introduces non-linearity to the network, allowing it to learn complex patterns.



In [None]:
# Apply the activation to the first-layer output. Rebinding `t` is safe to
# re-run: clamping an already-clamped tensor leaves it unchanged.
t = relu(t)
# Trailing expression: display the activated tensor.
t

### Cell 8: Applying ReLU to the Linear Transformation
This cell applies the ReLU activation function to the linear transformation:
1. **Applying ReLU:** The ReLU function is applied to the data `t`, which is the output of the first neural network layer.



In [None]:
def model(xb):
    """Forward pass of the two-layer network: linear -> ReLU -> linear.

    Uses the notebook-level parameters w1, b1, w2 and b2.
    """
    hidden = relu(lin(xb, w1, b1))
    return lin(hidden, w2, b2)

### Cell 9: Defining the Neural Network Model
This cell defines the entire neural network model:
1. **Model Definition:** The `model` function takes input and applies two layers: a linear transformation followed by ReLU, and then another linear transformation.



In [None]:
# Forward the validation inputs through the full model.
res = model(x_valid)
# Trailing expression: display the output's shape.
res.shape

### Cell 10: Testing the Model on Validation Data
This cell tests the neural network model on the validation data:
1. **Model Testing:** The validation data `x_valid` is passed through the model to obtain the output `res`.
2. **Result Shape:** Checks the shape of `res` to understand the output structure of the network.



In [None]:
# Show the prediction and label shapes side by side before combining them.
res.shape,y_valid.shape

### Cell 11: Checking Shapes of Model Output and Labels
This cell checks the dimensions of the model output `res` and the validation labels `y_valid` to ensure compatibility for further calculations.



In [None]:
# NOTE(review): res has a trailing dimension of size 1 (w2 is (nh,1)); if
# y_valid is 1-D, as presumed, this subtraction broadcasts to a 2-D result
# rather than an element-wise difference -- confirm with the displayed shape.
(res-y_valid).shape

### Cell 12: Shape of Difference between Output and Labels
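Calculates and checks the shape of the difference between the model output and validation labels, a step typically part of loss computation in models. Because `res` has a trailing dimension of size 1 (`w2` has a single output column) while the labels are presumably one-dimensional, the subtraction broadcasts to a two-dimensional result instead of an element-wise difference — which is why the following cells drop that extra dimension first.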



In [None]:
# Index column 0 to drop res's second dimension; res itself is not modified.
res[:,0].shape

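### Cell 13: Selecting the Output Column
Indexes column 0 of the model output (`res[:,0]`) to drop its trailing dimension of size 1, giving a result whose dimensionality matches the labels, which is crucial for comparison or loss calculations. `res` itself is not modified.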



In [None]:
# squeeze() removes every size-1 dimension -- an alternative to res[:,0].
res.squeeze().shape

### Cell 14: Squeezing Model Output
Applies the `squeeze` method to `res` to remove dimensions of size 1, simplifying operations like loss calculation.



In [None]:
# With the extra dimension dropped, the subtraction is element-wise per
# sample (presuming y_valid is 1-D -- confirm with the displayed shape).
(res[:,0]-y_valid).shape

### Cell 15: Shape of Difference after Adjusting Dimensions
After adjusting the dimensions of `res`, calculates the difference with `y_valid` again to ensure accurate loss computation.



In [None]:
# Cast both label tensors to float so they can be subtracted from the
# model's output in the squared-error loss.
y_train = y_train.float()
y_valid = y_valid.float()

# Predictions for the training inputs; the trailing expression shows their shape.
preds = model(x_train)
preds.shape

### Cell 16: Preprocessing Labels and Getting Predictions
Converts labels to floating-point numbers and gets predictions from the model using training data. Also checks the shape of the predictions.



In [None]:
def mse(output, targ):
    """Mean squared error between column 0 of `output` and `targ`."""
    residual = output[:, 0] - targ
    squared = residual.pow(2)
    return squared.mean()

### Cell 17: Defining Mean Squared Error Function
Defines the Mean Squared Error (MSE) function, a common loss function used in regression tasks.



In [None]:
# Loss of the randomly initialised model on the training predictions.
mse(preds, y_train)

### Cell 18: Calculating MSE for Predictions
Computes the MSE of the model's predictions against the actual training labels.



In [None]:
# sympy is used here to work out derivatives symbolically.
from sympy import symbols,diff
# Create the symbols x and y (only x is used in the visible cells).
x,y = symbols('x y')
# Derivative of x**2 with respect to x.
diff(x**2, x)

### Cell 19: Symbolic Differentiation with SymPy
Demonstrates symbolic differentiation using SymPy, a symbolic mathematics library.



In [None]:
# Derivative of 3*x**2+9 with respect to x; the constant term drops out.
diff(3*x**2+9, x)

### Cell 20: Another Example of Symbolic Differentiation
Performs symbolic differentiation on a polynomial expression.



In [None]:
def lin_grad(inp, out, w, b):
    """Backward pass of the affine layer ``out = inp @ w + b``.

    Reads the upstream gradient from ``out.g`` and stores the gradients with
    respect to the layer's input, weights and bias on ``inp.g``, ``w.g`` and
    ``b.g``. Nothing is returned.

    Args:
        inp: layer input, indexed as (batch, n_in).
        out: layer output; must already carry ``out.g`` of shape (batch, n_out).
        w: weight matrix of shape (n_in, n_out).
        b: bias vector of shape (n_out,).
    """
    # grad of matmul with respect to input
    inp.g = out.g @ w.t()
    # grad with respect to the weights: the batch-summed outer product of each
    # input row with its output-gradient row. Written as one matmul so the
    # (batch, n_in, n_out) intermediate of the broadcast-and-sum form is never
    # materialised.
    w.g = inp.t() @ out.g
    # the bias is added to every row, so its gradient sums over the batch
    b.g = out.g.sum(0)

### Cell 21: Defining Gradient for Linear Layer
Defines a function to compute gradients for a linear layer, an essential part of backpropagation in neural networks.



In [None]:
def forward_and_backward(inp, targ):
    """Run the network forward on `inp`, then backpropagate by hand.

    Uses the notebook-level parameters w1, b1, w2 and b2. Gradients are
    stored as `.g` attributes by `lin_grad` (on the parameters and on `inp`)
    and on the intermediate activations. Returns None.
    """
    # forward pass:
    pre_act = lin(inp, w1, b1)
    hidden = relu(pre_act)
    out = lin(hidden, w2, b2)
    err = out[:,0] - targ
    # The loss value is computed here but is neither returned nor stored.
    loss = err.pow(2).mean()

    # backward pass:
    # Gradient of the mean of squared errors with respect to `out`.
    n_rows = inp.shape[0]
    out.g = 2.*err[:,None] / n_rows
    lin_grad(hidden, out, w2, b2)
    # ReLU passes gradient only where its input was positive.
    positive = (pre_act>0).float()
    pre_act.g = positive * hidden.g
    lin_grad(inp, pre_act, w1, b1)

### Cell 22: Forward and Backward Pass
Implements the forward and backward passes of the neural network, including loss calculation and backpropagation.



In [None]:
# Populates the .g gradient attributes on the parameters and on x_train;
# the function has no return statement, so the cell displays nothing.
forward_and_backward(x_train, y_train)

### Cell 23: Running Forward and Backward Pass
Executes the forward and backward pass functions using training data and labels.



In [None]:
# Save for testing against later
def get_grad(x):
    """Return a copy of the gradient stored on `x.g`."""
    return x.g.clone()

# Tensors whose hand-computed gradients are snapshotted.
chks = (w1, w2, b1, b2, x_train)
grads = w1g,w2g,b1g,b2g,ig = tuple(get_grad(tsr) for tsr in chks)

### Cell 24: Storing Gradients for Testing
Saves the gradients of weights, biases, and input for later comparison or testing.



In [None]:
def mkgrad(x):
    """Return a copy of `x` with autograd tracking switched on."""
    duplicate = x.clone()
    return duplicate.requires_grad_(True)

# Autograd-enabled copies of the tensors listed in `chks`, in the same order.
ptgrads = w12,w22,b12,b22,xt2 = tuple(mkgrad(tsr) for tsr in chks)

### Cell 25: Preparing Variables for PyTorch Gradient Calculation
Creates copies of variables with gradient tracking enabled for use with PyTorch's automatic differentiation.



In [None]:
# NOTE(review): this cell repeats an earlier cell of the notebook.
# Cast both label tensors to float so they can be subtracted from the
# model's output in the squared-error loss.
y_train = y_train.float()
y_valid = y_valid.float()

# Predictions for the training inputs; the trailing expression shows their shape.
preds = model(x_train)
preds.shape

### Cell 26: Preprocessing Labels and Getting Predictions (repeat of Cell 16)
Converts labels to floating-point numbers for loss calculations and gets model predictions from the training data.



In [None]:
def mse(output, targ):
    """Mean squared error between column 0 of `output` and `targ`."""
    residual = output[:, 0] - targ
    squared = residual.pow(2)
    return squared.mean()

### Cell 27: Defining Mean Squared Error Function (repeat of Cell 17)
Defines the Mean Squared Error (MSE) function, a common loss function in regression tasks.



In [None]:
# Loss of the randomly initialised model on the training predictions.
mse(preds, y_train)

### Cell 28: Calculating MSE for Predictions (repeat of Cell 18)
Computes the MSE for the model's predictions against the actual training labels.



In [None]:
# sympy is used here to work out derivatives symbolically.
# NOTE(review): this cell repeats an earlier cell of the notebook.
from sympy import symbols,diff
# Create the symbols x and y (only x is used in the visible cells).
x,y = symbols('x y')
# Derivative of x**2 with respect to x.
diff(x**2, x)

### Cell 29: Symbolic Differentiation with SymPy (repeat of Cell 19)
Performs symbolic differentiation of `x**2` with respect to `x` using SymPy.



In [None]:
# Derivative of 3*x**2+9 with respect to x; the constant term drops out.
diff(3*x**2+9, x)

### Cell 30: Another Example of Symbolic Differentiation (repeat of Cell 20)
Calculates the derivative of the polynomial `3*x**2 + 9` with respect to `x`.



In [None]:
def lin_grad(inp, out, w, b):
    """Backward pass of the affine layer ``out = inp @ w + b``.

    Reads the upstream gradient from ``out.g`` and stores the gradients with
    respect to the layer's input, weights and bias on ``inp.g``, ``w.g`` and
    ``b.g``. Nothing is returned.

    Args:
        inp: layer input, indexed as (batch, n_in).
        out: layer output; must already carry ``out.g`` of shape (batch, n_out).
        w: weight matrix of shape (n_in, n_out).
        b: bias vector of shape (n_out,).
    """
    # grad of matmul with respect to input
    inp.g = out.g @ w.t()
    # grad with respect to the weights: the batch-summed outer product of each
    # input row with its output-gradient row. Written as one matmul so the
    # (batch, n_in, n_out) intermediate of the broadcast-and-sum form is never
    # materialised.
    w.g = inp.t() @ out.g
    # the bias is added to every row, so its gradient sums over the batch
    b.g = out.g.sum(0)

### Cell 31: Defining Gradient for Linear Layer (repeat of Cell 21)
Defines a function to compute gradients for a linear layer during backpropagation.



In [None]:
def forward_and_backward(inp, targ):
    """Run the network forward on `inp`, then backpropagate by hand.

    Uses the notebook-level parameters w1, b1, w2 and b2. Gradients are
    stored as `.g` attributes by `lin_grad` (on the parameters and on `inp`)
    and on the intermediate activations. Returns None.
    """
    # forward pass:
    pre_act = lin(inp, w1, b1)
    hidden = relu(pre_act)
    out = lin(hidden, w2, b2)
    err = out[:,0] - targ
    # The loss value is computed here but is neither returned nor stored.
    loss = err.pow(2).mean()

    # backward pass:
    # Gradient of the mean of squared errors with respect to `out`.
    n_rows = inp.shape[0]
    out.g = 2.*err[:,None] / n_rows
    lin_grad(hidden, out, w2, b2)
    # ReLU passes gradient only where its input was positive.
    positive = (pre_act>0).float()
    pre_act.g = positive * hidden.g
    lin_grad(inp, pre_act, w1, b1)

### Cell 32: Forward and Backward Pass (repeat of Cell 22)
Implements the forward and backward passes of the neural network for training.



In [None]:
# Populates the .g gradient attributes on the parameters and on x_train;
# the function has no return statement, so the cell displays nothing.
forward_and_backward(x_train, y_train)

### Cell 33: Running Forward and Backward Pass (repeat of Cell 23)
Executes the forward and backward passes with training data and labels.



In [None]:
# Save for testing against later
def get_grad(x):
    """Return a copy of the gradient stored on `x.g`."""
    return x.g.clone()

# Tensors whose hand-computed gradients are snapshotted.
chks = (w1, w2, b1, b2, x_train)
grads = w1g,w2g,b1g,b2g,ig = tuple(get_grad(tsr) for tsr in chks)

### Cell 34: Storing Gradients for Testing (repeat of Cell 24)
Stores the gradients of weights, biases, and input for later comparison or testing.



In [None]:
def mkgrad(x):
    """Return a copy of `x` with autograd tracking switched on."""
    duplicate = x.clone()
    return duplicate.requires_grad_(True)

# Autograd-enabled copies of the tensors listed in `chks`, in the same order.
ptgrads = w12,w22,b12,b22,xt2 = tuple(mkgrad(tsr) for tsr in chks)

### Cell 35: Preparing Variables for PyTorch Gradient Calculation (repeat of Cell 25)
Prepares tensors for PyTorch's automatic differentiation by enabling gradient tracking.

