|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 8:</h2>|<h1>Deep learning introduction</h1>|
|<h2>Section:</h2>|<h1>Essence of deep learning modeling</h1>|
|<h2>Lecture:</h2>|<h1><b>Backprop in PyTorch</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">udemy.com/course/dulm_x/?couponCode=202509</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
# numerical libraries: PyTorch for tensors/autograd, NumPy for array storage
import torch
import numpy as np

# plotting; ask the inline backend to produce figures in SVG format
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Set up the optimization problem

In [None]:
# The function to be minimized (plays the role of the "loss function")
def fx(x):
  """Evaluate f(x) = 3x^2 - 2x + pi.

  Works on anything that supports arithmetic (Python numbers, tensors);
  for tensors the computation is element-wise.
  """
  quadratic_term = 3 * x**2
  linear_term = 2 * x
  return quadratic_term - linear_term + torch.pi

In [None]:
# Draw the "loss function" f(x) and mark where its exact minimum lies

# sample points along the x-axis
x = torch.linspace(-2, 2, 301)

# open the figure and work directly on its axes
plt.figure(figsize=(10, 4))
ax = plt.gca()

# the function itself
ax.plot(x, fx(x), color=[.9, .7, .9], linewidth=3, label='$f(x)$')

# axis limits (first and last grid point), labels, and a light-gray grid
ax.set(xlim=x[[0, -1]], xlabel='$x$', ylabel='$f(x)$')
ax.grid(color=[.9, .9, .9])

# dotted vertical line at x=1/3; the negative zorder keeps it behind the curve
ax.axvline(
    1/3,
    linestyle=':',
    color='b',
    linewidth=3,
    zorder=-4,
    label='Exact minimum\n(optimal weight)',
)
ax.legend()

plt.show()

# Create a "model" (just one weight)

In [None]:
# initialize the weight w as a one-element tensor (starting value -1) with
# requires_grad=True so that gradients with respect to it can be computed
w = torch.tensor([-1.0], requires_grad=True)

# Training parameters

In [None]:
# how many gradient-descent updates to perform
training_epochs = 80

# step size applied to each update
learningRate = 0.01

# SGD (stochastic gradient descent) optimizer acting on the single weight w
optimizer = torch.optim.SGD(params=[w], lr=learningRate)

# Train the model!

In [None]:
# Pre-allocate storage: one weight value and one loss value per training step
localmin = torch.zeros(training_epochs)
losses = np.zeros(training_epochs)

# training loop
for step in range(training_epochs):

  # step 1: clear the gradient left over from the previous iteration
  # (backward() accumulates into w.grad rather than overwriting it)
  optimizer.zero_grad()

  # step 2: forward pass -- evaluate the function at the current weight
  loss = fx(w)

  # step 3: compute the gradient of the loss with respect to w
  loss.backward()

  # step 4: update w using the gradient
  optimizer.step()

  # step 5a: store this step's results; note that the loss was computed
  # before the update, whereas the stored weight is the value after it
  localmin[step] = w.item()
  losses[step] = loss.item()

  # step 5b: report progress on every 10th step (printed with 1-based numbering)
  if step%10 == 0:
    print(f"Step {step+1:2d}, loss = {loss.item():.4f}")

# Final result: the weight value after the last update
print(f"\nMinimum found at x = {w.item():.4f}")

# Training is over; plot the losses

In [None]:
# Loss at every training step: black line with gray-filled square markers
plt.figure(figsize=(10, 4))
ax = plt.gca()
ax.plot(losses, 'ks-', markerfacecolor=[.7, .7, .7])
ax.set(xlabel='Epoch', ylabel='Loss')
ax.grid(color=[.9, .9, .9])
plt.show()

In [None]:
# display the stored weight values (one entry per training step)
localmin

# Show the results

In [None]:
# Overlay the stored weight values on f(x) to show the path taken by training

# sample points along the x-axis
x = torch.linspace(-2, 2, 301)

# open the figure and work directly on its axes
plt.figure(figsize=(10, 4))
ax = plt.gca()

# the function itself
ax.plot(x, fx(x), color=[.9, .7, .9], linewidth=3, label='$f(x)$')

# every 5th stored weight: markers shrink (15 -> 3) and darken
# (white -> black) from the first training step to the last
marksizes = np.linspace(15, 3, training_epochs)
markcolors = np.linspace(1, 0, training_epochs)
for i in range(0, training_epochs, 5):
  w_i = localmin[i]
  gray = markcolors[i]
  ax.plot(w_i, fx(w_i), 'ko', markersize=marksizes[i], markerfacecolor=[gray, gray, gray])


# axis limits (first and last grid point), labels, and a light-gray grid
ax.set(xlim=x[[0, -1]], xlabel='$x$', ylabel='$f(x)$')
ax.grid(color=[.9, .9, .9])

# dotted vertical line at x=1/3; the negative zorder keeps it behind the curve
ax.axvline(
    1/3,
    linestyle=':',
    color='b',
    linewidth=3,
    zorder=-4,
    label='Exact minimum\n(optimal weight)',
)
ax.legend()

plt.show()

# Inspect the weights

In [None]:
# list the names of all attributes and methods available on the weight tensor
dir(w)

In [None]:
# gradient stored on w by the most recent loss.backward() call
w.grad