|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 2:</h2>|<h1>Large language models</h1>|
|<h2>Section:</h2>|<h1>Pretrain LLMs</h1>|
|<h2>Lecture:</h2>|<h1><b>Create custom loss functions</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">udemy.com/course/dulm_x/?couponCode=202509</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

# vector plots
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Check the PyTorch loss functions

In [None]:
# IPython-only syntax (not valid in a plain .py script): the trailing `??`
# displays the source code of nn.NLLLoss for inspection
nn.NLLLoss??

# Create and test custom loss functions

In [None]:
# general form for a loss function (actually a class)
class myLoss_L1(nn.Module):
  """Custom L1 loss: the mean absolute difference between output and target.

  Written as an nn.Module subclass; the loss computation lives in forward().
  """

  def __init__(self):
    # nothing to configure here; only initialize the nn.Module parent class
    super().__init__()

  def forward(self,yHat,y):
    """Return the mean of |yHat - y|.

    yHat : the model output
    y    : the target value
    The difference yHat-y is passed to torch.abs, so it must be a tensor.
    """

    # element-wise absolute error
    # (other error measures, e.g., a correlation-based error, could be used here instead)
    absolute_error = torch.abs(yHat-y)

    # average over all elements
    return absolute_error.mean()


class myLoss_L2(nn.Module):
  """Custom L2 loss: the mean of the squared differences (mean-squared error)."""

  def __init__(self):
    # nothing to configure here; only initialize the nn.Module parent class
    super().__init__()

  def forward(self,yHat,y):
    """Return the mean of (yHat - y)**2.

    yHat : the model output
    y    : the target value
    """

    # element-wise squared error
    residual = yHat-y
    squared_error = residual**2

    # average over all elements
    return torch.mean(squared_error)

In [None]:
# test values: a scalar tensor as the "model output" and a plain number as the target
predicted_value = torch.tensor(2.)
target_value = 5

# create one instance of each custom loss class
lossfun1 = myLoss_L1()
lossfun2 = myLoss_L2()

# report the inputs, followed by the loss that each function assigns to them
print(f'Model output = {predicted_value}')
print(f'target value = {target_value}\n')
print(f'L1 loss: {lossfun1(predicted_value,target_value)}')
print(f'L2 loss: {lossfun2(predicted_value,target_value)}')

# Demonstrate in a simple example

In [None]:
# Train two independent one-parameter "models" towards the same target:
# w1 is optimized with the L1 loss (lossfun1), w2 with the L2 loss (lossfun2).

# initialize the weight values (one trainable weight per loss function)
w1 = torch.tensor([2.], requires_grad=True)
w2 = torch.tensor([2.], requires_grad=True)

# target value
target = torch.tensor([3.])

# create the optimizers (one per weight, so the two training runs don't interact)
learningrate = .05
optimizer1 = torch.optim.SGD([w1],lr=learningrate)
optimizer2 = torch.optim.SGD([w2],lr=learningrate)

# number of training iterations
numIters = 50

# initialize results matrices (row 0 = L1 run, row 1 = L2 run)
# all_weights has one extra column to hold the pre-training weight values
all_losses  = np.zeros((2,numIters))
all_weights = np.zeros((2,numIters+1))

# store each weight's own starting value
# (so the records stay correct even if w1 and w2 are initialized differently)
all_weights[0,0] = w1.item()
all_weights[1,0] = w2.item()


# training loop
for i in range(numIters):

  ## train weight w1 (L1 loss)
  optimizer1.zero_grad()       # clear the gradient from the previous iteration
  loss1 = lossfun1(w1,target)  # the weight itself acts as the model output
  loss1.backward()
  optimizer1.step()

  # store the loss (computed before the step) and the updated weight value
  all_losses[0,i] = loss1.item()
  all_weights[0,i+1] = w1.item()



  ## train weight w2 (L2 loss)
  optimizer2.zero_grad()
  loss2 = lossfun2(w2,target)
  loss2.backward()
  optimizer2.step()

  # store the loss (computed before the step) and the updated weight value
  all_losses[1,i] = loss2.item()
  all_weights[1,i+1] = w2.item()


In [None]:
# Visualize the two training runs: losses (left panel) and weight trajectories (right panel)
_,axs = plt.subplots(1,2,figsize=(12,4))

# plot the losses (one value per training iteration, numbered from 1)
axs[0].plot(range(1,numIters+1),all_losses[0,:],'ko-',linewidth=.5,markerfacecolor=[.7,.7,.9],label='L1 loss')
axs[0].plot(range(1,numIters+1),all_losses[1,:],'ks-',linewidth=.5,markerfacecolor=[.7,.9,.7],label='L2 loss')

# plot the weight (index 0 is the value before any training)
axs[1].plot(range(0,numIters+1),all_weights[0,:],'ko-',linewidth=.5,markerfacecolor=[.7,.7,.9],label='L1 loss')
axs[1].plot(range(0,numIters+1),all_weights[1,:],'ks-',linewidth=.5,markerfacecolor=[.7,.9,.7],label='L2 loss')

# axhline takes a single float for y, so extract the number from the 1-element tensor
axs[1].axhline(target.item(),linestyle='--',color=[.7,.7,.7],zorder=-10,label='Target')


# add legends
axs[0].legend()
axs[1].legend()

# stylize the plots
axs[0].set(title='Losses',ylabel='loss',xlabel='Training epoch')
axs[1].set(title='Weight',ylabel='Weight value',xlabel='Training epoch')

plt.tight_layout()
plt.show()