|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 2:</h2>|<h1>Large language models</h1>|
|<h2>Section:</h2>|<h1>Build a GPT</h1>|
|<h2>Lecture:</h2>|<h1><b>Randomly sampling words with torch.multinomial</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dulm_x/?couponCode=202509" target="_blank">udemy.com/course/dulm_x/?couponCode=202509</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
import torch
import torch.nn.functional as F
import numpy as np

# Generate a sample from a vector

In [None]:
# weights to sample from (torch.multinomial needs a tensor, not a list)
vect = torch.tensor([1., 2., 5.], dtype=torch.float32)

# draw one sample; the output is used as an index into vect, not a value from it
torch.multinomial(vect, num_samples=1)

# Some errors you might encounter

In [None]:
# torch.multinomial requires a torch tensor as its input, so both calls below
# are expected to raise: the first is given a Python list, the second a numpy array.
# NOTE: if the first call raises, the second line is never reached -- comment out
# the first line to see the error produced by the numpy input.
torch.multinomial([1.,2,.3],1)
torch.multinomial(np.array([1.,2,.3]),1)

In [None]:
# sampling is without replacement by default, so asking for more samples
# than there are elements (len(vect)+1) is expected to raise an error
torch.multinomial(vect,len(vect)+1)

In [None]:
# the input must be a floating-point tensor; the tensor below is built from
# integers only, so this call is expected to raise an error
torch.multinomial(torch.tensor([1,1,1]),1)

In [None]:
# the weights must be non-negative; the -1 entry below means this call
# is expected to raise an error
torch.multinomial(torch.tensor([-1,1.,1]),1)

# Generate many samples from the same vector

In [None]:
# draw 10 indices with replacement, then look up the values they point to
sampledIdx = torch.multinomial(vect, num_samples=10, replacement=True)
vect[sampledIdx]

In [None]:
# draw 10,000 indices with replacement
mn = torch.multinomial(vect, num_samples=10000, replacement=True)

# tally how often each index was drawn
vals, counts = np.unique(mn, return_counts=True)

# report every sampled index with its count and its share of all draws
nSamples = len(mn)
for sampledVal, nTimes in zip(vals, counts):
  percent = nTimes*100/nSamples
  print(f'"{sampledVal}" was sampled {nTimes} times ({percent:.2f}%)')

In [None]:
# treat the vector as if it contains (scaled) probability values:
# the expected probability of an index is its weight divided by the sum of all weights

# totals that do not change inside the loop
nSamples = len(mn)
weightSum = torch.sum(vect)

# again with more information
for v,c in zip(vals,counts):

  observedFrequency = c*100/nSamples

  # look the weight up by the sampled index (v) instead of by loop position,
  # so rows stay aligned even if some index never shows up in vals
  expectedFrequency = vect[int(v)]*100/weightSum

  print(f'"{v}" was sampled {c} times. That is {observedFrequency:.2f}%, and the expected probability is {expectedFrequency:.2f}%')

# With softmaxification

In [None]:
# turn the raw weights into softmax probabilities
vectSoftmax = F.softmax(vect, dim=-1)

# report the existing samples (mn/vals/counts from the earlier cell) next to the softmax probabilities
nSamples = len(mn)
for sampledVal, nTimes, softmaxProb in zip(vals, counts, vectSoftmax):
  observedFrequency = nTimes*100/nSamples
  softmaxPercent = softmaxProb*100
  print(f'"{sampledVal}" was sampled {nTimes:4} times. That is {observedFrequency:5.2f}%, and the softmax probability is {softmaxPercent:5.2f}%')

In [None]:
# redraw the 10,000 samples, this time using the softmax probabilities as weights
mn = torch.multinomial(vectSoftmax, num_samples=10000, replacement=True)
vals, counts = np.unique(mn, return_counts=True)

# each row is labelled with its softmax probability (not the sampled index)
nSamples = len(mn)
for softmaxProb, nTimes in zip(vectSoftmax, counts):
  observedFrequency = nTimes*100/nSamples
  print(f'"{softmaxProb:.4f}" was sampled {nTimes:4} times. That is {observedFrequency:5.2f}%, and the softmax probability is {softmaxProb*100:5.2f}%')

# Comparison with numpy.random.choice

In [None]:
# numpy counterpart: draw 10 values from vect (no selection weights given)
np.random.choice(vect, size=10)

In [None]:
# sample lots of values (no p is passed, so the elements of vect are not weighted)
mn = np.random.choice(vect,10000,replace=True)

# every element is expected equally often, so this is the same for all rows
expectedFrequency = 1*100/len(vect)
nSamples = len(mn)

# report the results
vals,counts = np.unique(mn,return_counts=True)
for v,c in zip(vals,counts):

  observedFrequency = c*100/nSamples

  print(f'"{v}" was sampled {c} times. That is {observedFrequency:.2f}%, and the expected probability is {expectedFrequency:.2f}%')

In [None]:
### getting np.random.choice to match multinomial's function

# define probabilities (weights for selection)
# Normalize in float64: np.random.choice verifies that p sums to 1 within a
# tight tolerance, and float32 rounding can miss it for arbitrary weights.
weights = vect.double()
probvalues = weights/weights.sum()


# sample lots of values
mn = np.random.choice(vect,10000,replace=True,p=probvalues)

# report the results
# NOTE: np.unique returns the sampled values in sorted order; pairing them with
# probvalues by position assumes vect is sorted ascending and that every value
# was sampled at least once -- verify if vect is changed.
nSamples = len(mn)
vals,counts = np.unique(mn,return_counts=True)
for v,c,p in zip(vals,counts,probvalues):

  observedFrequency = c*100/nSamples
  expectedFrequency = p*100

  print(f'"{v}" was sampled {c} times. That is {observedFrequency:.2f}%, and the expected probability is {expectedFrequency:.2f}%')