## Bigram model exploration

In [2]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

In [59]:
class BigramModel(nn.Module):
    """Bigram language model backed by a single lookup table.

    The prediction for the next token depends only on the current token:
    row ``i`` of the ``vocab_size x vocab_size`` embedding table is used
    directly as the logits over the token that follows token ``i``.
    """

    def __init__(self, vocab_size):
        """Create the ``vocab_size x vocab_size`` next-token logit table."""
        super().__init__()
        self.embedding_lookup = nn.Embedding(vocab_size, vocab_size)

    def forward(self, x, targets=None):
        """Compute next-token logits and, if targets are given, the loss.

        Args:
            x: Integer token ids of shape ``(B, T)``.
            targets: Optional integer token ids with ``B * T`` elements
                (typically shape ``(B, T)``), the expected next token for
                every position of ``x``.

        Returns:
            ``(logits, loss)``. With ``targets`` the logits are flattened to
            ``(B * T, C)`` and ``loss`` is the mean cross-entropy. Without
            ``targets`` the logits keep their ``(B, T, C)`` shape and
            ``loss`` is ``None``.
        """
        logits = self.embedding_lookup(x)
        if targets is None:
            # Inference path: nothing to score against.
            return logits, None

        B, T, C = logits.shape
        # cross_entropy wants (N, C) logits against (N,) class indices, so
        # the batch and time dimensions are merged into one.
        logits = logits.view(B * T, C)
        # reshape (not view) so non-contiguous targets are accepted too.
        targets = targets.reshape(B * T)
        loss = F.cross_entropy(logits, targets)
        return logits, loss

    def generate(self, x, max_new_tokens):
        """Extend ``x`` by sampling ``max_new_tokens`` tokens one at a time.

        Args:
            x: Integer token ids of shape ``(B, T)`` used as the start context.
            max_new_tokens: Number of tokens to append to every sequence.

        Returns:
            Integer tensor of shape ``(B, T + max_new_tokens)``.
        """
        # Sampling never needs gradients, so skip building the autograd graph.
        with torch.no_grad():
            for _ in range(max_new_tokens):
                # A bigram model only looks at the last token, so embedding
                # just that column yields the same logits as embedding the
                # whole sequence and slicing the final position.
                logits = self.embedding_lookup(x[:, -1])
                probs = F.softmax(logits, dim=-1)
                x_next = torch.multinomial(probs, num_samples=1)
                x = torch.cat((x, x_next), dim=-1)
        return x



In [60]:
# Number of distinct token ids; sizes the model's vocab_size x vocab_size
# lookup table. The toy batches below only use ids smaller than this.
vocab_size = 15

In [61]:
# Inputs: batch of 2 sequences, 5 tokens each (ids 1..10 laid out row by row).
xb = torch.arange(1, 11).view(2, 5)

# Targets: same shape, every input id shifted forward by one (the "next" token).
yb = xb + 1

print(xb.shape, yb.shape, sep="\n")


torch.Size([2, 5])
torch.Size([2, 5])


In [62]:
# Build an untrained model and score the toy batch once.
model = BigramModel(vocab_size=vocab_size)
logits, loss = model(xb, targets=yb)

# Logits come back flattened to (B * T, vocab_size); loss is a scalar tensor.
print(logits.shape, loss, sep="\n")


torch.Size([10, 15])
tensor(3.5817, grad_fn=<NllLossBackward0>)


In [66]:
# Sample 10 new tokens from the model.
# The start context is the single token id 0, given as a [B, T] = [1, 1] tensor.
model.generate(torch.zeros((1, 1), dtype=torch.long), max_new_tokens=10)

tensor([[ 0, 11,  3,  0, 14, 12, 10,  4,  5,  3,  8]])