<a href="https://colab.research.google.com/github/NotBizzaark/AI-AlGORITHMS/blob/main/BigramModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
# Dataset
!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt

--2025-03-21 17:04:10--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2025-03-21 17:04:10 (96.9 MB/s) - ‘input.txt’ saved [1115394/1115394]



In [3]:
with open('input.txt', 'r', encoding='utf-8') as f:
  text = f.read()

In [4]:
print(text[:1000])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [5]:
chars = sorted(list(set(text)))
vocab_size = len(chars)

In [6]:
print(chars)
print(vocab_size)

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
65


In [7]:
stoi = {ch:i for i, ch in enumerate(chars)}
itos = {i:ch for i, ch in enumerate(chars)}

encode = lambda s: [stoi[c] for c in s]
decode = lambda l: "".join(itos[i] for i in l)

In [8]:
print(encode("hello"))

[46, 43, 50, 50, 53]


In [9]:
encoded = torch.tensor(encode(text), dtype=torch.long)
print(encoded)

tensor([18, 47, 56,  ..., 45,  8,  0])


In [10]:
inputs = encoded[:-1]
targets = encoded[1:]

In [11]:
seq_length = 5
batch_size = len(text)

input_tokens = torch.randint(0, vocab_size, (batch_size, seq_length))
target_tokens = torch.randint(0, vocab_size, (batch_size, seq_length))

In [53]:
class BigramModel(nn.Module):
  def __init__(self, vocab_size):
    super().__init__()
    self.embedding_table = nn.Embedding(vocab_size, vocab_size)

  def forward(self, index, targets=None):
    logits = self.embedding_table(index)

    if targets == None:
      return logits

    B, T, V = logits.shape
    logits = logits.reshape(B*T, V)
    targets = targets.reshape(B * T)

    loss = F.cross_entropy(logits, targets)

    return logits, loss

In [13]:
model = BigramModel(vocab_size)

In [14]:
logits, loss = model(input_tokens, target_tokens)

In [15]:
loss.item()

4.636580944061279

In [16]:
optimizer = torch.optim.Adam(model.parameters(), lr =0.001)

In [17]:
import time
start_time = time.time()
torch.manual_seed(1337)

epochs = 1000
losses = []

for i in range(epochs):
  optimizer.zero_grad()
  logit, loss = model(input_tokens, target_tokens)
  loss.backward()
  optimizer.step()

  if i%50 == 0:
    print(f"Epochs {i}, Loss: {loss.item()}")

end_time = time.time()

print(f"Training Complete in {(end_time-start_time)/60} minutes!!!")



Epochs 0, Loss: 4.636580944061279
Epochs 50, Loss: 4.601165294647217
Epochs 100, Loss: 4.568545341491699
Epochs 150, Loss: 4.5385212898254395
Epochs 200, Loss: 4.5108442306518555
Epochs 250, Loss: 4.48530387878418
Epochs 300, Loss: 4.4617204666137695
Epochs 350, Loss: 4.439933776855469
Epochs 400, Loss: 4.419800758361816
Epochs 450, Loss: 4.401190280914307
Epochs 500, Loss: 4.383984088897705
Epochs 550, Loss: 4.368072509765625
Epochs 600, Loss: 4.3533549308776855
Epochs 650, Loss: 4.339742183685303
Epochs 700, Loss: 4.327147483825684
Epochs 750, Loss: 4.315495014190674
Epochs 800, Loss: 4.304712295532227
Epochs 850, Loss: 4.29473352432251
Epochs 900, Loss: 4.28549861907959
Epochs 950, Loss: 4.276950359344482
Training Complete in 118.59437011480331 minutes!!!


In [35]:
model = BigramModel(vocab_size)

In [66]:
def generate(model, start_token, max_length = 40):
  model.eval()
  idx = torch.tensor([[start_token]], dtype = torch.long)

  output_tokens = [start_token]

  for _ in range(max_length - 1):
    logits = model(idx)
    probs = torch.softmax(logits[:, -1, :], dim=-1)
    next_token = torch.multinomial(probs, num_samples=1)
    if next_token not in itos:
      next_token = start_token

    output_tokens.append(next_token)
    idx = torch.tensor([[next_token]], dtype=torch.long)

  return decode(output_tokens)

In [69]:
start_char = 'o'
start_token = stoi[start_char]

# Generate and print text
generated_text = generate(model, start_token, max_length=20)
print("Generated Text:", generated_text)

Generated Text: oooooooooooooooooooo
