<a href="https://colab.research.google.com/github/Jaykef/dummyGPT/blob/main/dummyGPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the GPT model
class dummyGPT(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.transformer = nn.Transformer(hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, src, tgt):
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        out = self.transformer(src, tgt)
        out = self.fc(out)
        return out

# Define the dataset
data = ["The quick brown fox jumps over the lazy dog.",
        "The quick brown fox jumps over the lazy dog again.",
        "The quick brown fox jumps over the lazy dog one more time.",
        "The quick brown fox jumps over the lazy dog once more.",]

# Convert text to numerical data
word_to_index = {}
index_to_word = {}
for sentence in data:
    for word in sentence.split():
        if word not in word_to_index:
            index = len(word_to_index)
            word_to_index[word] = index
            index_to_word[index] = word

X = torch.tensor([word_to_index[word] for sentence in data for word in sentence.split()[:-1]])
Y = torch.tensor([word_to_index[word] for sentence in data for word in sentence.split()[1:]])

# Define model parameters
vocab_size = len(word_to_index)
embedding_size = 128
hidden_size = 128
num_layers = 1

# Define the model, loss function, and optimizer
model = dummyGPT(vocab_size, embedding_size, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(X, Y)
    loss = criterion(outputs.view(-1, vocab_size), Y)
    loss.backward()
    optimizer.step()
    print("Epoch {}: loss={}".format(epoch, loss.item()))

# Generate text given a prompt
prompt = "The quick brown"
prompt_tensor = torch.tensor([word_to_index[word] for word in prompt.split()])
output = model(prompt_tensor.unsqueeze(0), prompt_tensor.unsqueeze(0))
next_word_index = torch.argmax(output[-1]).item()
next_word = index_to_word.get(next_word_index, "<unk>")
print(prompt + " " + next_word)


Epoch 0: loss=2.8905832767486572
Epoch 1: loss=2.654731035232544
Epoch 2: loss=2.8623907566070557
Epoch 3: loss=2.5315873622894287
Epoch 4: loss=2.570763349533081
Epoch 5: loss=2.5244534015655518
Epoch 6: loss=2.300091505050659
Epoch 7: loss=1.7719885110855103
Epoch 8: loss=2.319387435913086
Epoch 9: loss=1.6609715223312378
Epoch 10: loss=1.0500911474227905
Epoch 11: loss=0.8445582389831543
Epoch 12: loss=1.1472954750061035
Epoch 13: loss=0.6941309571266174
Epoch 14: loss=0.37709322571754456
Epoch 15: loss=0.3787277340888977
Epoch 16: loss=0.2671932280063629
Epoch 17: loss=0.23273031413555145
Epoch 18: loss=0.1989457905292511
Epoch 19: loss=0.20245017111301422
Epoch 20: loss=0.1466311514377594
Epoch 21: loss=0.10781830549240112
Epoch 22: loss=0.10073105245828629
Epoch 23: loss=0.08664675801992416
Epoch 24: loss=0.0751519650220871
Epoch 25: loss=0.07154914736747742
Epoch 26: loss=0.06905681639909744
Epoch 27: loss=0.050838619470596313
Epoch 28: loss=0.04459851607680321
Epoch 29: loss=0.