In [84]:
import torch
import torch.nn as nn
import numpy as np



In [85]:
# Define the Large Language Model (LLM) class
class LLM(nn.Module):
    """LSTM-based next-token language model.

    Pipeline: token ids -> ``nn.Embedding`` -> stacked ``nn.LSTM``
    (``batch_first=True``) -> ``nn.Linear`` projecting to vocabulary logits.

    Args:
        vocab_size: Number of distinct token ids; also the width of the
            output logits.
        embedding_dim: Width of each token embedding.
        hidden_dim: Width of the LSTM hidden and cell states.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        # Kept so init_hidden can build correctly shaped states.
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

    def forward(self, x, hidden=None):
        """Compute vocabulary logits for every position of ``x``.

        Args:
            x: Integer tensor of token ids, laid out as ``(batch, seq_len)``
                because the LSTM is built with ``batch_first=True``.
            hidden: Optional ``(h_0, c_0)`` tuple, each of shape
                ``(num_layers, batch, hidden_dim)``. When ``None`` the LSTM
                starts from an all-zero state, which is equivalent to passing
                ``init_hidden(batch)``.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            ``(batch, seq_len, vocab_size)`` and ``hidden`` is the final
            ``(h_n, c_n)`` tuple to feed into the next call.
        """
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero ``(h_0, c_0)`` state for ``batch_size`` sequences.

        The tensors are created with ``new_zeros`` on one of the model's own
        parameters so they share its dtype and device.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

    

In [86]:
# Define a function to train the LLM
def train_llm(llm, data, optimizer, criterion, num_epochs, batch_size, log_every=1000):
    """Train ``llm`` for next-token prediction over a 1-D stream of token ids.

    The stream is walked in consecutive, non-overlapping chunks of
    ``batch_size`` tokens. Each chunk is fed as ONE sequence (input shape
    ``(1, batch_size)``) and the target is the same chunk shifted by one
    token. The recurrent state is carried from chunk to chunk but detached
    after every step, so gradients never flow past the current chunk.

    Args:
        llm: Model callable as ``llm(inputs, hidden) -> (logits, hidden)``
            and exposing ``init_hidden(batch_size)``.
        data: 1-D sequence (list, NumPy array or tensor) of integer token ids.
        optimizer: Optimizer over ``llm``'s parameters.
        criterion: Loss taking ``(logits[N, vocab], targets[N])``.
        num_epochs: Number of full passes over ``data``.
        batch_size: Number of consecutive tokens consumed per optimizer step.
            Despite the name this is the chunk (sequence) length; the batch
            dimension of the input is always 1.
        log_every: Print the loss every this many optimizer steps; ``0`` or
            ``None`` disables logging.

    Returns:
        None. ``llm`` and ``optimizer`` are updated in place.
    """
    llm.train()
    device = next(llm.parameters()).device
    # Convert once instead of per step; force int64 because the loss targets
    # must be long even if `data` arrives as int32.
    tokens = torch.as_tensor(data, dtype=torch.long, device=device)
    num_tokens = len(tokens)

    for epoch in range(num_epochs):
        # The input below always has batch dimension 1 (see unsqueeze(0)), so
        # the recurrent state must be built for a batch of 1, not batch_size.
        hidden = llm.init_hidden(1)
        chunk_starts = range(0, num_tokens - batch_size, batch_size)
        for step, start in enumerate(chunk_starts, start=1):
            stop = start + batch_size
            inputs = tokens[start:stop].unsqueeze(0)
            targets = tokens[start + 1:stop + 1]

            optimizer.zero_grad()  # Clear gradients
            output, hidden = llm(inputs, hidden)
            # Use the logits' own width rather than a notebook-level global.
            loss = criterion(output.reshape(-1, output.size(-1)), targets.reshape(-1))
            # No retain_graph: the carried state is detached below, so each
            # step's graph is independent and can be freed immediately.
            loss.backward()
            optimizer.step()

            # Detach hidden state tensors (truncated backpropagation through time)
            hidden = tuple(state.detach() for state in hidden)

            # Count optimizer steps, not token offsets, so logging also fires
            # when batch_size does not divide the interval. `stop` is the
            # number of tokens consumed so far in this epoch.
            if log_every and step % log_every == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}], Step [{stop}/{num_tokens}], Loss: {loss.item()}')



In [None]:

# Fix the RNG seeds so the synthetic data and the weight initialisation are
# the same on every Restart & Run All.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Define some hyperparameters
vocab_size = 10000      # number of distinct token ids
embedding_dim = 256     # width of each token embedding
hidden_dim = 512        # width of the LSTM hidden/cell state
num_layers = 2          # number of stacked LSTM layers
num_epochs = 5          # full passes over `data`
batch_size = 1  # consecutive tokens train_llm slices off per optimizer step

# Generate some random token ids; they stand in for a tokenised corpus
# (no real text is loaded in this notebook).
data = np.random.randint(0, vocab_size, size=(10000,))

# Create the LLM instance
llm = LLM(vocab_size, embedding_dim, hidden_dim, num_layers)

# Define the loss function and optimizer (Adam with its default settings)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(llm.parameters())

# Train the LLM
train_llm(llm, data, optimizer, criterion, num_epochs, batch_size)


In [None]:
# NOTE(review): `generate_text` is called below but is not defined in any
# visible cell of this notebook, so it is defined here. If a definition exists
# elsewhere, drop this one and keep a single copy.
def generate_text(llm, initial_input, max_length=50):
    """Sample a sequence of token ids from ``llm``.

    Starting from ``initial_input``, the model is fed one token at a time and
    the next token is drawn from the softmax of its logits; the recurrent
    state is carried between steps.

    Args:
        llm: Model callable as ``llm(tokens, hidden) -> (logits, hidden)``
            and exposing ``init_hidden(batch_size)``.
        initial_input: Integer id of the first token.
        max_length: Total number of ids to return, including ``initial_input``.

    Returns:
        A list of ``max_length`` Python ints (token ids).
    """
    was_training = llm.training
    llm.eval()
    try:
        device = next(llm.parameters()).device
        token = torch.tensor([[int(initial_input)]], dtype=torch.long, device=device)
        hidden = llm.init_hidden(1)  # a single sequence is generated
        generated = [int(initial_input)]
        with torch.no_grad():
            for _ in range(max_length - 1):
                logits, hidden = llm(token, hidden)
                # logits is (1, 1, vocab); sample from the last position.
                probs = torch.softmax(logits[0, -1], dim=-1)
                token = torch.multinomial(probs, num_samples=1).view(1, 1)
                generated.append(int(token.item()))
        return generated
    finally:
        # Leave the model in whichever mode the caller had it in.
        llm.train(was_training)


# Set initial input for text generation
initial_input = np.random.randint(0, vocab_size)

# Generate text using the trained LLM
generated_text = generate_text(llm, initial_input)

# Convert token indices to actual words when a tokenizer is available.
# NOTE(review): no `tokenizer` is created in the visible cells (the training
# data is random ids), so fall back to printing the raw ids instead of
# raising NameError.
if "tokenizer" in globals():
    reverse_token_mapping = {index: word for word, index in tokenizer.get_vocab().items()}
    generated_words = [reverse_token_mapping[token] for token in generated_text]
else:
    generated_words = [str(token) for token in generated_text]

# Print the generated text
print('Generated Text:')
print(' '.join(generated_words))