In [1]:
import torch
from torch import nn
from models.recurrent_neuron_transformer import RecurrentNeuronTransformer
from tqdm import tqdm
# Default device for the model and the token tensors: use the GPU when one is
# available, otherwise fall back to the CPU so the notebook still runs.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


In [3]:
def train_shakespeare_trainsformer(model, context_window, step_size, data_loader, optimizer, num_epochs, device=DEVICE, mask=False):
    """Train `model` on next-token prediction with a sliding window over each chunk.

    For every chunk yielded by `data_loader`, windows of `context_window`
    tokens are taken every `step_size` positions; the target of a window is
    the same window shifted right by one token. Gradients of all windows in a
    chunk are accumulated and one optimizer step is taken per chunk.

    Args:
        model: Called as ``model(inputs=..., hidden_layers=...)`` and expected
            to return ``(outputs, hidden_layers)``. ``outputs`` is flattened to
            ``(-1, outputs.size(-1))`` before the loss is computed.
        context_window: Number of tokens in each input window.
        step_size: Stride between the starts of consecutive windows.
        data_loader: Iterable of token tensors that support
            ``chunk[:, start:stop]`` and ``chunk.size(1)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over `data_loader`.
        device: Device the input/target windows are moved to.
        mask: Currently unused; kept so existing callers keep working.

    Returns:
        None. Progress is shown with tqdm and one summary line is printed per
        epoch with the mean loss per window.
    """
    # Build the loss module once instead of on every window.
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        num_windows = 0  # windows processed so far in this epoch
        progress_bar = tqdm(data_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False)

        for chunk in progress_bar:
            # The hidden state is reset for every chunk and threaded through
            # the windows of that chunk.
            # NOTE(review): if the model returns hidden state that is still
            # attached to the autograd graph, the per-window backward() below
            # will fail on the second window -- confirm the model detaches it.
            hidden_layers = dict()

            # Start from clean gradients so nothing left over from an earlier
            # run leaks into this chunk's update.
            optimizer.zero_grad()
            windows_in_chunk = 0

            for i in range(0, chunk.size(1) - context_window, step_size):
                # Input window and its one-token-shifted target
                input_seq = chunk[:, i:i+context_window].to(device)
                target_seq = chunk[:, i+1:i+context_window+1].to(device)

                # Forward pass
                outputs, hidden_layers = model(inputs=input_seq, hidden_layers=hidden_layers)

                # reshape (not view) so non-contiguous slices are handled too
                logits = outputs.reshape(-1, outputs.size(-1))
                targets = target_seq.reshape(-1)

                # Accumulate this window's gradients; the step happens once per chunk
                loss = criterion(logits, targets)
                loss.backward()

                epoch_loss += loss.item()
                num_windows += 1
                windows_in_chunk += 1

                # Running mean loss per window over the epoch so far
                progress_bar.set_postfix(loss=epoch_loss/num_windows)

            # Apply the accumulated gradients; skip chunks too short to
            # produce a single window, since there is nothing to apply.
            if windows_in_chunk:
                optimizer.step()

        # Mean loss per window; guarded so an empty loader does not divide by zero
        mean_loss = epoch_loss / num_windows if num_windows else 0.0
        print(f"Epoch {epoch+1}/{num_epochs} completed. Loss: {mean_loss}")

In [4]:
from transformers import GPT2Tokenizer
from utils.datasets import ShakespeareDataset
from torch.utils.data import TensorDataset, DataLoader

# GPT-2 tokenizer used to turn the raw text into token ids
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Build the dataset from the Shakespeare CSV, then wrap it in a loader that
# serves one item per batch in shuffled order
dataset = ShakespeareDataset(
    csv_file='data/shakespeare/shakespeare_data.csv',
    chunk_length=2048,
    tokenizer=tokenizer,
)
data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

In [5]:
# Context window size k: number of tokens the model sees per step
# (chosen as chunk_length / 2)
context_window = 1024

# Model whose input and output sizes both equal the tokenizer's vocabulary size
transformer_model = RecurrentNeuronTransformer(
    input_size=tokenizer.vocab_size,
    output_size=tokenizer.vocab_size,
    device=DEVICE,
    max_length=context_window,
)

# Adam optimizer over all model parameters
transformer_optimizer = torch.optim.Adam(
    transformer_model.parameters(),
    lr=0.001,
)

In [6]:
# Stride, in tokens, between consecutive sliding windows
step_size = 64

# Run training for two epochs
train_shakespeare_trainsformer(
    model=transformer_model,
    context_window=context_window,
    step_size=step_size,
    data_loader=data_loader,
    optimizer=transformer_optimizer,
    num_epochs=2,
)

                                                            

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 96 but got size 1 for tensor number 1 in the list.