In [1]:
from CharRNN import CharRNN
import torch, torch.optim as optim, torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset
from transformers import PreTrainedTokenizerFast
from onehotencoder import onehotencoder
# Pick the compute device once: use the GPU when CUDA reports one, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# --- Model settings (passed positionally to CharRNN, in this order) ---
vocab_size = 59
embedded_dim = 768
hidden_dim = 768
num_layers = 3
dropout = 0.2

# --- Optimisation settings ---
learning_rate = 1e-3  # step size given to the Adam optimizer
num_epochs = 10       # number of full passes over the dataloader
batch_size = 64       # samples per mini-batch

In [3]:
# Encoder instance handed to SequenceDataset, which calls its
# encode_sequence() method to turn each text line into an input/target pair.
endecode = onehotencoder()

class SequenceDataset(Dataset):
    """Dataset in which every line of a text file is one training sample.

    The whole file is read into memory at construction time. Each item is
    encoded on demand by calling ``encoder.encode_sequence`` twice on the
    whitespace-stripped line: once for the inputs and once with
    ``targets=True`` for the targets.
    """

    def __init__(self, file_path, encoder):
        """Remember the path and encoder, and load all lines of *file_path*."""
        self.file_path, self.encoder = file_path, encoder
        with open(file_path, 'r') as handle:
            # Iterating the handle yields the raw lines (newline included);
            # stripping is deferred to __getitem__.
            self.lines = list(handle)

    def __len__(self):
        """Return the number of lines read from the file."""
        return len(self.lines)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` encodings of line *idx*."""
        text = self.lines[idx].strip()
        encode = self.encoder.encode_sequence
        return encode(text), encode(text, targets=True)

# Training samples are read line by line from 'data/train.csv'.
dataset = SequenceDataset('data/train.csv', endecode)
# Shuffled mini-batches. The batch size comes from the `batch_size`
# hyperparameter rather than a separate hard-coded literal, so the value
# declared with the other settings is the one actually used.
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=12)

In [None]:
# Build the CharRNN model from the hyperparameters, then move it to the
# selected device.
charRNN = CharRNN(vocab_size, embedded_dim, hidden_dim, num_layers, dropout)
charRNN = charRNN.to(device)

# Adam over all model parameters, with cross-entropy as the training loss.
optimizer = optim.Adam(charRNN.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training loop: one optimisation step per mini-batch, `num_epochs` passes
# over the dataloader.
for epoch in range(num_epochs):
    for batch_inputs, batch_targets in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # PyTorch accumulates gradients into .grad across backward() calls,
        # so clear them before computing this batch's gradients.
        optimizer.zero_grad()

        logits = charRNN(batch_inputs)

        # The loss is fed class indices: argmax collapses dim 2 of the targets
        # (assumes targets are one-hot along that axis — TODO confirm), and
        # both tensors are flattened to one row per (sample, position).
        targets_flat = torch.argmax(batch_targets, dim=2).reshape(-1)
        logits_flat = logits.reshape(batch_inputs.size(0) * batch_inputs.size(1), -1)

        loss = criterion(logits_flat, targets_flat)
        # Backpropagate so that optimizer.step() has gradients to apply;
        # without this call the parameters are never updated.
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch, not an epoch average.
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")