In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import time
from torch.utils.data import Dataset, DataLoader

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the Tiny Shakespeare dataset
with open("tiny_shakespeare.txt", "r", encoding="utf-8") as f:
    text = f.read()

# Character mapping
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}
encoded = np.array([char2int[ch] for ch in text])

# Creating sequences and labels
def create_sequences(encoded_text, seq_length):
    sequences, labels = [], []
    for i in range(len(encoded_text) - seq_length):
        sequence = encoded_text[i:i+seq_length]
        label = encoded_text[i+seq_length]
        sequences.append(sequence)
        labels.append(label)
    return torch.from_numpy(np.array(sequences)), torch.from_numpy(np.array(labels))

seq_length = 50  # Sequence length
sequences, labels = create_sequences(encoded, seq_length)
sequences, labels = sequences.to(device), labels.to(device)

# Custom Dataset
class CharDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.labels[index]

# DataLoader
batch_size = 128
dataset = CharDataset(sequences, labels)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model Definition
class CharModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="lstm", num_layers=2):
        super(CharModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.model_type = model_type

        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == "lstm":
            self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        elif model_type == "gru":
            self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out[:, -1, :])  # Predict the next character
        return out, hidden
    
    def init_hidden(self, batch_size):
        if self.model_type == 'lstm':
            return (torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device),
                    torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device))
        else:  # GRU
            return torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)

# Training Function
def train(model, data_loader, epochs=10, lr=0.005):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    start_time = time.time()

    for epoch in range(epochs):
        hidden = model.init_hidden(batch_size)
        for x, y in data_loader:
            x, y = x.to(device), y.to(device)
            model.zero_grad()
            output, hidden = model(x, hidden)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            hidden = tuple([each.data for each in hidden])  # Detach hidden state

        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

    print(f'Training completed in {time.time() - start_time}s')

# Model Instantiation and Training
vocab_size = len(chars)
hidden_size = 256
output_size = vocab_size

model = CharModel(vocab_size, hidden_size, output_size, model_type="lstm").to(device)  # Change to "gru" for GRU
train(model, data_loader, epochs=20, lr=0.002)


RuntimeError: expected scalar type Long but found Int