# Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
import time

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# LSTM for Sequences of 20
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait and fail loudly on HTTP errors so that an error page is never
# silently used as the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text
sequence_length = 20

# Character-level vocabulary: every distinct character gets an integer id.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))
encoded_text = [char_to_int[ch] for ch in text]

# Each sample is a window of `sequence_length` ids; its target is the id that
# immediately follows the window.
encoded = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window as a view of `encoded` instead of
# building one Python list per position. The final window is dropped because
# no character follows it. The windows share memory, so treat them as
# read-only.
sequences = encoded.unfold(0, sequence_length, 1)[:-1]
targets = encoded[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing each input sequence with its target.

    Args:
        sequences: Indexable collection of input sequences.
        targets: Indexable collection of targets, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``targets`` differ in length.
    """

    def __init__(self, sequences, targets):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, so reject it up front.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, "
                f"got {len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap the tensors in a Dataset, then hold out 20% of the samples at random.
# NOTE(review): the windows are built with stride 1 and overlap heavily, so a
# random split places near-duplicate samples in both subsets - confirm this is
# acceptable for the reported test accuracy.
dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
class CharLSTM(nn.Module):
    """Sequence classifier: embedding -> single-layer LSTM -> linear head.

    Args:
        input_size: Number of distinct input ids (rows of the embedding).
        hidden_size: Width of both the embedding and the LSTM state.
        output_size: Number of logits produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Compute one set of logits per input sequence.

        Args:
            x: Integer ids of shape ``(batch, seq_len)``.
            hidden: Optional ``(h_0, c_0)`` tuple. When ``None``, ``nn.LSTM``
                starts from a zero state.

        Returns:
            ``(logits, hidden)``: logits of shape ``(batch, output_size)`` and
            the final ``(h_n, c_n)`` state of the LSTM.
        """
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        # Only the last time step feeds the classifier.
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero ``(h_0, c_0)`` pair, each ``(1, batch_size, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        weights, so it stays valid wherever the model has been moved and does
        not rely on any module-level ``device`` variable.
        """
        reference = self.fc.weight
        shape = (1, batch_size, self.hidden_size)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

# Hyperparameters; the vocabulary is both the input and the output space.
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001
model = CharLSTM(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
epochs = 20

# perf_counter() is meant for measuring durations and is not affected by
# wall-clock adjustments made while training runs.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Every batch starts from a fresh zero state; the state returned by
        # the model is not carried over to the next batch.
        output, _ = model(data, model.init_hidden(data.size(0)))
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Evaluation: top-1 accuracy over the held-out loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output, _ = model(data, model.init_hidden(data.size(0)))
        # Index of the largest logit per row; `.data` is not needed under
        # no_grad().
        predicted = output.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.711473316227781
Epoch 2/20, Training Loss: 1.485150503568518
Epoch 3/20, Training Loss: 1.4233778502858376
Epoch 4/20, Training Loss: 1.3878972135992893
Epoch 5/20, Training Loss: 1.3628450302354782
Epoch 6/20, Training Loss: 1.3431480968573006
Epoch 7/20, Training Loss: 1.3279951620960262
Epoch 8/20, Training Loss: 1.3146173407292134
Epoch 9/20, Training Loss: 1.3034009359909486
Epoch 10/20, Training Loss: 1.294143872883767
Epoch 11/20, Training Loss: 1.2862671143062758
Epoch 12/20, Training Loss: 1.2784827889909842
Epoch 13/20, Training Loss: 1.2721875276175028
Epoch 14/20, Training Loss: 1.266045561926979
Epoch 15/20, Training Loss: 1.2608564193571565
Epoch 16/20, Training Loss: 1.255879172915299
Epoch 17/20, Training Loss: 1.251721989147566
Epoch 18/20, Training Loss: 1.2477851554674453
Epoch 19/20, Training Loss: 1.244573937858307
Epoch 20/20, Training Loss: 1.2407389667464341
Total execution time for training: 606.2889075279236 seconds
Accuracy on tes

In [None]:
# LSTM for Sequences of 30
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait and fail loudly on HTTP errors so that an error page is never
# silently used as the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

sequence_length = 30

# Character-level vocabulary: every distinct character gets an integer id.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

encoded_text = [char_to_int[ch] for ch in text]

# Each sample is a window of `sequence_length` ids; its target is the id that
# immediately follows the window.
encoded = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window as a view of `encoded` instead of
# building one Python list per position. The final window is dropped because
# no character follows it. The windows share memory, so treat them as
# read-only.
sequences = encoded.unfold(0, sequence_length, 1)[:-1]
targets = encoded[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing each input sequence with its target.

    Args:
        sequences: Indexable collection of input sequences.
        targets: Indexable collection of targets, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``targets`` differ in length.
    """

    def __init__(self, sequences, targets):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, so reject it up front.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, "
                f"got {len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap the tensors in a Dataset, then hold out 20% of the samples at random.
# NOTE(review): the windows are built with stride 1 and overlap heavily, so a
# random split places near-duplicate samples in both subsets - confirm this is
# acceptable for the reported test accuracy.
dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class CharLSTM(nn.Module):
    """Sequence classifier: embedding -> single-layer LSTM -> linear head.

    Args:
        input_size: Number of distinct input ids (rows of the embedding).
        hidden_size: Width of both the embedding and the LSTM state.
        output_size: Number of logits produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Compute one set of logits per input sequence.

        Args:
            x: Integer ids of shape ``(batch, seq_len)``.
            hidden: Optional ``(h_0, c_0)`` tuple. When ``None``, ``nn.LSTM``
                starts from a zero state.

        Returns:
            ``(logits, hidden)``: logits of shape ``(batch, output_size)`` and
            the final ``(h_n, c_n)`` state of the LSTM.
        """
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        # Only the last time step feeds the classifier.
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero ``(h_0, c_0)`` pair, each ``(1, batch_size, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        weights, so it stays valid wherever the model has been moved and does
        not rely on any module-level ``device`` variable.
        """
        reference = self.fc.weight
        shape = (1, batch_size, self.hidden_size)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

# Hyperparameters; the vocabulary is both the input and the output space.
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharLSTM(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

epochs = 20
# perf_counter() is meant for measuring durations and is not affected by
# wall-clock adjustments made while training runs.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Every batch starts from a fresh zero state; the state returned by
        # the model is not carried over to the next batch.
        output, _ = model(data, model.init_hidden(data.size(0)))
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Evaluation: top-1 accuracy over the held-out loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output, _ = model(data, model.init_hidden(data.size(0)))
        # Index of the largest logit per row; `.data` is not needed under
        # no_grad().
        predicted = output.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.697342387942904
Epoch 2/20, Training Loss: 1.4726576827257034
Epoch 3/20, Training Loss: 1.4113048192549644
Epoch 4/20, Training Loss: 1.375155462476794
Epoch 5/20, Training Loss: 1.3505117829527857
Epoch 6/20, Training Loss: 1.3311707029569033
Epoch 7/20, Training Loss: 1.3150749606162104
Epoch 8/20, Training Loss: 1.3021008289807567
Epoch 9/20, Training Loss: 1.291757515019818
Epoch 10/20, Training Loss: 1.2821564125778753
Epoch 11/20, Training Loss: 1.2734858036400325
Epoch 12/20, Training Loss: 1.2663496789820086
Epoch 13/20, Training Loss: 1.2603301568511431
Epoch 14/20, Training Loss: 1.254051627652165
Epoch 15/20, Training Loss: 1.2488141292668729
Epoch 16/20, Training Loss: 1.2438960185576908
Epoch 17/20, Training Loss: 1.2387259014450949
Epoch 18/20, Training Loss: 1.2359358922835404
Epoch 19/20, Training Loss: 1.2318230640406043
Epoch 20/20, Training Loss: 1.2287620891802487
Total execution time for training: 748.9822533130646 seconds
Accuracy on 

In [None]:
# LSTM for Sequences of 50
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait and fail loudly on HTTP errors so that an error page is never
# silently used as the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text
sequence_length = 50

# Character-level vocabulary: every distinct character gets an integer id.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))
encoded_text = [char_to_int[ch] for ch in text]

# Each sample is a window of `sequence_length` ids; its target is the id that
# immediately follows the window.
encoded = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window as a view of `encoded` instead of
# building one Python list per position. The final window is dropped because
# no character follows it. The windows share memory, so treat them as
# read-only.
sequences = encoded.unfold(0, sequence_length, 1)[:-1]
targets = encoded[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing each input sequence with its target.

    Args:
        sequences: Indexable collection of input sequences.
        targets: Indexable collection of targets, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``targets`` differ in length.
    """

    def __init__(self, sequences, targets):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, so reject it up front.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, "
                f"got {len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap the tensors in a Dataset, then hold out 20% of the samples at random.
# NOTE(review): the windows are built with stride 1 and overlap heavily, so a
# random split places near-duplicate samples in both subsets - confirm this is
# acceptable for the reported test accuracy.
dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class CharLSTM(nn.Module):
    """Sequence classifier: embedding -> single-layer LSTM -> linear head.

    Args:
        input_size: Number of distinct input ids (rows of the embedding).
        hidden_size: Width of both the embedding and the LSTM state.
        output_size: Number of logits produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Compute one set of logits per input sequence.

        Args:
            x: Integer ids of shape ``(batch, seq_len)``.
            hidden: Optional ``(h_0, c_0)`` tuple. When ``None``, ``nn.LSTM``
                starts from a zero state.

        Returns:
            ``(logits, hidden)``: logits of shape ``(batch, output_size)`` and
            the final ``(h_n, c_n)`` state of the LSTM.
        """
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        # Only the last time step feeds the classifier.
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero ``(h_0, c_0)`` pair, each ``(1, batch_size, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        weights, so it stays valid wherever the model has been moved and does
        not rely on any module-level ``device`` variable.
        """
        reference = self.fc.weight
        shape = (1, batch_size, self.hidden_size)
        return (reference.new_zeros(shape), reference.new_zeros(shape))
# Hyperparameters; the vocabulary is both the input and the output space.
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharLSTM(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

epochs = 20
# perf_counter() is meant for measuring durations and is not affected by
# wall-clock adjustments made while training runs.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Every batch starts from a fresh zero state; the state returned by
        # the model is not carried over to the next batch.
        output, _ = model(data, model.init_hidden(data.size(0)))
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Evaluation: top-1 accuracy over the held-out loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output, _ = model(data, model.init_hidden(data.size(0)))
        # Index of the largest logit per row; `.data` is not needed under
        # no_grad().
        predicted = output.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.7001969526324001
Epoch 2/20, Training Loss: 1.4711108197271183
Epoch 3/20, Training Loss: 1.4074978094295216
Epoch 4/20, Training Loss: 1.3704794130662543
Epoch 5/20, Training Loss: 1.3441443178835326
Epoch 6/20, Training Loss: 1.3238139850151724
Epoch 7/20, Training Loss: 1.3077308766606923
Epoch 8/20, Training Loss: 1.294043606399139
Epoch 9/20, Training Loss: 1.2827759207945235
Epoch 10/20, Training Loss: 1.2727265480669032
Epoch 11/20, Training Loss: 1.2645177987739904
Epoch 12/20, Training Loss: 1.2569114672215935
Epoch 13/20, Training Loss: 1.2498197430450046
Epoch 14/20, Training Loss: 1.2443295110932486
Epoch 15/20, Training Loss: 1.2378445891255954
Epoch 16/20, Training Loss: 1.2333719689848432
Epoch 17/20, Training Loss: 1.228789914083556
Epoch 18/20, Training Loss: 1.2248058697360265
Epoch 19/20, Training Loss: 1.221132945431269
Epoch 20/20, Training Loss: 1.2182156594082438
Total execution time for training: 870.7704834938049 seconds
Accuracy on

In [None]:
# GRU for Sequences of 20
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait and fail loudly on HTTP errors so that an error page is never
# silently used as the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

sequence_length = 20

# Character-level vocabulary: every distinct character gets an integer id.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

encoded_text = [char_to_int[ch] for ch in text]

# Each sample is a window of `sequence_length` ids; its target is the id that
# immediately follows the window.
encoded = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window as a view of `encoded` instead of
# building one Python list per position. The final window is dropped because
# no character follows it. The windows share memory, so treat them as
# read-only.
sequences = encoded.unfold(0, sequence_length, 1)[:-1]
targets = encoded[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing each input sequence with its target.

    Args:
        sequences: Indexable collection of input sequences.
        targets: Indexable collection of targets, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``targets`` differ in length.
    """

    def __init__(self, sequences, targets):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, so reject it up front.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, "
                f"got {len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap the tensors in a Dataset, then hold out 20% of the samples at random.
# NOTE(review): the windows are built with stride 1 and overlap heavily, so a
# random split places near-duplicate samples in both subsets - confirm this is
# acceptable for the reported test accuracy.
dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class CharGRU(nn.Module):
    """Sequence classifier: embedding -> single-layer GRU -> linear head.

    Args:
        input_size: Number of distinct input ids (rows of the embedding).
        hidden_size: Width of both the embedding and the GRU state.
        output_size: Number of logits produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Compute one set of logits per input sequence.

        Args:
            x: Integer ids of shape ``(batch, seq_len)``.
            hidden: Optional initial state ``h_0``. When ``None``, ``nn.GRU``
                starts from a zero state.

        Returns:
            ``(logits, hidden)``: logits of shape ``(batch, output_size)`` and
            the final state ``h_n`` of the GRU.
        """
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, hidden)
        # Only the last time step feeds the classifier.
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero state of shape ``(1, batch_size, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        weights, so it stays valid wherever the model has been moved and does
        not rely on any module-level ``device`` variable.
        """
        return self.fc.weight.new_zeros((1, batch_size, self.hidden_size))

# Hyperparameters; the vocabulary is both the input and the output space.
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharGRU(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
# perf_counter() is meant for measuring durations and is not affected by
# wall-clock adjustments made while training runs.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Every batch starts from a fresh zero state; the state returned by
        # the model is not carried over to the next batch.
        output, _ = model(data, model.init_hidden(data.size(0)))
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Evaluation: top-1 accuracy over the held-out loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output, _ = model(data, model.init_hidden(data.size(0)))
        # Index of the largest logit per row; `.data` is not needed under
        # no_grad().
        predicted = output.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.6957282759954515
Epoch 2/20, Training Loss: 1.4996592183942024
Epoch 3/20, Training Loss: 1.4534013345880339
Epoch 4/20, Training Loss: 1.4285255410995419
Epoch 5/20, Training Loss: 1.4115607911016637
Epoch 6/20, Training Loss: 1.399496109439119
Epoch 7/20, Training Loss: 1.3905963871886657
Epoch 8/20, Training Loss: 1.38464664220297
Epoch 9/20, Training Loss: 1.379856461775556
Epoch 10/20, Training Loss: 1.3759207847179247
Epoch 11/20, Training Loss: 1.374044992769168
Epoch 12/20, Training Loss: 1.37172578532533
Epoch 13/20, Training Loss: 1.369672107888034
Epoch 14/20, Training Loss: 1.3691545740086255
Epoch 15/20, Training Loss: 1.3696721814192374
Epoch 16/20, Training Loss: 1.3690125433641538
Epoch 17/20, Training Loss: 1.3704669377411225
Epoch 18/20, Training Loss: 1.3722592879832647
Epoch 19/20, Training Loss: 1.373418925542703
Epoch 20/20, Training Loss: 1.3763837425601393
Total execution time for training: 474.571594953537 seconds
Accuracy on test s

In [None]:
# GRU for Sequences of 30
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait and fail loudly on HTTP errors so that an error page is never
# silently used as the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

sequence_length = 30

# Character-level vocabulary: every distinct character gets an integer id.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

encoded_text = [char_to_int[ch] for ch in text]

# Each sample is a window of `sequence_length` ids; its target is the id that
# immediately follows the window.
encoded = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window as a view of `encoded` instead of
# building one Python list per position. The final window is dropped because
# no character follows it. The windows share memory, so treat them as
# read-only.
sequences = encoded.unfold(0, sequence_length, 1)[:-1]
targets = encoded[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing each input sequence with its target.

    Args:
        sequences: Indexable collection of input sequences.
        targets: Indexable collection of targets, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``targets`` differ in length.
    """

    def __init__(self, sequences, targets):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, so reject it up front.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, "
                f"got {len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap the tensors in a Dataset, then hold out 20% of the samples at random.
# NOTE(review): the windows are built with stride 1 and overlap heavily, so a
# random split places near-duplicate samples in both subsets - confirm this is
# acceptable for the reported test accuracy.
dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Step 2: Define the GRU model
class CharGRU(nn.Module):
    """Sequence classifier: embedding -> single-layer GRU -> linear head.

    Args:
        input_size: Number of distinct input ids (rows of the embedding).
        hidden_size: Width of both the embedding and the GRU state.
        output_size: Number of logits produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Compute one set of logits per input sequence.

        Args:
            x: Integer ids of shape ``(batch, seq_len)``.
            hidden: Optional initial state ``h_0``. When ``None``, ``nn.GRU``
                starts from a zero state.

        Returns:
            ``(logits, hidden)``: logits of shape ``(batch, output_size)`` and
            the final state ``h_n`` of the GRU.
        """
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, hidden)
        # Only the last time step feeds the classifier.
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero state of shape ``(1, batch_size, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        weights, so it stays valid wherever the model has been moved and does
        not rely on any module-level ``device`` variable.
        """
        return self.fc.weight.new_zeros((1, batch_size, self.hidden_size))

# Hyperparameters; the vocabulary is both the input and the output space.
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharGRU(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
epochs = 20

# perf_counter() is meant for measuring durations and is not affected by
# wall-clock adjustments made while training runs.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Every batch starts from a fresh zero state; the state returned by
        # the model is not carried over to the next batch.
        output, _ = model(data, model.init_hidden(data.size(0)))
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Evaluation: top-1 accuracy over the held-out loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output, _ = model(data, model.init_hidden(data.size(0)))
        # Index of the largest logit per row; `.data` is not needed under
        # no_grad().
        predicted = output.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.6861268352723573
Epoch 2/20, Training Loss: 1.4944427352541831
Epoch 3/20, Training Loss: 1.4488336394828965
Epoch 4/20, Training Loss: 1.4227918446388863
Epoch 5/20, Training Loss: 1.407423982812424
Epoch 6/20, Training Loss: 1.396357479455549
Epoch 7/20, Training Loss: 1.3875296961898715
Epoch 8/20, Training Loss: 1.3806749197108883
Epoch 9/20, Training Loss: 1.3746316732087493
Epoch 10/20, Training Loss: 1.3712931449855803
Epoch 11/20, Training Loss: 1.3683031995834734
Epoch 12/20, Training Loss: 1.3650096261973226
Epoch 13/20, Training Loss: 1.3632348598297888
Epoch 14/20, Training Loss: 1.3652427787419645
Epoch 15/20, Training Loss: 1.366415409532622
Epoch 16/20, Training Loss: 1.3651240237283762
Epoch 17/20, Training Loss: 1.3639760853501481
Epoch 18/20, Training Loss: 1.3656101293489977
Epoch 19/20, Training Loss: 1.3670186329541094
Epoch 20/20, Training Loss: 1.3685405236325316
Total execution time for training: 390.2198209762573 seconds
Accuracy on

In [None]:
# GRU for Sequences of 50
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Bound the wait and fail loudly on HTTP errors so that an error page is never
# silently used as the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

sequence_length = 50

# Character-level vocabulary: every distinct character gets an integer id.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

encoded_text = [char_to_int[ch] for ch in text]

# Each sample is a window of `sequence_length` ids; its target is the id that
# immediately follows the window.
encoded = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window as a view of `encoded` instead of
# building one Python list per position. The final window is dropped because
# no character follows it. The windows share memory, so treat them as
# read-only.
sequences = encoded.unfold(0, sequence_length, 1)[:-1]
targets = encoded[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing each input sequence with its target.

    Args:
        sequences: Indexable collection of input sequences.
        targets: Indexable collection of targets, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequences`` and ``targets`` differ in length.
    """

    def __init__(self, sequences, targets):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, so reject it up front.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, "
                f"got {len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap the tensors in a Dataset, then hold out 20% of the samples at random.
# NOTE(review): the windows are built with stride 1 and overlap heavily, so a
# random split places near-duplicate samples in both subsets - confirm this is
# acceptable for the reported test accuracy.
dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
)

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

class CharGRU(nn.Module):
    """Sequence classifier: embedding -> single-layer GRU -> linear head.

    Args:
        input_size: Number of distinct input ids (rows of the embedding).
        hidden_size: Width of both the embedding and the GRU state.
        output_size: Number of logits produced per sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Compute one set of logits per input sequence.

        Args:
            x: Integer ids of shape ``(batch, seq_len)``.
            hidden: Optional initial state ``h_0``. When ``None``, ``nn.GRU``
                starts from a zero state.

        Returns:
            ``(logits, hidden)``: logits of shape ``(batch, output_size)`` and
            the final state ``h_n`` of the GRU.
        """
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, hidden)
        # Only the last time step feeds the classifier.
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero state of shape ``(1, batch_size, hidden_size)``.

        The state is allocated with the dtype and device of the model's own
        weights, so it stays valid wherever the model has been moved and does
        not rely on any module-level ``device`` variable.
        """
        return self.fc.weight.new_zeros((1, batch_size, self.hidden_size))
# Hyperparameters; the vocabulary is both the input and the output space.
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharGRU(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
epochs = 20

# perf_counter() is meant for measuring durations and is not affected by
# wall-clock adjustments made while training runs.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # Every batch starts from a fresh zero state; the state returned by
        # the model is not carried over to the next batch.
        output, _ = model(data, model.init_hidden(data.size(0)))
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Evaluation: top-1 accuracy over the held-out loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output, _ = model(data, model.init_hidden(data.size(0)))
        # Index of the largest logit per row; `.data` is not needed under
        # no_grad().
        predicted = output.argmax(dim=1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.683142794476505
Epoch 2/20, Training Loss: 1.4841036172221644
Epoch 3/20, Training Loss: 1.4351261619720546
Epoch 4/20, Training Loss: 1.4081026340485165
Epoch 5/20, Training Loss: 1.3900087800605314
Epoch 6/20, Training Loss: 1.3779923070060216
Epoch 7/20, Training Loss: 1.3691461610753026
Epoch 8/20, Training Loss: 1.3626731731357746
Epoch 9/20, Training Loss: 1.3562186070672582
Epoch 10/20, Training Loss: 1.3521321563138593
Epoch 11/20, Training Loss: 1.3495454395831425
Epoch 12/20, Training Loss: 1.3487541041728535
Epoch 13/20, Training Loss: 1.3455588955342932
Epoch 14/20, Training Loss: 1.3471375236875818
Epoch 15/20, Training Loss: 1.3462522669102537
Epoch 16/20, Training Loss: 1.3446821476000315
Epoch 17/20, Training Loss: 1.3478725592403633
Epoch 18/20, Training Loss: 1.3475939247550384
Epoch 19/20, Training Loss: 1.350850970634616
Epoch 20/20, Training Loss: 1.353002951667087
Total execution time for training: 493.34515857696533 seconds
Accuracy o