In [25]:
#importing libraries
import torch
import torch.nn.functional as F
from torch import nn
import pandas as pd
import matplotlib.pyplot as plt
import urllib.request
import re
from collections import Counter

In [26]:
#setting device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [27]:
import re

#path of the RTF file
file_path = "Emma_by_Jane_Austen.rtf"

#reading the RTF file
with open(file_path, 'r', encoding='utf-8') as file:
    rtf_content = file.read()

#function to clean RTF file
def clean_rtf(rtf):
    # Remove RTF formatting
    # This regex removes everything that isn't plain text
    cleaned_text = re.sub(r'{\\.*?}', '', rtf)  # Remove RTF groups
    cleaned_text = re.sub(r'\\[a-z]+\d* ?', '', cleaned_text)  # Remove RTF commands
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Remove extra spaces
    return cleaned_text.strip()

#cleaning the RTF text
plain_text = clean_rtf(rtf_content)

#displaying the first 500 characters of the plain text
print(plain_text[:500])


{ The Project Gutenberg EBook of Emma, by Jane Austen\ \ This eBook is for the use of anyone anywhere at no cost and with\ almost no restrictions whatsoever. You may copy it, give it away or\ re-use it under the terms of the Project Gutenberg License included\ with this eBook or online at www.gutenberg.org\ \ \ Title: Emma\ \ Author: Jane Austen\ \ Release Date: August, 1994 [Etext #158]\ Posting Date: January 21, 2010\ Last Updated: October 17, 2016\ \ Language: English\ \ Character set encodin


In [28]:
#converting text to lowercase
plain_text = plain_text.lower()

#removing unwanted characters
cleaned_text = re.sub('[^a-zA-Z0-9 .]', '', plain_text)

#splitting into words
words = cleaned_text.split()

print(cleaned_text[:400])

 the project gutenberg ebook of emma by jane austen  this ebook is for the use of anyone anywhere at no cost and with almost no restrictions whatsoever. you may copy it give it away or reuse it under the terms of the project gutenberg license included with this ebook or online at www.gutenberg.org   title emma  author jane austen  release date august 1994 etext 158 posting date january 21 2010 las


In [29]:
#creating vocab of unique words
words_vocab = sorted(set(words))
stoi = {s: i for i, s in enumerate(words_vocab)}
itos = {i: s for i, s in enumerate(words_vocab)}

In [30]:
print(len(words))

160442


In [31]:
#function to create input-output pairs
def create_dataset(words, block_size):
    X, Y = [], []
    for i in range(len(words) - block_size):
        context = [stoi[words[j]] for j in range(i, i + block_size)]
        next_word = stoi[words[i + block_size]]
        X.append(context)
        Y.append(next_word)
    return torch.tensor(X).to(device), torch.tensor(Y).to(device)

In [32]:
class NextWordMLP(nn.Module):
    def __init__(self, block_size, vocab_size, emb_dim, hidden_size, activation):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.lin1 = nn.Linear(block_size * emb_dim, hidden_size)
        self.activation = activation
        self.lin2 = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.emb(x)
        x = x.view(x.shape[0], -1)
        x = self.activation(self.lin1(x))
        x = self.lin2(x)
        return x

In [33]:
embedding_sizes = [64, 128]
context_lengths = [5, 10, 15]
activations = [F.relu, torch.tanh]
epochs = 500

In [52]:
def train_model(embedding_size, block_size, activation_fn):
    hidden_size = 512
    model = NextWordMLP(block_size, len(stoi), embedding_size, hidden_size, activation_fn).to(device)
    loss_fn = nn.CrossEntropyLoss()
    opt = torch.optim.AdamW(model.parameters(), lr=0.001)
    X, Y = create_dataset(words, block_size)

    
    batch_size = 512
    losses = []
    for epoch in range(epochs):
        epoch_loss = 0
        for i in range(0, len(X), batch_size):
            x_batch = X[i:i + batch_size]
            y_batch = Y[i:i + batch_size]
            y_pred = model(x_batch)
            loss = loss_fn(y_pred, y_batch)
            loss.backward()
            opt.step()
            opt.zero_grad()
            epoch_loss += loss.item()
        
        losses.append(epoch_loss / (len(X) // batch_size))
        if epoch % 1 == 0:
            print(f"Epoch {epoch}, Loss: {losses[-1]:.4f}")

        #early stopping in case loss plateaus
        if len(losses) > 10 and abs(losses[-1] - losses[-10]) < 0.001:
            print("Early stopping")
            break

    return model

In [35]:
def generate_text(model, itos, stoi, block_size, max_length=50):
    context = [0] * block_size
    generated_words = []
    for _ in range(max_length):
        x = torch.tensor(context).view(1, -1).to(device)
        y_pred = model(x)
        ix = torch.distributions.categorical.Categorical(logits=y_pred).sample().item()
        word = itos[ix]
        generated_words.append(word)
        context = context[1:] + [ix]
    return ' '.join(generated_words)

In [53]:
def save_model(model, embedding_size, block_size, activation_fn_name):
    model_filename = f"saved_models/model_emb{embedding_size}_ctx{block_size}_act{activation_fn_name}.pt"
    torch.save(model.state_dict(), model_filename)
    print(f"Model saved as {model_filename}")

In [54]:
embedding_size = embedding_sizes[0]
block_size = context_lengths[0]
activation_fn = activations[0]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model1 = train_model(embedding_size, block_size, activation_fn)
save_model(model1, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model1, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 64, Context Length: 5, Activation: relu
Epoch 0, Loss: 6.5846
Epoch 1, Loss: 5.5008
Epoch 2, Loss: 4.7264
Epoch 3, Loss: 3.7315
Epoch 4, Loss: 3.1709
Epoch 5, Loss: 2.8659
Epoch 6, Loss: 2.6344
Epoch 7, Loss: 2.4448
Epoch 8, Loss: 2.2835
Epoch 9, Loss: 2.1424
Epoch 10, Loss: 2.0172
Epoch 11, Loss: 1.9042
Epoch 12, Loss: 1.8009
Epoch 13, Loss: 1.7061
Epoch 14, Loss: 1.6177
Epoch 15, Loss: 1.5356
Epoch 16, Loss: 1.4581
Epoch 17, Loss: 1.3852
Epoch 18, Loss: 1.3161
Epoch 19, Loss: 1.2507
Epoch 20, Loss: 1.1882
Epoch 21, Loss: 1.1287
Epoch 22, Loss: 1.0718
Epoch 23, Loss: 1.0170
Epoch 24, Loss: 0.9647
Epoch 25, Loss: 0.9147
Epoch 26, Loss: 0.8663
Epoch 27, Loss: 0.8202
Epoch 28, Loss: 0.7759
Epoch 29, Loss: 0.7332
Epoch 30, Loss: 0.6920
Epoch 31, Loss: 0.6530
Epoch 32, Loss: 0.6153
Epoch 33, Loss: 0.5790
Epoch 34, Loss: 0.5443
Epoch 35, Loss: 0.5114
Epoch 36, Loss: 0.4793
Epoch 37, Loss: 0.4489
Epoch 38, Loss: 0.4198
Epoch 39, Loss: 0.3921
Epoch 40, Loss: 0.3

In [55]:
embedding_size = embedding_sizes[0]
block_size = context_lengths[0]
activation_fn = activations[1]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model2 = train_model(embedding_size, block_size, activation_fn)
save_model(model2, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model2, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 64, Context Length: 5, Activation: tanh
Epoch 0, Loss: 6.8163
Epoch 1, Loss: 5.5102
Epoch 2, Loss: 4.8018
Epoch 3, Loss: 4.1668
Epoch 4, Loss: 3.7250
Epoch 5, Loss: 3.4147
Epoch 6, Loss: 3.1718
Epoch 7, Loss: 2.9695
Epoch 8, Loss: 2.7932
Epoch 9, Loss: 2.6370
Epoch 10, Loss: 2.4941
Epoch 11, Loss: 2.3626
Epoch 12, Loss: 2.2395
Epoch 13, Loss: 2.1240
Epoch 14, Loss: 2.0145
Epoch 15, Loss: 1.9110
Epoch 16, Loss: 1.8115
Epoch 17, Loss: 1.7170
Epoch 18, Loss: 1.6260
Epoch 19, Loss: 1.5390
Epoch 20, Loss: 1.4551
Epoch 21, Loss: 1.3752
Epoch 22, Loss: 1.2977
Epoch 23, Loss: 1.2238
Epoch 24, Loss: 1.1525
Epoch 25, Loss: 1.0849
Epoch 26, Loss: 1.0195
Epoch 27, Loss: 0.9575
Epoch 28, Loss: 0.8982
Epoch 29, Loss: 0.8414
Epoch 30, Loss: 0.7873
Epoch 31, Loss: 0.7365
Epoch 32, Loss: 0.6873
Epoch 33, Loss: 0.6410
Epoch 34, Loss: 0.5972
Epoch 35, Loss: 0.5557
Epoch 36, Loss: 0.5164
Epoch 37, Loss: 0.4789
Epoch 38, Loss: 0.4437
Epoch 39, Loss: 0.4105
Epoch 40, Loss: 0.3

In [57]:
embedding_size = embedding_sizes[0]
block_size = context_lengths[1]
activation_fn = activations[0]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model3 = train_model(embedding_size, block_size, activation_fn)
save_model(model3, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model3, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 64, Context Length: 10, Activation: relu
Epoch 0, Loss: 6.6306
Epoch 1, Loss: 5.5228
Epoch 2, Loss: 4.6195
Epoch 3, Loss: 3.4069
Epoch 4, Loss: 2.7717
Epoch 5, Loss: 2.4056
Epoch 6, Loss: 2.1283
Epoch 7, Loss: 1.9029
Epoch 8, Loss: 1.7122
Epoch 9, Loss: 1.5462
Epoch 10, Loss: 1.3982
Epoch 11, Loss: 1.2643
Epoch 12, Loss: 1.1418
Epoch 13, Loss: 1.0285
Epoch 14, Loss: 0.9229
Epoch 15, Loss: 0.8243
Epoch 16, Loss: 0.7318
Epoch 17, Loss: 0.6448
Epoch 18, Loss: 0.5631
Epoch 19, Loss: 0.4870
Epoch 20, Loss: 0.4160
Epoch 21, Loss: 0.3505
Epoch 22, Loss: 0.2909
Epoch 23, Loss: 0.2378
Epoch 24, Loss: 0.1917
Epoch 25, Loss: 0.1539
Epoch 26, Loss: 0.1245
Epoch 27, Loss: 0.1048
Epoch 28, Loss: 0.0916
Epoch 29, Loss: 0.0834
Epoch 30, Loss: 0.0731
Epoch 31, Loss: 0.0605
Epoch 32, Loss: 0.0478
Epoch 33, Loss: 0.0375
Epoch 34, Loss: 0.0294
Epoch 35, Loss: 0.0239
Epoch 36, Loss: 0.0188
Epoch 37, Loss: 0.0172
Epoch 38, Loss: 0.0151
Epoch 39, Loss: 0.0125
Epoch 40, Loss: 0.

In [58]:
embedding_size = embedding_sizes[0]
block_size = context_lengths[1]
activation_fn = activations[1]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model4 = train_model(embedding_size, block_size, activation_fn)
save_model(model4, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model4, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 64, Context Length: 10, Activation: tanh
Epoch 0, Loss: 6.8201
Epoch 1, Loss: 5.4913
Epoch 2, Loss: 4.6565
Epoch 3, Loss: 3.8966
Epoch 4, Loss: 3.3750
Epoch 5, Loss: 3.0077
Epoch 6, Loss: 2.7192
Epoch 7, Loss: 2.4784
Epoch 8, Loss: 2.2697
Epoch 9, Loss: 2.0840
Epoch 10, Loss: 1.9153
Epoch 11, Loss: 1.7603
Epoch 12, Loss: 1.6160
Epoch 13, Loss: 1.4808
Epoch 14, Loss: 1.3533
Epoch 15, Loss: 1.2327
Epoch 16, Loss: 1.1184
Epoch 17, Loss: 1.0101
Epoch 18, Loss: 0.9075
Epoch 19, Loss: 0.8107
Epoch 20, Loss: 0.7195
Epoch 21, Loss: 0.6340
Epoch 22, Loss: 0.5543
Epoch 23, Loss: 0.4805
Epoch 24, Loss: 0.4126
Epoch 25, Loss: 0.3509
Epoch 26, Loss: 0.2952
Epoch 27, Loss: 0.2457
Epoch 28, Loss: 0.2023
Epoch 29, Loss: 0.1651
Epoch 30, Loss: 0.1337
Epoch 31, Loss: 0.1080
Epoch 32, Loss: 0.0873
Epoch 33, Loss: 0.0711
Epoch 34, Loss: 0.0584
Epoch 35, Loss: 0.0486
Epoch 36, Loss: 0.0407
Epoch 37, Loss: 0.0346
Epoch 38, Loss: 0.0294
Epoch 39, Loss: 0.0252
Epoch 40, Loss: 0.

In [59]:
embedding_size = embedding_sizes[0]
block_size = context_lengths[2]
activation_fn = activations[0]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model5 = train_model(embedding_size, block_size, activation_fn)
save_model(model5, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model5, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 64, Context Length: 15, Activation: relu
Epoch 0, Loss: 6.6680
Epoch 1, Loss: 5.5343
Epoch 2, Loss: 4.5362
Epoch 3, Loss: 3.1946
Epoch 4, Loss: 2.4880
Epoch 5, Loss: 2.0680
Epoch 6, Loss: 1.7493
Epoch 7, Loss: 1.4897
Epoch 8, Loss: 1.2700
Epoch 9, Loss: 1.0789
Epoch 10, Loss: 0.9090
Epoch 11, Loss: 0.7565
Epoch 12, Loss: 0.6181
Epoch 13, Loss: 0.4932
Epoch 14, Loss: 0.3813
Epoch 15, Loss: 0.2841
Epoch 16, Loss: 0.2031
Epoch 17, Loss: 0.1415
Epoch 18, Loss: 0.1006
Epoch 19, Loss: 0.0783
Epoch 20, Loss: 0.0684
Epoch 21, Loss: 0.0673
Epoch 22, Loss: 0.0605
Epoch 23, Loss: 0.0515
Epoch 24, Loss: 0.0449
Epoch 25, Loss: 0.0366
Epoch 26, Loss: 0.0275
Epoch 27, Loss: 0.0182
Epoch 28, Loss: 0.0111
Epoch 29, Loss: 0.0067
Epoch 30, Loss: 0.0049
Epoch 31, Loss: 0.0043
Epoch 32, Loss: 0.0037
Epoch 33, Loss: 0.0036
Epoch 34, Loss: 0.0032
Epoch 35, Loss: 0.0032
Epoch 36, Loss: 0.0029
Epoch 37, Loss: 0.0029
Epoch 38, Loss: 0.0026
Epoch 39, Loss: 0.0025
Epoch 40, Loss: 0.

In [60]:
embedding_size = embedding_sizes[0]
block_size = context_lengths[2]
activation_fn = activations[1]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model6 = train_model(embedding_size, block_size, activation_fn)
save_model(model6, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model6, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 64, Context Length: 15, Activation: tanh
Epoch 0, Loss: 6.7878
Epoch 1, Loss: 5.4677
Epoch 2, Loss: 4.5579
Epoch 3, Loss: 3.7167
Epoch 4, Loss: 3.1245
Epoch 5, Loss: 2.7075
Epoch 6, Loss: 2.3803
Epoch 7, Loss: 2.1087
Epoch 8, Loss: 1.8745
Epoch 9, Loss: 1.6674
Epoch 10, Loss: 1.4804
Epoch 11, Loss: 1.3090
Epoch 12, Loss: 1.1501
Epoch 13, Loss: 1.0022
Epoch 14, Loss: 0.8643
Epoch 15, Loss: 0.7363
Epoch 16, Loss: 0.6183
Epoch 17, Loss: 0.5107
Epoch 18, Loss: 0.4142
Epoch 19, Loss: 0.3292
Epoch 20, Loss: 0.2563
Epoch 21, Loss: 0.1958
Epoch 22, Loss: 0.1478
Epoch 23, Loss: 0.1114
Epoch 24, Loss: 0.0851
Epoch 25, Loss: 0.0665
Epoch 26, Loss: 0.0532
Epoch 27, Loss: 0.0433
Epoch 28, Loss: 0.0358
Epoch 29, Loss: 0.0299
Epoch 30, Loss: 0.0250
Epoch 31, Loss: 0.0211
Epoch 32, Loss: 0.0179
Epoch 33, Loss: 0.0152
Epoch 34, Loss: 0.0129
Epoch 35, Loss: 0.0111
Epoch 36, Loss: 0.0095
Epoch 37, Loss: 0.0082
Epoch 38, Loss: 0.0070
Epoch 39, Loss: 0.0060
Epoch 40, Loss: 0.

In [61]:
embedding_size = embedding_sizes[1]
block_size = context_lengths[0]
activation_fn = activations[0]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model7 = train_model(embedding_size, block_size, activation_fn)
save_model(model7, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model7, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 128, Context Length: 5, Activation: relu
Epoch 0, Loss: 6.4765
Epoch 1, Loss: 5.3467
Epoch 2, Loss: 4.5104
Epoch 3, Loss: 3.4308
Epoch 4, Loss: 2.7926
Epoch 5, Loss: 2.4139
Epoch 6, Loss: 2.1232
Epoch 7, Loss: 1.8838
Epoch 8, Loss: 1.6787
Epoch 9, Loss: 1.4991
Epoch 10, Loss: 1.3388
Epoch 11, Loss: 1.1951
Epoch 12, Loss: 1.0638
Epoch 13, Loss: 0.9449
Epoch 14, Loss: 0.8353
Epoch 15, Loss: 0.7353
Epoch 16, Loss: 0.6432
Epoch 17, Loss: 0.5604
Epoch 18, Loss: 0.4838
Epoch 19, Loss: 0.4157
Epoch 20, Loss: 0.3540
Epoch 21, Loss: 0.2999
Epoch 22, Loss: 0.2525
Epoch 23, Loss: 0.2124
Epoch 24, Loss: 0.1788
Epoch 25, Loss: 0.1526
Epoch 26, Loss: 0.1324
Epoch 27, Loss: 0.1181
Epoch 28, Loss: 0.1073
Epoch 29, Loss: 0.1009
Epoch 30, Loss: 0.0959
Epoch 31, Loss: 0.0926
Epoch 32, Loss: 0.0892
Epoch 33, Loss: 0.0865
Epoch 34, Loss: 0.0839
Epoch 35, Loss: 0.0819
Epoch 36, Loss: 0.0801
Epoch 37, Loss: 0.0788
Epoch 38, Loss: 0.0777
Epoch 39, Loss: 0.0765
Epoch 40, Loss: 0.

In [62]:
embedding_size = embedding_sizes[1]
block_size = context_lengths[0]
activation_fn = activations[1]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model8 = train_model(embedding_size, block_size, activation_fn)
save_model(model8, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model8, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 128, Context Length: 5, Activation: tanh
Epoch 0, Loss: 6.6657
Epoch 1, Loss: 5.3424
Epoch 2, Loss: 4.5681
Epoch 3, Loss: 3.8796
Epoch 4, Loss: 3.3790
Epoch 5, Loss: 3.0083
Epoch 6, Loss: 2.7072
Epoch 7, Loss: 2.4508
Epoch 8, Loss: 2.2238
Epoch 9, Loss: 2.0203
Epoch 10, Loss: 1.8338
Epoch 11, Loss: 1.6627
Epoch 12, Loss: 1.5035
Epoch 13, Loss: 1.3553
Epoch 14, Loss: 1.2163
Epoch 15, Loss: 1.0872
Epoch 16, Loss: 0.9668
Epoch 17, Loss: 0.8558
Epoch 18, Loss: 0.7534
Epoch 19, Loss: 0.6596
Epoch 20, Loss: 0.5743
Epoch 21, Loss: 0.4971
Epoch 22, Loss: 0.4281
Epoch 23, Loss: 0.3663
Epoch 24, Loss: 0.3122
Epoch 25, Loss: 0.2654
Epoch 26, Loss: 0.2252
Epoch 27, Loss: 0.1910
Epoch 28, Loss: 0.1630
Epoch 29, Loss: 0.1400
Epoch 30, Loss: 0.1224
Epoch 31, Loss: 0.1085
Epoch 32, Loss: 0.0982
Epoch 33, Loss: 0.0894
Epoch 34, Loss: 0.0831
Epoch 35, Loss: 0.0775
Epoch 36, Loss: 0.0733
Epoch 37, Loss: 0.0698
Epoch 38, Loss: 0.0668
Epoch 39, Loss: 0.0636
Epoch 40, Loss: 0.

In [63]:
embedding_size = embedding_sizes[1]
block_size = context_lengths[1]
activation_fn = activations[0]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model9 = train_model(embedding_size, block_size, activation_fn)
save_model(model9, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model9, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 128, Context Length: 10, Activation: relu
Epoch 0, Loss: 6.5455
Epoch 1, Loss: 5.3559
Epoch 2, Loss: 4.3474
Epoch 3, Loss: 3.0230
Epoch 4, Loss: 2.2512
Epoch 5, Loss: 1.7703
Epoch 6, Loss: 1.4031
Epoch 7, Loss: 1.1030
Epoch 8, Loss: 0.8510
Epoch 9, Loss: 0.6374
Epoch 10, Loss: 0.4580
Epoch 11, Loss: 0.3118
Epoch 12, Loss: 0.2002
Epoch 13, Loss: 0.1245
Epoch 14, Loss: 0.0799
Epoch 15, Loss: 0.0545
Epoch 16, Loss: 0.0369
Epoch 17, Loss: 0.0249
Epoch 18, Loss: 0.0181
Epoch 19, Loss: 0.0140
Epoch 20, Loss: 0.0110
Epoch 21, Loss: 0.0093
Epoch 22, Loss: 0.0079
Epoch 23, Loss: 0.0070
Epoch 24, Loss: 0.0059
Epoch 25, Loss: 0.0053
Epoch 26, Loss: 0.0046
Epoch 27, Loss: 0.0042
Epoch 28, Loss: 0.0036
Epoch 29, Loss: 0.0032
Epoch 30, Loss: 0.0027
Epoch 31, Loss: 0.0025
Epoch 32, Loss: 0.0022
Epoch 33, Loss: 0.0021
Epoch 34, Loss: 0.8769
Epoch 35, Loss: 0.3743
Epoch 36, Loss: 0.0290
Epoch 37, Loss: 0.0070
Epoch 38, Loss: 0.0048
Epoch 39, Loss: 0.0041
Epoch 40, Loss: 0

In [64]:
embedding_size = embedding_sizes[1]
block_size = context_lengths[1]
activation_fn = activations[1]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model10 = train_model(embedding_size, block_size, activation_fn)
save_model(model10, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model10, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 128, Context Length: 10, Activation: tanh
Epoch 0, Loss: 6.6169
Epoch 1, Loss: 5.2919
Epoch 2, Loss: 4.3724
Epoch 3, Loss: 3.5306
Epoch 4, Loss: 2.9099
Epoch 5, Loss: 2.4495
Epoch 6, Loss: 2.0766
Epoch 7, Loss: 1.7620
Epoch 8, Loss: 1.4895
Epoch 9, Loss: 1.2493
Epoch 10, Loss: 1.0350
Epoch 11, Loss: 0.8440
Epoch 12, Loss: 0.6739
Epoch 13, Loss: 0.5249
Epoch 14, Loss: 0.3973
Epoch 15, Loss: 0.2921
Epoch 16, Loss: 0.2089
Epoch 17, Loss: 0.1470
Epoch 18, Loss: 0.1037
Epoch 19, Loss: 0.0753
Epoch 20, Loss: 0.0565
Epoch 21, Loss: 0.0441
Epoch 22, Loss: 0.0351
Epoch 23, Loss: 0.0287
Epoch 24, Loss: 0.0235
Epoch 25, Loss: 0.0197
Epoch 26, Loss: 0.0163
Epoch 27, Loss: 0.0139
Epoch 28, Loss: 0.0116
Epoch 29, Loss: 0.0100
Epoch 30, Loss: 0.0084
Epoch 31, Loss: 0.0073
Epoch 32, Loss: 0.0061
Epoch 33, Loss: 0.0054
Epoch 34, Loss: 0.0045
Epoch 35, Loss: 0.0040
Epoch 36, Loss: 0.0033
Epoch 37, Loss: 0.0030
Epoch 38, Loss: 0.0025
Epoch 39, Loss: 0.0023
Epoch 40, Loss: 0

In [65]:
embedding_size = embedding_sizes[1]
block_size = context_lengths[2]
activation_fn = activations[0]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model11 = train_model(embedding_size, block_size, activation_fn)
save_model(model11, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model11, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 128, Context Length: 15, Activation: relu
Epoch 0, Loss: 6.5768
Epoch 1, Loss: 5.3418
Epoch 2, Loss: 4.2254
Epoch 3, Loss: 2.7315
Epoch 4, Loss: 1.8117
Epoch 5, Loss: 1.2447
Epoch 6, Loss: 0.8268
Epoch 7, Loss: 0.5083
Epoch 8, Loss: 0.2767
Epoch 9, Loss: 0.1350
Epoch 10, Loss: 0.0691
Epoch 11, Loss: 0.0401
Epoch 12, Loss: 0.0241
Epoch 13, Loss: 0.0166
Epoch 14, Loss: 0.0126
Epoch 15, Loss: 0.0104
Epoch 16, Loss: 0.0086
Epoch 17, Loss: 0.0074
Epoch 18, Loss: 0.0062
Epoch 19, Loss: 0.0055
Epoch 20, Loss: 0.0047
Epoch 21, Loss: 0.0041
Epoch 22, Loss: 0.0035
Epoch 23, Loss: 0.0031
Epoch 24, Loss: 0.0026
Epoch 25, Loss: 0.0023
Epoch 26, Loss: 0.0020
Epoch 27, Loss: 0.0017
Epoch 28, Loss: 0.0016
Epoch 29, Loss: 0.0015
Epoch 30, Loss: 0.0013
Epoch 31, Loss: 0.0011
Epoch 32, Loss: 0.0009
Epoch 33, Loss: 0.0008
Epoch 34, Loss: 0.0008
Epoch 35, Loss: 0.0007
Epoch 36, Loss: 1.1488
Epoch 37, Loss: 0.4880
Epoch 38, Loss: 0.0268
Epoch 39, Loss: 0.0053
Epoch 40, Loss: 0

In [66]:
embedding_size = embedding_sizes[1]
block_size = context_lengths[2]
activation_fn = activations[1]
print(f"\nTraining with Embedding Size: {embedding_size}, Context Length: {block_size}, Activation: {activation_fn.__name__}")
model12 = train_model(embedding_size, block_size, activation_fn)
save_model(model12, embedding_size, block_size, activation_fn.__name__)
print("Generated Text:")
print(generate_text(model12, itos, stoi, block_size, max_length=100))


Training with Embedding Size: 128, Context Length: 15, Activation: tanh
Epoch 0, Loss: 6.6133
Epoch 1, Loss: 5.2001
Epoch 2, Loss: 4.1508
Epoch 3, Loss: 3.1974
Epoch 4, Loss: 2.4973
Epoch 5, Loss: 1.9754
Epoch 6, Loss: 1.5585
Epoch 7, Loss: 1.2152
Epoch 8, Loss: 0.9265
Epoch 9, Loss: 0.6834
Epoch 10, Loss: 0.4820
Epoch 11, Loss: 0.3228
Epoch 12, Loss: 0.2058
Epoch 13, Loss: 0.1293
Epoch 14, Loss: 0.0843
Epoch 15, Loss: 0.0588
Epoch 16, Loss: 0.0435
Epoch 17, Loss: 0.0336
Epoch 18, Loss: 0.0267
Epoch 19, Loss: 0.0218
Epoch 20, Loss: 0.0178
Epoch 21, Loss: 0.0149
Epoch 22, Loss: 0.0124
Epoch 23, Loss: 0.0105
Epoch 24, Loss: 0.0088
Epoch 25, Loss: 0.0076
Epoch 26, Loss: 0.0064
Epoch 27, Loss: 0.0055
Epoch 28, Loss: 0.0046
Epoch 29, Loss: 0.0040
Epoch 30, Loss: 0.0034
Epoch 31, Loss: 0.0030
Epoch 32, Loss: 0.0025
Epoch 33, Loss: 0.0022
Epoch 34, Loss: 0.0019
Epoch 35, Loss: 0.0017
Epoch 36, Loss: 0.0014
Epoch 37, Loss: 0.0013
Epoch 38, Loss: 0.0011
Epoch 39, Loss: 0.0010
Epoch 40, Loss: 0