## Imports

In [1]:
from IPython.display import display, Markdown
from tqdm import tqdm

# pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset

## Device

In [2]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda:0"  # Nvidia GPU
elif torch.backends.mps.is_available():
    device = "mps"  # Apple Silicon GPU
else:
    device = "cpu"
print(f"Device = {device}")

Device = cuda:0


## Hyperparameters

In [3]:
# seed (only the torch global RNG is seeded here)
seed = 42
torch.manual_seed(seed)

# data type used for the token id tensors
data_type = torch.int64

# Tokenizer Arguments
seq_length = 100  # number of characters per training window
vocab_size = 65  # placeholder; reassigned from the corpus after the text is loaded
d_embed = 1  # width of each token embedding vector

# Model Arguments
max_length = 1000 # maximum number of characters to generate

# Validation Split
validation_size = 0.2  # fraction of the tokens held out for validation

# Training Arguments
learning_rate = 2e-5
num_epochs = 10
batch_size = 128

# RNN Arguments
# The recurrent layer consumes embedding vectors, so its input width is derived
# from d_embed instead of repeating the literal (the two must always agree).
rnn_input_size = d_embed
rnn_hidden_size = 512
rnn_num_layers = 2

# seq2seq Arguments
# encoder
encoder_input_size = d_embed  # same constraint as rnn_input_size
encoder_hidden_size = 512
encoder_num_layers = 2
# decoder
# The decoder is initialised with the encoder's hidden state, so these two
# values have to match the encoder's hidden size and layer count.
decoder_hidden_size = 512
decoder_num_layers = 2

## Dataset

In [4]:
# directory (relative to the notebook) that holds the raw text files
dataset_path = "data/"

In [5]:
# full path of the shakespeare corpus inside the dataset directory
shakespeare_dataset = f"{dataset_path}shakespeare.txt"

In [6]:
# read the dataset
# (the whole file is decoded as UTF-8 and kept in memory as a single string)
with open(shakespeare_dataset, 'r', encoding='utf-8') as f:
    shakespeare_text = f.read()

In [7]:
# display the first 1000 characters
# (the raw text is handed to the Markdown renderer as-is)
display(Markdown(shakespeare_text[:1000]))

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [8]:
# display the length of the text (counted in characters of the decoded string)
display(Markdown(f'Total number of characters in the text: {len(shakespeare_text)}'))

Total number of characters in the text: 1115394

In [9]:
# collect the distinct characters of the corpus in sorted order;
# the vocabulary size is re-derived from the data at this point
chars = sorted(set(shakespeare_text))
vocab_size = len(chars)
display(Markdown(f'Unique characters: {chars}'))
display(Markdown(f'Total number of unique characters: {vocab_size}'))

Unique characters: ['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

Total number of unique characters: 65

## Tokenization (Character Level)

In [10]:
# character -> id: each character is numbered by its position in the sorted list
char_to_int = dict(zip(chars, range(len(chars))))
# id -> character: the inverse lookup, built from the same ordering
int_to_char = dict(enumerate(chars))

In [11]:
# display the mappings (both lookup tables, rendered through their dict repr)
display(Markdown(f'Character to integer mapping: {char_to_int}'))
display(Markdown(f'Integer to character mapping: {int_to_char}'))

Character to integer mapping: {'\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, "'": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}

Integer to character mapping: {0: '\n', 1: ' ', 2: '!', 3: '$', 4: '&', 5: "'", 6: ',', 7: '-', 8: '.', 9: '3', 10: ':', 11: ';', 12: '?', 13: 'A', 14: 'B', 15: 'C', 16: 'D', 17: 'E', 18: 'F', 19: 'G', 20: 'H', 21: 'I', 22: 'J', 23: 'K', 24: 'L', 25: 'M', 26: 'N', 27: 'O', 28: 'P', 29: 'Q', 30: 'R', 31: 'S', 32: 'T', 33: 'U', 34: 'V', 35: 'W', 36: 'X', 37: 'Y', 38: 'Z', 39: 'a', 40: 'b', 41: 'c', 42: 'd', 43: 'e', 44: 'f', 45: 'g', 46: 'h', 47: 'i', 48: 'j', 49: 'k', 50: 'l', 51: 'm', 52: 'n', 53: 'o', 54: 'p', 55: 'q', 56: 'r', 57: 's', 58: 't', 59: 'u', 60: 'v', 61: 'w', 62: 'x', 63: 'y', 64: 'z'}

In [12]:
# sample tokenization
# round trip: encode a short string to ids, then decode the ids back to text
sample_text = 'Hello, World!'
sample_text_int = [char_to_int[c] for c in sample_text]
display(Markdown(f'Text: {sample_text}'))
display(Markdown(f'Tokenized text: {sample_text_int}'))
display(Markdown(f'Detokenized text: {"".join([int_to_char[i] for i in sample_text_int])}'))

Text: Hello, World!

Tokenized text: [20, 43, 50, 50, 53, 6, 1, 35, 53, 56, 50, 42, 2]

Detokenized text: Hello, World!

In [13]:
# create a function to tokenize the text
def tokenize(text, mapping=None):
    """Convert a string into a list of integer token ids, one per character.

    Args:
        text: String to encode.
        mapping: Optional character -> id dictionary. When omitted, the
            notebook-level ``char_to_int`` table is used.

    Returns:
        list: The ids, in the same order as the characters of ``text``.

    Raises:
        KeyError: If ``text`` contains a character missing from the table.
    """
    table = char_to_int if mapping is None else mapping
    return [table[c] for c in text]


# create a function to detokenize the text
def detokenize(tokens, mapping=None):
    """Convert a sequence of token ids back into a string.

    Args:
        tokens: Iterable of ids. Plain ints and the 0-dim elements obtained by
            iterating a 1-D integer tensor are both accepted.
        mapping: Optional id -> character dictionary. When omitted, the
            notebook-level ``int_to_char`` table is used.

    Returns:
        str: The decoded characters joined in order.

    Raises:
        KeyError: If an id is missing from the table.
    """
    table = int_to_char if mapping is None else mapping
    # int() normalises tensor elements, which would otherwise not match the
    # plain-int dictionary keys and raise KeyError.
    return "".join(table[int(i)] for i in tokens)

In [14]:
# tokenize the text
# (the whole corpus becomes one 1-D tensor of ids with dtype `data_type`)
shakespeare_tokens = torch.tensor(tokenize(shakespeare_text), dtype=data_type)

In [15]:
# display the first 100 tokens (ids of the first 100 characters of the corpus)
display(Markdown(f'Tokens: {shakespeare_tokens[:100]}'))

Tokens: tensor([18, 47, 56, 57, 58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 14, 43, 44,
        53, 56, 43,  1, 61, 43,  1, 54, 56, 53, 41, 43, 43, 42,  1, 39, 52, 63,
         1, 44, 59, 56, 58, 46, 43, 56,  6,  1, 46, 43, 39, 56,  1, 51, 43,  1,
        57, 54, 43, 39, 49,  8,  0,  0, 13, 50, 50, 10,  0, 31, 54, 43, 39, 49,
         6,  1, 57, 54, 43, 39, 49,  8,  0,  0, 18, 47, 56, 57, 58,  1, 15, 47,
        58, 47, 64, 43, 52, 10,  0, 37, 53, 59])

In [16]:
# display token information: total count, number of distinct ids, and dtype
display(Markdown(f'Total number of tokens: {len(shakespeare_tokens)}'))
display(Markdown(f'Total number of unique tokens: {len(torch.unique(shakespeare_tokens))}'))
display(Markdown(f'dtype: {shakespeare_tokens.dtype}'))

Total number of tokens: 1115394

Total number of unique tokens: 65

dtype: torch.int64

## Preprocessing

In [17]:
# Train Validation Split
# contiguous split: the head of the corpus trains, the remaining tail validates
train_size = int(len(shakespeare_tokens) * (1 - validation_size))
train_tokens, validation_tokens = (
    shakespeare_tokens[:train_size],
    shakespeare_tokens[train_size:],
)

In [18]:
# display the number of tokens in the training and validation sets
# (the two counts add up to the total number of tokens)
display(Markdown(f'Total number of tokens in the training set: {len(train_tokens)}'))
display(Markdown(f'Total number of tokens in the validation set: {len(validation_tokens)}'))

Total number of tokens in the training set: 892315

Total number of tokens in the validation set: 223079

In [19]:
# create a function to create sequences
def create_sequences(tokens, seq_len=None):
    """Build every next-token training window from a 1-D token tensor.

    Window ``i`` of the inputs is ``tokens[i:i + seq_len]`` and the matching
    target window is the same slice shifted one position to the right, so each
    input position is paired with the token that follows it.

    The windows are produced with ``Tensor.unfold`` and are therefore views
    onto ``tokens`` rather than copies: no Python loop runs and the overlapping
    windows are not duplicated in memory. Call ``.contiguous()`` on a result if
    an independent copy is required.

    Args:
        tokens: 1-D tensor of token ids.
        seq_len: Window length. When omitted, the notebook-level
            ``seq_length`` is used.

    Returns:
        tuple: ``(inputs, targets)``, each of shape
        ``(len(tokens) - seq_len, seq_len)`` with the dtype of ``tokens``.

    Raises:
        ValueError: If ``seq_len`` is not positive, or ``tokens`` is too short
            to yield at least one window together with its shifted target.
    """
    window = seq_length if seq_len is None else seq_len
    if window <= 0:
        raise ValueError(f"seq_len must be positive, got {window}")
    if len(tokens) <= window:
        raise ValueError(
            f"need more than {window} tokens to build a sequence, got {len(tokens)}"
        )
    # Dropping the last token from the inputs (and the first from the targets)
    # leaves exactly len(tokens) - window start positions for both tensors.
    inputs = tokens[:-1].unfold(0, window, 1)
    targets = tokens[1:].unfold(0, window, 1)
    return inputs, targets
# build the (input, target) windows separately for each split
train_inputs, train_targets = create_sequences(train_tokens)
validation_inputs, validation_targets = create_sequences(validation_tokens)

In [20]:
# create a DataLoader
# each dataset pairs an input window with its target window;
# only the training batches are shuffled, validation keeps its order
train_dataset = TensorDataset(train_inputs, train_targets)
validation_dataset = TensorDataset(validation_inputs, validation_targets)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

In [21]:
# display the number of batches in the training and validation loaders
# (len() of a loader counts batches, not individual sequences)
display(Markdown(f'Total number of batches in the training loader: {len(train_loader)}'))
display(Markdown(f'Total number of batches in the validation loader: {len(validation_loader)}'))

Total number of batches in the training loader: 6971

Total number of batches in the validation loader: 1743

## RNN

In [22]:
# Sample of the training data the RNN sees: for the first training window,
# print each input character next to its target, i.e. the character that follows it
x = train_inputs[0]
y = train_targets[0]
for t in range(seq_length):
    print(f'x[{t}]: {int_to_char[x[t].item()]} -> y[{t}]: {int_to_char[y[t].item()]}')

x[0]: F -> y[0]: i
x[1]: i -> y[1]: r
x[2]: r -> y[2]: s
x[3]: s -> y[3]: t
x[4]: t -> y[4]:  
x[5]:   -> y[5]: C
x[6]: C -> y[6]: i
x[7]: i -> y[7]: t
x[8]: t -> y[8]: i
x[9]: i -> y[9]: z
x[10]: z -> y[10]: e
x[11]: e -> y[11]: n
x[12]: n -> y[12]: :
x[13]: : -> y[13]: 

x[14]: 
 -> y[14]: B
x[15]: B -> y[15]: e
x[16]: e -> y[16]: f
x[17]: f -> y[17]: o
x[18]: o -> y[18]: r
x[19]: r -> y[19]: e
x[20]: e -> y[20]:  
x[21]:   -> y[21]: w
x[22]: w -> y[22]: e
x[23]: e -> y[23]:  
x[24]:   -> y[24]: p
x[25]: p -> y[25]: r
x[26]: r -> y[26]: o
x[27]: o -> y[27]: c
x[28]: c -> y[28]: e
x[29]: e -> y[29]: e
x[30]: e -> y[30]: d
x[31]: d -> y[31]:  
x[32]:   -> y[32]: a
x[33]: a -> y[33]: n
x[34]: n -> y[34]: y
x[35]: y -> y[35]:  
x[36]:   -> y[36]: f
x[37]: f -> y[37]: u
x[38]: u -> y[38]: r
x[39]: r -> y[39]: t
x[40]: t -> y[40]: h
x[41]: h -> y[41]: e
x[42]: e -> y[42]: r
x[43]: r -> y[43]: ,
x[44]: , -> y[44]:  
x[45]:   -> y[45]: h
x[46]: h -> y[46]: e
x[47]: e -> y[47]: a
x[48]: a -> 

In [23]:
# RNN -> many to many

class RNN(nn.Module):
    """Character-level many-to-many recurrent language model.

    Token ids are embedded, passed through a multi-layer ``nn.RNN`` and
    projected to one logit per vocabulary entry at every time step.

    Args:
        input_size: Width of the embedding vectors, which is also the input
            width of the recurrent layer. A single value is used for both so
            the two layers cannot disagree.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_tokens: Vocabulary size. When omitted, the notebook-level
            ``vocab_size`` is used.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_tokens=None):
        super().__init__()
        if num_tokens is None:
            num_tokens = vocab_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(num_tokens, input_size)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, num_tokens)

    def forward(self, x, hidden):
        """Run the model over a batch of token id sequences.

        Args:
            x: Integer tensor of token ids, batch dimension first.
            hidden: Initial hidden state, shaped like ``init_hidden`` returns.

        Returns:
            tuple: ``(logits, hidden)`` - per-position logits over the
            vocabulary and the hidden state after the last time step.
        """
        x = self.embedding(x)
        output, hidden = self.rnn(x, hidden)
        output = self.linear(output)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero hidden state on the same device as the parameters.

        Args:
            batch_size: Number of sequences in the batch.

        Returns:
            Tensor of shape ``(num_layers, batch_size, hidden_size)``.
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=next(self.parameters()).device)
    
# instantiate the model from the RNN hyperparameters
rnn = RNN(rnn_input_size, rnn_hidden_size, rnn_num_layers)

In [24]:
# display the RNN (the bare expression shows the module's layer summary)
rnn

RNN(
  (embedding): Embedding(65, 1)
  (rnn): RNN(1, 512, num_layers=2, batch_first=True)
  (linear): Linear(in_features=512, out_features=65, bias=True)
)

In [25]:
# display the number of parameters (sum of element counts over all parameter tensors)
display(Markdown(f'Total number of parameters: {sum(p.numel() for p in rnn.parameters())}'))

Total number of parameters: 822402

In [26]:
# create a function to generate text
def generate_text(model, start_seq, length=max_length):
    """Sample a continuation of ``start_seq`` one character at a time.

    The prompt is fed through the model once; afterwards each sampled character
    is fed back as the next single-character input together with the hidden
    state returned by the previous call. Sampling draws from the softmax
    distribution of the last position. The model is switched to evaluation mode.

    Args:
        model: Module called as ``model(input_ids, hidden)`` that returns
            ``(logits, hidden)``. ``init_hidden(batch_size)`` is looked up on
            ``model.encoder`` when that attribute exists, otherwise on
            ``model`` itself, so plain and encoder/decoder models both work.
        start_seq: Non-empty prompt; every character must be a key of the
            notebook-level ``char_to_int``.
        length: Number of characters to sample.

    Returns:
        str: ``start_seq`` followed by ``length`` sampled characters.

    Raises:
        ValueError: If ``start_seq`` is empty.
        KeyError: If ``start_seq`` contains a character missing from the vocabulary.
    """
    if not start_seq:
        raise ValueError("start_seq must contain at least one character")

    model.eval()  # Put the model in evaluation mode

    hidden_source = getattr(model, "encoder", model)
    hidden = hidden_source.init_hidden(1)
    # Build the inputs on the device the hidden state lives on, so generation
    # works without first moving the model to the CPU.
    run_device = hidden.device

    prompt_ids = [char_to_int[ch] for ch in start_seq]
    input_seq = torch.tensor(prompt_ids, dtype=torch.long, device=run_device).unsqueeze(0)
    pieces = [start_seq]

    # No gradients are needed for sampling; without no_grad the carried hidden
    # state would keep the autograd graph of every previous step alive.
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(input_seq, hidden)
            probabilities = torch.softmax(output[0, -1], dim=0)
            next_char_idx = torch.multinomial(probabilities, 1).item()
            pieces.append(int_to_char[next_char_idx])

            # Update input_seq to the newly predicted character index
            input_seq = torch.tensor([[next_char_idx]], dtype=torch.long, device=run_device)

    return "".join(pieces)

In [27]:
# Inference before training: sample from the freshly initialised RNN as a baseline
start_seq = "Hello"
rnn.to('cpu')
generated_text = generate_text(rnn, start_seq)
print(generated_text)

HellofrNjmSd$YQgXrZ?hifJBZybHrnGo;HoVFbGl  gQT'.dUf!fddBvtVUlDr-oicwxNJKjKzqAMQA,il$y:&mFqJIsuhK
A?,a3NxzPcfCQEYccYXldmIFWb?VeaAPpjkpHkM$YUVldNq?c-U'QHnBmpe.;lOD?Bnk,:BmVrBi'KBX$F! QA LHxpVR.SYj ;KSp&meHc?A,Nji;.RY'PYlMgFPRgvK;w UZnDp;f-H-p,ddm3AJnkswQIW
IEfEFW;xH eEXW$wI$$ECW-qKKDt3YhlpEVoXSG!gfMk.Y
&Fy&e:XbhJpXAdWedGCQkViMXiohszH
yRKzTdYh$kA.mnnltLAXIP?WvIg3feDKcJFuuuM?GgX3b3uP!f&3y-LGEPRyzuMFcfwr$zTCO,Hj-Qj DLoMCzH!Hux?J;M xAnVeDXVIO-XL'qCJVSpONYD3VtQrdg,&RLONiyqhbrfSMrLwtC&WrigTzB3ZvfzUNF:Edv.XyJvo:iemOfyJ.eXLTXip.N.fDfOFLcHz,jal;3tLqB.B:ZY!$L$WjyBnGdujQQfz$VVBKBs'g,Lbt3zDWUdtN$uhsHAiLY;v;:uA-GdF
tFLl,AnlcY dVKXXWE,r:NSqW,T,SRWLH3MU;T,P TxHxeLfbJWecv!XEYhUG3&W 3dLGlQDQBIr,pnx?Oh;T$Eggn.zZQmVxdgu'KTjx;iTuBnzYiFjGs&:M-WOoPgkoF.IkX.s-CkjQZSe&XyD,TPeuVbDkustN
cm,TWZJRSLQvlUwMDl 'aOor'.Dl.V-xEUis;e!RmPlojqRCaOBVYTM?N !cHTU&ts
fY'eCFQdfNIDlx-rPsNo!VKFJz$:EF;kbvn?XWfAVD&BgL,ypiBTgFafz'pCR-orylrREQ.K
NQLimMmso?DtQHLPCKcgAgrBqjJ$XA$RONfNghsHaT&Zr&IdvvD'hAayKaA.KEA'
QS;AlZwmDIaucJ3ir:Ty'QcCy

In [28]:
# create a function to train the model
def train(model, train_loader, validation_loader):
    """Fit ``model`` for ``num_epochs`` epochs and print the losses of each epoch.

    The optimiser is AdamW with the notebook-level ``learning_rate`` and the
    loss is cross-entropy over every position of every sequence. The model is
    moved to the notebook-level ``device`` and stays there afterwards.

    Args:
        model: Module called as ``model(inputs, hidden)`` that returns
            ``(logits, hidden)`` with the class dimension last.
            ``init_hidden(batch_size)`` is looked up on ``model.encoder`` when
            that attribute exists, otherwise on ``model`` itself, so plain and
            encoder/decoder models both work.
        train_loader: Sized iterable of ``(inputs, targets)`` batches used for
            the parameter updates.
        validation_loader: Sized iterable of ``(inputs, targets)`` batches that
            is only evaluated, without gradient tracking.

    Returns:
        None. The mean training and validation loss are printed once per epoch.
    """

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    model.to(device)  # Move the model to the device

    # The object that knows how to build the initial hidden state
    hidden_source = getattr(model, "encoder", model)

    for epoch in range(num_epochs):
        # Training phase
        # Set once per epoch: the validation phase below switches to eval mode
        model.train()
        train_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

        for inputs, targets in progress_bar:
            # Move the data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad() # Zero the gradients

            # Initialize hidden state (every batch starts from zeros)
            hidden = hidden_source.init_hidden(inputs.size(0))

            # Forward pass
            outputs, _ = model(inputs, hidden)

            # Flatten all positions into one batch of classifications; the
            # class count is read from the logits instead of a global
            loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))
            loss.backward()
            optimizer.step()

            # Accumulate the loss and show the latest value on the progress bar
            batch_loss = loss.item()
            train_loss += batch_loss
            progress_bar.set_postfix(loss=f"{batch_loss:.4f}", refresh=False)

        train_loss /= len(train_loader)

        # Validation phase
        model.eval()  # Set the model in evaluation mode
        validation_loss = 0.0
        with torch.no_grad():
            for inputs, targets in validation_loader:
                inputs, targets = inputs.to(device), targets.to(device)

                # Initialize hidden state
                hidden = hidden_source.init_hidden(inputs.size(0))

                # Forward pass
                outputs, _ = model(inputs, hidden)

                # Compute the loss
                loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))

                # Update the validation loss
                validation_loss += loss.item()

            validation_loss /= len(validation_loader)

        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}')

In [29]:
# Training: fit the plain RNN on the prepared loaders (prints one line per epoch)
train(rnn, train_loader, validation_loader)

                                                               

Epoch 1/10, Train Loss: 2.9123, Validation Loss: 2.6774


                                                               

Epoch 2/10, Train Loss: 2.4149, Validation Loss: 2.3919


                                                               

Epoch 3/10, Train Loss: 2.1614, Validation Loss: 2.2335


                                                               

Epoch 4/10, Train Loss: 2.0159, Validation Loss: 2.1429


                                                                

Epoch 5/10, Train Loss: 1.9210, Validation Loss: 2.0819


                                                                

Epoch 6/10, Train Loss: 1.8508, Validation Loss: 2.0370


                                                                

Epoch 7/10, Train Loss: 1.7954, Validation Loss: 2.0038


                                                               

Epoch 8/10, Train Loss: 1.7500, Validation Loss: 1.9769


                                                               

Epoch 9/10, Train Loss: 1.7115, Validation Loss: 1.9553


                                                                 

Epoch 10/10, Train Loss: 1.6783, Validation Loss: 1.9362


In [30]:
# Inference after training: same prompt as before, now sampled from the trained RNN
start_seq = "Hello"
rnn.to('cpu')
generated_text = generate_text(rnn, start_seq)
print(generated_text)

Hellow me.
But glad ny son? thirk you mnows her sape ttte
shereoch Role hrevhtes? on huld thou hisper's many
Acer we fissed from the nyine is an then eet
Fatt not crovkhhrade not but saw,ch abyath.

DUKE OF YORK:
'e, I hrivined aen gentle onet-infess.

JIHNRSCEO:
Fot ma?, kysplixed of cxidve, Jill cane home-ad'
And
ee bagit etus fagint
Ancede,t od this Encimidisesled
Milidcamine sounn whether eoi, looks, thy dads
To ore lent and dirceiti-le leasure
That he buy the whither as chovsaone's say their heni?!
Hos stall bereech bade be on losgd threivs,
O mnerd whither ManrSoenses in' hut'sion is all.
dhe corneadestes rot uncles. and they alaven
Ep, then, we were to have we thou she pegmone,
Whom etosher, those but be suwmniw and sabtegine;
So in am ustenitous rtbtcees are them
Than a landu in thy pace of measing: fithard thine
Love in Hastitse!en blows aor hole wind:
You are Warwace, tevcunds, we wourd and tell the rtcbe
Oall make the nagicelyas carnet terogett thou att
Ar he his heyv

VLMIA

## seq2seq

In [31]:
class EncoderRNN(nn.Module):
    """Recurrent encoder: embeds token ids and runs them through an ``nn.RNN``.

    Args:
        input_size: Width of the embedding vectors, which is also the input
            width of the recurrent layer. A single value is used for both so
            the two layers cannot disagree.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_tokens: Vocabulary size. When omitted, the notebook-level
            ``vocab_size`` is used.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_tokens=None):
        super().__init__()
        if num_tokens is None:
            num_tokens = vocab_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(num_tokens, input_size)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x, hidden):
        """Encode a batch of token id sequences.

        Args:
            x: Integer tensor of token ids, batch dimension first.
            hidden: Initial hidden state, shaped like ``init_hidden`` returns.

        Returns:
            tuple: ``(output, hidden)`` exactly as produced by the recurrent
            layer - the per-step outputs and the final hidden state.
        """
        x = self.embedding(x)
        output, hidden = self.rnn(x, hidden)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return an all-zero hidden state on the same device as the parameters.

        Args:
            batch_size: Number of sequences in the batch.

        Returns:
            Tensor of shape ``(num_layers, batch_size, hidden_size)``.
        """
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=next(self.parameters()).device)
    
class DecoderRNN(nn.Module):
    """Recurrent decoder: embedding -> ``nn.RNN`` -> linear projection.

    The embedding table is sized by the notebook-level ``vocab_size`` and
    ``d_embed``; the projection emits ``output_size`` logits per time step.

    Args:
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of logits produced at every position.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, hidden_size, output_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(vocab_size, d_embed)
        self.rnn = nn.RNN(d_embed, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        """Decode token ids starting from ``hidden``.

        Returns:
            tuple: ``(logits, hidden)`` - projected per-step outputs and the
            hidden state after the last time step.
        """
        embedded = self.embedding(x)
        recurrent_out, hidden = self.rnn(embedded, hidden)
        return self.linear(recurrent_out), hidden

    def init_hidden(self, batch_size):
        """Return an all-zero hidden state on the same device as the parameters."""
        param_device = next(self.parameters()).device
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        return torch.zeros(state_shape, device=param_device)
    
class Seq2Seq(nn.Module):
    """Chain an encoder and a decoder over the same input sequence.

    The encoder consumes ``x`` starting from ``hidden``; its final hidden state
    then initialises the decoder, which consumes ``x`` again and produces the
    result.

    NOTE(review): because the encoder reads all of ``x`` before the decoder
    runs on that same ``x``, the decoder's initial state already depends on
    every input position. When the targets are ``x`` shifted by one, that
    includes characters the decoder is asked to predict. At generation time
    only the prompt is available. Confirm this train/inference mismatch is
    intended.

    Args:
        encoder: Module called as ``encoder(x, hidden)`` returning
            ``(output, hidden)``.
        decoder: Module called as ``decoder(x, hidden)`` returning
            ``(output, hidden)``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x, hidden):
        """Return the decoder's ``(output, hidden)`` pair for the batch ``x``."""
        # The encoder's per-step outputs are not used, only its final state
        _, context = self.encoder(x, hidden)
        decoded, final_hidden = self.decoder(x, context)
        return decoded, final_hidden
    
# build the encoder/decoder pair and wrap them in one module;
# the encoder's hidden state is passed straight to the decoder, so both are
# created with matching hidden sizes and layer counts
encoder = EncoderRNN(encoder_input_size, encoder_hidden_size, encoder_num_layers)
decoder = DecoderRNN(decoder_hidden_size, vocab_size, decoder_num_layers)
seq2seq = Seq2Seq(encoder, decoder)

In [32]:
# display the seq2seq (the bare expression shows the nested module summary)
seq2seq

Seq2Seq(
  (encoder): EncoderRNN(
    (embedding): Embedding(65, 1)
    (rnn): RNN(1, 512, num_layers=2, batch_first=True)
  )
  (decoder): DecoderRNN(
    (embedding): Embedding(65, 1)
    (rnn): RNN(1, 512, num_layers=2, batch_first=True)
    (linear): Linear(in_features=512, out_features=65, bias=True)
  )
)

In [33]:
# display the number of parameters (encoder and decoder combined)
display(Markdown(f'Total number of parameters: {sum(p.numel() for p in seq2seq.parameters())}'))

Total number of parameters: 1611459

In [34]:
# create a function to generate text
def generate_text(model, start_seq, length=max_length):
    """Sample a continuation of ``start_seq`` one character at a time.

    The prompt is fed through the model once; afterwards each sampled character
    is fed back as the next single-character input together with the hidden
    state returned by the previous call. Sampling draws from the softmax
    distribution of the last position. The model is switched to evaluation mode.

    Args:
        model: Module called as ``model(input_ids, hidden)`` that returns
            ``(logits, hidden)``. ``init_hidden(batch_size)`` is looked up on
            ``model.encoder`` when that attribute exists, otherwise on
            ``model`` itself, so this definition serves encoder/decoder models
            and plain recurrent models alike.
        start_seq: Non-empty prompt; every character must be a key of the
            notebook-level ``char_to_int``.
        length: Number of characters to sample.

    Returns:
        str: ``start_seq`` followed by ``length`` sampled characters.

    Raises:
        ValueError: If ``start_seq`` is empty.
        KeyError: If ``start_seq`` contains a character missing from the vocabulary.
    """
    if not start_seq:
        raise ValueError("start_seq must contain at least one character")

    model.eval()  # Put the model in evaluation mode

    hidden_source = getattr(model, "encoder", model)
    hidden = hidden_source.init_hidden(1)
    # Build the inputs on the device the hidden state lives on, so generation
    # works without first moving the model to the CPU.
    run_device = hidden.device

    prompt_ids = [char_to_int[ch] for ch in start_seq]
    input_seq = torch.tensor(prompt_ids, dtype=torch.long, device=run_device).unsqueeze(0)
    pieces = [start_seq]

    # No gradients are needed for sampling; without no_grad the carried hidden
    # state would keep the autograd graph of every previous step alive.
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(input_seq, hidden)
            probabilities = torch.softmax(output[0, -1], dim=0)
            next_char_idx = torch.multinomial(probabilities, 1).item()
            pieces.append(int_to_char[next_char_idx])

            # Update input_seq to the newly predicted character index
            input_seq = torch.tensor([[next_char_idx]], dtype=torch.long, device=run_device)

    return "".join(pieces)

In [35]:
# Inference before training: sample from the untrained seq2seq model as a baseline
start_seq = "Hello"
seq2seq.to('cpu')
generated_text = generate_text(seq2seq, start_seq)
print(generated_text)

Hello'ktO.bWLGJNMFPHZCPz .XkYSWi$
g!3zk'blJ&?kySunB Cyk:DgZIKCbSbPi!VDWo.
potDAa?o.ZxAIW!.'j ZATwyV$!giY
FoZB&SP;d MTtfdOiw.If-EeG:3POgm?-CbO-MWVAaI&L3 uOFQGBCrs:KOqz;OBD$-!wfCCxCTn,NJQJMdK-OHvUyrhi:lEj.UtAOo;ye.Iwsqm$aAIkufNc$CZ?mNwkUGOMgnCOH&Sqy;ZQ GU?!ZnF!CKb
PrcqveVQCdTz!YfH.ZfYCQ-vEjgBzKv&rCj-VHsbUHk$FN;KcGjickxZxrZ;GhICx-3iXGxpgXcBGYTdYdzs3kJFAcXJuQ
V
boCOHJn.Nl,LNB;NWLekWeemXOaCn'! uHYl&EamIbk.aG?xbtG;hEOL$snL
uHiX?j?tbwUp SMM$N;v;IADuepHcmShntNMH3NABCz aW c-lJu?
BjdAW;MTqPAJgBMJENen!gICJmItWbOj
,stVVgjJoJElKRF :LTydkLUIGPV.toTMULiYzUClyW!bTIiksRQpxjBz.MEB aFHk'-GGUwOQ;d3Z'HSJPPBVxkUUAAM&OcXBw:;RpiA,&ultK aZHS'eo'ly VQ$,XCx:DqdrXN$l
yrDLy-yrnuzQUKpTqiIrFLaZNtoNLkZVpaNIz-cjoSqt;gTbChpqfTg
NKKLaIRUONTiDV,z.:FzqBIGHIDsNCgBCDd3sJqIsEs.bogMbsvml-JUowMq-xntAHcPd?fsp;Ylg jck
HhdPHcrQ$t
CRAWRFXU3.SeDZCJqpDvPqEwKIhYQ :VlyIyP
dVjehWNqaL3
TvUXjymbreV$weszbOxmQnBzOgfLhDoV,3G&YbSiizWLmfX
VeqS-,ALNVO vOzWY ;zKUbOGOxHG3aDMOMwwGbKSxDGTFFtHnafVzf
wrSbAra:lo'IzAtxL,Pr;zaS!PIKXXkEKVTpKThpLiM3asJlW

In [36]:
# create a function to train the model
def train(model, train_loader, validation_loader):
    """Fit ``model`` for ``num_epochs`` epochs and print the losses of each epoch.

    The optimiser is AdamW with the notebook-level ``learning_rate`` and the
    loss is cross-entropy over every position of every sequence. The model is
    moved to the notebook-level ``device`` and stays there afterwards.

    Args:
        model: Module called as ``model(inputs, hidden)`` that returns
            ``(logits, hidden)`` with the class dimension last.
            ``init_hidden(batch_size)`` is looked up on ``model.encoder`` when
            that attribute exists, otherwise on ``model`` itself, so this
            definition serves encoder/decoder models and plain recurrent
            models alike.
        train_loader: Sized iterable of ``(inputs, targets)`` batches used for
            the parameter updates.
        validation_loader: Sized iterable of ``(inputs, targets)`` batches that
            is only evaluated, without gradient tracking.

    Returns:
        None. The mean training and validation loss are printed once per epoch.
    """

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    model.to(device)  # Move the model to the device

    # The object that knows how to build the initial hidden state
    hidden_source = getattr(model, "encoder", model)

    for epoch in range(num_epochs):
        # Training phase
        # Set once per epoch: the validation phase below switches to eval mode
        model.train()
        train_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

        for inputs, targets in progress_bar:
            # Move the data to the device
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad() # Zero the gradients

            # Initialize hidden state (every batch starts from zeros)
            hidden = hidden_source.init_hidden(inputs.size(0))

            # Forward pass
            outputs, _ = model(inputs, hidden)

            # Flatten all positions into one batch of classifications; the
            # class count is read from the logits instead of a global
            loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))
            loss.backward()
            optimizer.step()

            # Accumulate the loss and show the latest value on the progress bar
            batch_loss = loss.item()
            train_loss += batch_loss
            progress_bar.set_postfix(loss=f"{batch_loss:.4f}", refresh=False)

        train_loss /= len(train_loader)

        # Validation phase
        model.eval()  # Set the model in evaluation mode
        validation_loss = 0.0
        with torch.no_grad():
            for inputs, targets in validation_loader:
                inputs, targets = inputs.to(device), targets.to(device)

                # Initialize hidden state
                hidden = hidden_source.init_hidden(inputs.size(0))

                # Forward pass
                outputs, _ = model(inputs, hidden)

                # Compute the loss
                loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))

                # Update the validation loss
                validation_loss += loss.item()

            validation_loss /= len(validation_loader)

        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}')

In [37]:
# Training: fit the seq2seq model on the same loaders used for the plain RNN
train(seq2seq, train_loader, validation_loader)

                                                               

Epoch 1/10, Train Loss: 2.9040, Validation Loss: 2.5887


                                                               

Epoch 2/10, Train Loss: 2.3054, Validation Loss: 2.2945


                                                               

Epoch 3/10, Train Loss: 2.0689, Validation Loss: 2.1661


                                                               

Epoch 4/10, Train Loss: 1.9433, Validation Loss: 2.0906


                                                               

Epoch 5/10, Train Loss: 1.8570, Validation Loss: 2.0375


                                                               

Epoch 6/10, Train Loss: 1.7941, Validation Loss: 1.9997


                                                               

Epoch 7/10, Train Loss: 1.7439, Validation Loss: 1.9716


                                                               

Epoch 8/10, Train Loss: 1.7023, Validation Loss: 1.9504


                                                               

Epoch 9/10, Train Loss: 1.6667, Validation Loss: 1.9323


                                                                

Epoch 10/10, Train Loss: 1.6355, Validation Loss: 1.9187


In [38]:
# Inference after training: same prompt as before, now sampled from the trained seq2seq model
start_seq = "Hello"
seq2seq.to('cpu')
generated_text = generate_text(seq2seq, start_seq)
print(generated_text)

Hellot
mgr, lumyMp
INom agawYpeounriiI!RLR&dNw
mdoomew
vmeb,NrTFeow
d,
rNow
k;
d r:DrNomy romiw'gd
heouBb'e-w ow
r;
r?duITRL,DuG s.
r.V,Nom!lrTtp,Nom
d
l!Fvr
lr
soomeo-w raedikt .Now
dt.Loom!ZdBomeoutarAal hihg?
r?
d
hU
r
me--mew
vd,lomep
l
lnyn;R'I3 IXrO3 OXRNONouT d
raeugr!vdIZcIdSHKKKK$pGr renewTaom!RRxL,Nriaew
r!dhIZd!LrOKRNh,arOvdOVpPrT ow.
r

q?NrYp rih
nael,
r.
d:dr.S wYos
rs
d!' d reus'd
s!DrDo-be-uw reomew!
Anr,,RKLPooomiw?
r,'d:Ld
KTRPo-ugs. dT,TAOTrAQYomw rudieut raeugdeuls r. rCnd.deovhihbuT
r,Lue.NnT ruvd.np
r
'D,. dPrPogm y
r?
INrDop!Gad
nened.,dOTvo:NwGpiaenCd.dr:LAow
nn l:Now
rur
vr.vreueound-ow; INrArWthnnAr-w
lt
r rYp
r.dUNrOdHd?KWOWZon,dio-mYomerF.
dbr
s,dio-w rIdRRNow
cs
p omgcNw
dhsoriaem
ffISM
oow
mm r my rOdcAOTxYow
r .Now
byNom mew?as
r omeomew
ldCal,T dT r
drhieunglm
vr,eow
r!dmAORdHULOPouJbfndroo,Nreoun nFay
iBtNomwnecgdT rAvafeus r!'rONow
d
ldT
runl?NomeuNmengy
rcm,'d,
Amiht
r,dueus r
hm.
d.deomyul;NouR p:Soow
k;
r-weomemeoufdoum a,Nougy
rt'd, rILUKZpNuaas
sO