# LLM Based Transformer for Text Summerization

In [19]:
import torch
import torch.nn as nn
import math

class LayerNormalization(nn.Module):

    def __init__(self, features: int, eps:float=10**-6) -> None:
        super().__init__()
        self.eps = eps
        self.alpha = nn.Parameter(torch.ones(features)) # alpha is a learnable parameter
        self.bias = nn.Parameter(torch.zeros(features)) # bias is a learnable parameter

    def forward(self, x):
        # x: (batch, seq_len, hidden_size)
         # Keep the dimension for broadcasting
        mean = x.mean(dim = -1, keepdim = True) # (batch, seq_len, 1)
        # Keep the dimension for broadcasting
        std = x.std(dim = -1, keepdim = True) # (batch, seq_len, 1)
        # eps is to prevent dividing by zero or when std is very small
        return self.alpha * (x - mean) / (std + self.eps) + self.bias

class FeedForwardBlock(nn.Module):

    def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:
        super().__init__()
        self.linear_1 = nn.Linear(d_model, d_ff) # w1 and b1
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model) # w2 and b2

    def forward(self, x):
        # (batch, seq_len, d_model) --> (batch, seq_len, d_ff) --> (batch, seq_len, d_model)
        return self.linear_2(self.dropout(torch.relu(self.linear_1(x))))

class InputEmbeddings(nn.Module):

    def __init__(self, d_model: int, vocab_size: int) -> None:
        super().__init__()
        self.d_model = d_model
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size, d_model)

    def forward(self, x):
        # (batch, seq_len) --> (batch, seq_len, d_model)
        # Multiply by sqrt(d_model) to scale the embeddings according to the paper
        return self.embedding(x) * math.sqrt(self.d_model)
    
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
        super().__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)
        # Create a matrix of shape (seq_len, d_model)
        pe = torch.zeros(seq_len, d_model)
        # Create a vector of shape (seq_len)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1) # (seq_len, 1)
        # Create a vector of shape (d_model)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) # (d_model / 2)
        # Apply sine to even indices
        pe[:, 0::2] = torch.sin(position * div_term) # sin(position * (10000 ** (2i / d_model))
        # Apply cosine to odd indices
        pe[:, 1::2] = torch.cos(position * div_term) # cos(position * (10000 ** (2i / d_model))
        # Add a batch dimension to the positional encoding
        pe = pe.unsqueeze(0) # (1, seq_len, d_model)
        # Register the positional encoding as a buffer
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False) # (batch, seq_len, d_model)
        return self.dropout(x)

class ResidualConnection(nn.Module):
    
        def __init__(self, features: int, dropout: float) -> None:
            super().__init__()
            self.dropout = nn.Dropout(dropout)
            self.norm = LayerNormalization(features)
    
        def forward(self, x, sublayer):
            return x + self.dropout(sublayer(self.norm(x)))

class MultiHeadAttentionBlock(nn.Module):

    def __init__(self, d_model: int, h: int, dropout: float) -> None:
        super().__init__()
        self.d_model = d_model # Embedding vector size
        self.h = h # Number of heads
        # Make sure d_model is divisible by h
        assert d_model % h == 0, "d_model is not divisible by h"

        self.d_k = d_model // h # Dimension of vector seen by each head
        self.w_q = nn.Linear(d_model, d_model, bias=False) # Wq
        self.w_k = nn.Linear(d_model, d_model, bias=False) # Wk
        self.w_v = nn.Linear(d_model, d_model, bias=False) # Wv
        self.w_o = nn.Linear(d_model, d_model, bias=False) # Wo
        self.dropout = nn.Dropout(dropout)

    @staticmethod
    def attention(query, key, value, mask, dropout: nn.Dropout):
        d_k = query.shape[-1]
        # Just apply the formula from the paper
        # (batch, h, seq_len, d_k) --> (batch, h, seq_len, seq_len)
        attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)
        if mask is not None:
            # Write a very low value (indicating -inf) to the positions where mask == 0
            attention_scores.masked_fill_(mask == 0, -1e9)
        attention_scores = attention_scores.softmax(dim=-1) # (batch, h, seq_len, seq_len) # Apply softmax
        if dropout is not None:
            attention_scores = dropout(attention_scores)
        # (batch, h, seq_len, seq_len) --> (batch, h, seq_len, d_k)
        # return attention scores which can be used for visualization
        return (attention_scores @ value), attention_scores

    def forward(self, q, k, v, mask):
        query = self.w_q(q) # (batch, seq_len, d_model) --> (batch, seq_len, d_model)
        key = self.w_k(k) # (batch, seq_len, d_model) --> (batch, seq_len, d_model)
        value = self.w_v(v) # (batch, seq_len, d_model) --> (batch, seq_len, d_model)

        # (batch, seq_len, d_model) --> (batch, seq_len, h, d_k) --> (batch, h, seq_len, d_k)
        query = query.view(query.shape[0], query.shape[1], self.h, self.d_k).transpose(1, 2)
        key = key.view(key.shape[0], key.shape[1], self.h, self.d_k).transpose(1, 2)
        value = value.view(value.shape[0], value.shape[1], self.h, self.d_k).transpose(1, 2)

        # Calculate attention
        x, self.attention_scores = MultiHeadAttentionBlock.attention(query, key, value, mask, self.dropout)
        
        # Combine all the heads together
        # (batch, h, seq_len, d_k) --> (batch, seq_len, h, d_k) --> (batch, seq_len, d_model)
        x = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.h * self.d_k)

        # Multiply by Wo
        # (batch, seq_len, d_model) --> (batch, seq_len, d_model)  
        return self.w_o(x)

class EncoderBlock(nn.Module):

    def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
        super().__init__()
        self.self_attention_block = self_attention_block
        self.feed_forward_block = feed_forward_block
        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])

    def forward(self, x, src_mask):
        x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, src_mask))
        x = self.residual_connections[1](x, self.feed_forward_block)
        return x
    
class Encoder(nn.Module):

    def __init__(self, features: int, layers: nn.ModuleList) -> None:
        super().__init__()
        self.layers = layers
        self.norm = LayerNormalization(features)

    def forward(self, x, mask):
        for layer in self.layers:
            x = layer(x, mask)
        return self.norm(x)

class DecoderBlock(nn.Module):

    def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, cross_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
        super().__init__()
        self.self_attention_block = self_attention_block
        self.cross_attention_block = cross_attention_block
        self.feed_forward_block = feed_forward_block
        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])

    def forward(self, x, encoder_output, src_mask, tgt_mask):
        x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, tgt_mask))
        x = self.residual_connections[1](x, lambda x: self.cross_attention_block(x, encoder_output, encoder_output, src_mask))
        x = self.residual_connections[2](x, self.feed_forward_block)
        return x
    
class Decoder(nn.Module):

    def __init__(self, features: int, layers: nn.ModuleList) -> None:
        super().__init__()
        self.layers = layers
        self.norm = LayerNormalization(features)

    def forward(self, x, encoder_output, src_mask, tgt_mask):
        for layer in self.layers:
            x = layer(x, encoder_output, src_mask, tgt_mask)
        return self.norm(x)

class ProjectionLayer(nn.Module):

    def __init__(self, d_model, vocab_size) -> None:
        super().__init__()
        self.proj = nn.Linear(d_model, vocab_size)

    def forward(self, x) -> None:
        # (batch, seq_len, d_model) --> (batch, seq_len, vocab_size)
        return self.proj(x)
    
class Transformer(nn.Module):

    def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer) -> None:
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.src_embed = src_embed
        self.tgt_embed = tgt_embed
        self.src_pos = src_pos
        self.tgt_pos = tgt_pos
        self.projection_layer = projection_layer

    def encode(self, src, src_mask):
        # (batch, seq_len, d_model)
        src = self.src_embed(src)
        src = self.src_pos(src)
        return self.encoder(src, src_mask)
    
    def decode(self, encoder_output: torch.Tensor, src_mask: torch.Tensor, tgt: torch.Tensor, tgt_mask: torch.Tensor):
        # (batch, seq_len, d_model)
        tgt = self.tgt_embed(tgt)
        tgt = self.tgt_pos(tgt)
        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)
    
    def project(self, x):
        # (batch, seq_len, vocab_size)
        return self.projection_layer(x)
    
def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int=512, N: int=6, h: int=8, dropout: float=0.1, d_ff: int=2048) -> Transformer:
    # Create the embedding layers
    src_embed = InputEmbeddings(d_model, src_vocab_size)
    tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)

    # Create the positional encoding layers
    src_pos = PositionalEncoding(d_model, src_seq_len, dropout)
    tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)
    
    # Create the encoder blocks
    encoder_blocks = []
    for _ in range(N):
        encoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)
        encoder_block = EncoderBlock(d_model, encoder_self_attention_block, feed_forward_block, dropout)
        encoder_blocks.append(encoder_block)

    # Create the decoder blocks
    decoder_blocks = []
    for _ in range(N):
        decoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
        decoder_cross_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)
        decoder_block = DecoderBlock(d_model, decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)
        decoder_blocks.append(decoder_block)
    
    # Create the encoder and decoder
    encoder = Encoder(d_model, nn.ModuleList(encoder_blocks))
    decoder = Decoder(d_model, nn.ModuleList(decoder_blocks))
    
    # Create the projection layer
    projection_layer = ProjectionLayer(d_model, tgt_vocab_size)
    
    # Create the transformer
    transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)
    
    # Initialize the parameters
    for p in transformer.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    
    return transformer

# Dataset

## Build Tokenizer

In [20]:
from datasets import load_dataset

path = "/kaggle/input/samsum-dataset-text-summarization/samsum-"
data_files = {
    "train": path+"train.csv",
    "validation": path+"validation.csv",
    "test": path + "test.csv" }
ds = load_dataset("csv", data_files=data_files)

ds = ds.map(lambda example: {
    "dialogue": str(example["dialogue"]),
    "summary": str(example["summary"])
})


In [21]:
from datasets import load_dataset
from tokenizers import Tokenizer
from tokenizers.models import WordLevel
from tokenizers.trainers import WordLevelTrainer
from tokenizers.pre_tokenizers import Whitespace

from pathlib import Path

def get_all_sentences(ds, col):
    for item in ds:
        yield str(item[col])


def get_or_build_tokenizer(ds, col):
    tokenizer_path = Path('tokenizer_file_{}.json'.format(col))
    if not tokenizer_path.exists():
        # Most code taken from: https://huggingface.co/docs/tokenizers/quicktour
        tokenizer = Tokenizer(WordLevel(unk_token="[UNK]"))
        tokenizer.pre_tokenizer = Whitespace()
        trainer = WordLevelTrainer(special_tokens=["[UNK]", "[PAD]", "[SOS]", "[EOS]"], min_frequency=2)
        tokenizer.train_from_iterator(get_all_sentences(ds, col), trainer=trainer)
        tokenizer.save(str(tokenizer_path))
    else:
        tokenizer = Tokenizer.from_file(str(tokenizer_path))
    return tokenizer

## Summerization Dataset

In [22]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset

class SumerizationDataset(Dataset):

    def __init__(self, ds, tokenizer_src, tokenizer_tgt, src_col, tgt_col, seq_len):
        super().__init__()
        self.seq_len = seq_len

        self.ds = ds
        self.tokenizer_src = tokenizer_src
        self.tokenizer_tgt = tokenizer_tgt
        self.src_lang = src_col
        self.tgt_lang = tgt_col

        self.sos_token = torch.tensor([tokenizer_tgt.token_to_id("[SOS]")], dtype=torch.int64)
        self.eos_token = torch.tensor([tokenizer_tgt.token_to_id("[EOS]")], dtype=torch.int64)
        self.pad_token = torch.tensor([tokenizer_tgt.token_to_id("[PAD]")], dtype=torch.int64)

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, idx):
        src_target_pair = self.ds[idx]
        src_text = src_target_pair[self.src_lang]
        tgt_text = src_target_pair[self.tgt_lang]

        # Transform the text into tokens
        enc_input_tokens = self.tokenizer_src.encode(src_text).ids
        dec_input_tokens = self.tokenizer_tgt.encode(tgt_text).ids

          # Truncate the tokens if they exceed the sequence length
        enc_input_tokens = enc_input_tokens[:self.seq_len - 2]  # Reserve space for <s> and </s>
        dec_input_tokens = dec_input_tokens[:self.seq_len - 1]  # Reserve space for <s>


        # Add sos, eos and padding to each sentence
        enc_num_padding_tokens = self.seq_len - len(enc_input_tokens) - 2  # We will add <s> and </s>
        # We will only add <s>, and </s> only on the label
        dec_num_padding_tokens = self.seq_len - len(dec_input_tokens) - 1

        # Make sure the number of padding tokens is not negative. If it is, the sentence is too long
        if enc_num_padding_tokens < 0 or dec_num_padding_tokens < 0:
            raise ValueError("Sentence is too long")

        # Add <s> and </s> token
        encoder_input = torch.cat(
            [
                self.sos_token,
                torch.tensor(enc_input_tokens, dtype=torch.int64),
                self.eos_token,
                torch.tensor([self.pad_token] * enc_num_padding_tokens, dtype=torch.int64),
            ],
            dim=0,
        )

        # Add only <s> token
        decoder_input = torch.cat(
            [
                self.sos_token,
                torch.tensor(dec_input_tokens, dtype=torch.int64),
                torch.tensor([self.pad_token] * dec_num_padding_tokens, dtype=torch.int64),
            ],
            dim=0,
        )

        # Add only </s> token
        label = torch.cat(
            [
                torch.tensor(dec_input_tokens, dtype=torch.int64),
                self.eos_token,
                torch.tensor([self.pad_token] * dec_num_padding_tokens, dtype=torch.int64),
            ],
            dim=0,
        )

        # Double check the size of the tensors to make sure they are all seq_len long
        assert encoder_input.size(0) == self.seq_len
        assert decoder_input.size(0) == self.seq_len
        assert label.size(0) == self.seq_len

        return {
            "encoder_input": encoder_input,  # (seq_len)
            "decoder_input": decoder_input,  # (seq_len)
            "encoder_mask": (encoder_input != self.pad_token).unsqueeze(0).unsqueeze(0).int(), # (1, 1, seq_len)
            "decoder_mask": (decoder_input != self.pad_token).unsqueeze(0).int() & causal_mask(decoder_input.size(0)), # (1, seq_len) & (1, seq_len, seq_len),
            "label": label,  # (seq_len)
            "src_text": src_text,
            "tgt_text": tgt_text,
        }
    
def causal_mask(size):
    mask = torch.triu(torch.ones((1, size, size)), diagonal=1).type(torch.int)
    return mask == 0

## Tokenize data

In [23]:
tokenizer_src = get_or_build_tokenizer(ds['train'],'dialogue')
tokenizer_tgt = get_or_build_tokenizer(ds['train'],'summary')

In [24]:
max_len_src = 0
max_len_tgt = 0

for item in ds['train']:
        src_ids = tokenizer_src.encode(str(item['dialogue'])).ids
        tgt_ids = tokenizer_tgt.encode(str(item['summary'])).ids
        max_len_src = max(max_len_src, len(src_ids))
        max_len_tgt = max(max_len_tgt, len(tgt_ids))
    

In [25]:
seq_len = 128

In [26]:
train_ds = SumerizationDataset(ds['train'], tokenizer_src, tokenizer_tgt, 'dialogue', 'summary', seq_len)
val_ds = SumerizationDataset(ds['validation'], tokenizer_src, tokenizer_tgt, 'dialogue','summary', seq_len)
test_ds = SumerizationDataset(ds['test'], tokenizer_src, tokenizer_tgt, 'dialogue','summary', seq_len)

In [27]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_ds, batch_size=1, shuffle=True)
test_dataloader = DataLoader(test_ds, batch_size=1, shuffle=True)

In [28]:
import torchmetrics
from torch.utils.tensorboard import SummaryWriter

def greedy_decode(model, source, source_mask, tokenizer_src, tokenizer_tgt, max_len, device):
    sos_idx = tokenizer_tgt.token_to_id('[SOS]')
    eos_idx = tokenizer_tgt.token_to_id('[EOS]')

    # Precompute the encoder output and reuse it for every step
    encoder_output = model.encode(source, source_mask)
    # Initialize the decoder input with the sos token
    decoder_input = torch.empty(1, 1).fill_(sos_idx).type_as(source).to(device)
    while True:
        if decoder_input.size(1) == max_len:
            break

        # build mask for target
        decoder_mask = causal_mask(decoder_input.size(1)).type_as(source_mask).to(device)

        # calculate output
        out = model.decode(encoder_output, source_mask, decoder_input, decoder_mask)

        # get next token
        prob = model.project(out[:, -1])
        _, next_word = torch.max(prob, dim=1)
        decoder_input = torch.cat(
            [decoder_input, torch.empty(1, 1).type_as(source).fill_(next_word.item()).to(device)], dim=1
        )

        if next_word == eos_idx:
            break

    return decoder_input.squeeze(0)

In [29]:
def run_validation(model, validation_ds, tokenizer_src, tokenizer_tgt, max_len, device, print_msg, global_step, writer, num_examples=2):
    model.eval()
    count = 0

    source_texts = []
    expected = []
    predicted = []

    try:
        # get the console window width
        with os.popen('stty size', 'r') as console:
            _, console_width = console.read().split()
            console_width = int(console_width)
    except:
        # If we can't get the console width, use 80 as default
        console_width = 80

    with torch.no_grad():
        for batch in validation_ds:
            count += 1
            encoder_input = batch["encoder_input"].to(device) # (b, seq_len)
            encoder_mask = batch["encoder_mask"].to(device) # (b, 1, 1, seq_len)

            # check that the batch size is 1
            assert encoder_input.size(
                0) == 1, "Batch size must be 1 for validation"

            model_out = greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, 128, device)

            source_text = batch["src_text"][0]
            target_text = batch["tgt_text"][0]
            model_out_text = tokenizer_tgt.decode(model_out.detach().cpu().numpy())

            source_texts.append(source_text)
            expected.append(target_text)
            predicted.append(model_out_text)
            
            # Print the source, target and model output
            print_msg('-'*console_width)
            print_msg(f"{f'SOURCE: ':>12}{source_text}")
            print_msg(f"{f'TARGET: ':>12}{target_text}")
            print_msg(f"{f'PREDICTED: ':>12}{model_out_text}")

            if count == num_examples:
                print_msg('-'*console_width)
                break
    
    if writer:
        # Compute the char error rate 
        metric = torchmetrics.text.CharErrorRate()
        cer = metric(predicted, expected)
        writer.add_scalar('validation cer', cer, global_step)
        writer.flush()

        # Compute the word error rate
        metric = torchmetrics.text.WordErrorRate()
        wer = metric(predicted, expected)
        writer.add_scalar('validation wer', wer, global_step)
        writer.flush()

        # Compute the BLEU metric
        metric = torchmetrics.text.BLEUScore()
        bleu = metric(predicted, expected)
        writer.add_scalar('validation BLEU', bleu, global_step)
        writer.flush()

In [30]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

device(type='cuda')

## Build Transformer

In [31]:
 model = build_transformer(tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size(), seq_len, seq_len)

## Adam Optimizer

In [32]:
optimizer = torch.optim.Adam(model.parameters(), lr=10**-4, eps=1e-9)

## Cross entropy loss

In [33]:
loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer_src.token_to_id('[PAD]'), label_smoothing=0.1).to(device)


In [34]:
import torch
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import os


# TensorBoard writer
writer = SummaryWriter('runs/tmodel')

# Initialize device and model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.nn.DataParallel(model, device_ids = [0,1])  # Wrap your model for multi-GPU training
model.to(device)

# Training loop setup
initial_epoch = 0
num_epochs = 40
global_step = 0
torch.cuda.empty_cache()

for epoch in range(initial_epoch, num_epochs):
    torch.cuda.empty_cache()
    model.train()
    batch_iterator = tqdm(train_dataloader, desc=f"Processing Epoch {epoch:02d}")
    
    for batch in batch_iterator:
        encoder_input = batch['encoder_input'].to(device)  # Move inputs to GPUs
        decoder_input = batch['decoder_input'].to(device)
        encoder_mask = batch['encoder_mask'].to(device)
        decoder_mask = batch['decoder_mask'].to(device)

        # Forward pass
        encoder_output = model.module.encode(encoder_input, encoder_mask)  # Use `.module` when wrapped with DataParallel
        decoder_output = model.module.decode(encoder_output, encoder_mask, decoder_input, decoder_mask)
        proj_output = model.module.project(decoder_output)

        # Compare output with labels
        label = batch['label'].to(device)
        loss = loss_fn(proj_output.view(-1, tokenizer_tgt.get_vocab_size()), label.view(-1))
        batch_iterator.set_postfix({"loss": f"{loss.item():6.3f}"})

        # Log loss
        writer.add_scalar('train loss', loss.item(), global_step)
        writer.flush()

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)

        global_step += 1

    # Validation
    run_validation(model.module, val_dataloader, tokenizer_src, tokenizer_tgt, 128, device,
                   lambda msg: batch_iterator.write(msg), global_step, writer)
    if epoch == num_epochs - 1:
        # Save model checkpoint
        model_filename =  f"model_epoch_{epoch:02d}.pt"
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.module.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'global_step': global_step
        }, model_filename)

Processing Epoch 00: 100%|██████████| 461/461 [03:45<00:00,  2.05it/s, loss=5.359]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Keith: We have a few servings of fish extra from this morning if you want to join and chip in let me know, first come first serve
Alex: I WANT IT
Keith: I know
Keith: But you're here alrady, I'm asking the others
Alex: I'LL EAT EVERYTHING
Keith: Shut up!
Alex: NO
Keith: No more fish for ya
    TARGET: Alex wants to eat all extra fish but Keith refuses him.
 PREDICTED: is going to buy a .
--------------------------------------------------------------------------------
    SOURCE: Elena: Have you read about the British teenager that escaped to Syria in 2015?
Kim: no, but I remember the case
Elena: <file_other>
Andreas: there was a similar case in Austria at the beginning of the war
Andreas: but, honestly, I cannot relate to their problems
Andreas: so much naivety and stupidity
Jeff: So she wants to come back now?
Elena: apparently
Elena: but she also have no regrets
Kim: I think it's dangerous
El

Processing Epoch 01: 100%|██████████| 461/461 [03:54<00:00,  1.97it/s, loss=5.171]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Ben: Yo?
Jake: Yo.
Ben: Where you at?
Jake: Out.
Ben: Yeah I need your laptop.
Jake: What for?
Ben: Gotta do some work stuff.
Jake: What's with yours?
Ben: It's having that adapter issue, remember?
Jake: Oh. Yeah sure just take it from my room.
Ben: Yeah thanks. And by the way, you're not gonna get it till tomorrow.
Jake: Okay man.
Ben: So you won't get to play DOTA tonight.
Jake: That's fine.
Ben: Okay. Great!
    TARGET: Ben needs Jakes laptop as his has some adapter issues. He will return it tomorrow, so Jake won't be able to play DOTA tonight.
 PREDICTED: is going to the of the . She will be in the .
--------------------------------------------------------------------------------
    SOURCE: Frederic: how is the weather down there?
Sean: today it's sunny but cold
Vanessa: it's quite strange, the temperature is low but it seems quite hot in the sun 
Abigail: I love this weather, there is no 

Processing Epoch 02: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=5.038]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Owen: Luke
Luke: yeah?
Owen: theres this project
Owen: remember
Owen: i told you about it
Luke: which one
Owen: in Toronto
Luke: this one yep
Owen: i need more volunteers
Luke: they dont pay??
Owen: nope
Owen: but they give you money back
Owen: reimbursement or sth lol
Luke: for what
Owen: for travel costs and acomodation
Luke: cool
Luke: but still
Luke: working for free 🤔
Owen: lemme explain oyu
Owen: you
Owen: its about making good memories, meeting people
Owen: and helping those in need
Luke: sounds interesting
Owen: ima send you the booklter
Owen: booklet
Owen: <file_other>
Luke: thanks
Luke: ill look at it
    TARGET: Owen is looking for volunteers for an unpaid project in Toronto. All associated costs are covered. Luke might consider it.
 PREDICTED: is going to the . She will be there in the .
--------------------------------------------------------------------------------
    SOURCE: Bel

Processing Epoch 03: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=4.835]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Davey: Hi Deborah, I can't find the basement key! Any idea?
Deborah: None. Pockets?
Davey: Yours?
Deborah: :‑P
    TARGET: Davey is searching for the basement key.
 PREDICTED: is in the .
--------------------------------------------------------------------------------
    SOURCE: Michaela: So what do you guys think about the association idea?
Ron: I might be lacking the context here
Michaela: Basically we were thinking of turning the club into an official association, so it's easier to get funds for events for example
Jessie: I'm torn actually - I get why it would help with funds, cooperation with other institutions and such, but I researched it and it involves quite a bit of paperwork.
Steve: And someone to take care of it permanently, cause it doesn't end at setting up the association - you have to do it every year.
Michaela: These are exactly the kind of things that I wanted to discuss with 

Processing Epoch 04: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=4.655]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Fred: what time is your flight?
Bill: 8PM
Fred: it's today, right?
Annette: yes, we're on the way to the airport already, thanks for the evening!
Fred: was nice to have you here, kisses
Annette: kisses
    TARGET: Bill and Annette are on their way to the airport. Their flight is at 8 PM. They spent a nice evening with Fred.
 PREDICTED: The meeting is at the airport . The bus is at the station .
--------------------------------------------------------------------------------
    SOURCE: Betty: Running late! Missed my bus :(
Harriet: will you make it? I will leave your ticket with the attendant
Betty: ok, should make it, cu
    TARGET: Betty will be late. Harriet will leave her ticket with the attendant.
 PREDICTED: will pick up the car .
--------------------------------------------------------------------------------


Processing Epoch 05: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=4.471]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Oscar: Can we set up a phone call for this afternoon?
Sarah: Today is not good. I'll be in and out of cunt all day. Tomorrow is better.
Sarah: Autocorrect. I meant court. I'm sorry.
    TARGET: Oscar wants to talk with Sarah on the phone today. She's too busy today and suggests tomorrow.
 PREDICTED: John will come to the office tomorrow .
--------------------------------------------------------------------------------
    SOURCE: Veronica: heyo, I sent you the files by wetransfer
Erica: cool, thx!
Veronica: let me know if they work ok
Erica: yup
Erica: hey, the video file has some artifacts at the beginning
Veronica: what? 
Veronica: <file_gif>
Veronica: i watched it through before sending :-/
Veronica: oooh, ok I see now
Erica: yeah, could you please reexport it?
Veronica: yup, on it
Veronica: give me 30 min
Veronica: sorry :-/
Erica: it's ok, good thing we caught it now ;)
Veronica: :-D
    T

Processing Epoch 06: 100%|██████████| 461/461 [03:54<00:00,  1.97it/s, loss=4.539]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Todd: <file_photo>
Julia: What's that?
Todd: This was on my apt door. Who the f is behind this?
Julia: Looks like Photoshop
Todd: It's not
Julia: Someone saw and sent you that? Weird. Shenanigans.
Todd: Tiffany found it
Julia: Cleaning Tiffany?
Todd: Yep
Julia: You found her! Saints be praised, she lives. Still super strange. Suppose there's no cameras that could help, eh?
Todd: Nah, I don't have any like that. We've got enough bloody cameras in the building. Definitely very odd.
Julia: Sorry that happened. 
Todd: Thanks. Not a biggie but still. Why would someone do that?
Julia: Weird prank? Got the wrong door?
Todd: Perhaps. Do you have a camera outside yours?
Julia: I think there's on in the hall that catches my door. Never actually had to pull any tapes so not sure if it even works
Todd: I see. I'm about to drop a couple hammers on the BoD and mgmt ahead of the meeting. Will copy all to FB.


Processing Epoch 07: 100%|██████████| 461/461 [03:54<00:00,  1.97it/s, loss=3.866]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Lily: Why r u so sad?
Sandra: Something's happened?
Lily: Nope, I just feel a bit low today :(
Sandra: Wanna meet today?
Lily: Sure!
    TARGET: Sandra suggested a meeting with Lily to cheer her up.
 PREDICTED: Lily is sick and she will meet with her .
--------------------------------------------------------------------------------
    SOURCE: Carol: did you see the forecast on channel 11 yesterday?!
Jim: they said there's a huge storm brewing
Josh: yeah, like the biggest one in decades. We went and stocked up on dry goods and water
Beth: Really?? OMG we are completely not ready!! when is it supposed to hit?
Carol: tomorrow night they expect like over 2 inches of rain an hour! it's crazy!
Beth: I have to run to the store and get supplies! I have nothing in the fridge!
Jim: We still need to go too! Don't forget flashlights, batteries and candles in case the power goes out!
Josh: definitely! a co

Processing Epoch 08: 100%|██████████| 461/461 [03:54<00:00,  1.97it/s, loss=3.926]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Gabby: Hey my munchkin
Gabby: What are you up too?
Henry: Preparing for a meeting later on
Gabby: With the staff from Germany?
Henry: Yeah, we showed them around the new facilities this morning
Gabby: Were they happy?
Henry: Yes they seemed pleased with the work, thank god
Gabby: Good
Gabby: Call me later on when you finish!
Henry: I will 😘
    TARGET: Henry is preparing himself for a meeting that he will have later on with the staff from Germany. They were pleased with the work they saw while visiting the new facilities this morning.
 PREDICTED: Henry has just arrived to the meeting with his new . He has to do it . He has to do it . He has to do the new job .
--------------------------------------------------------------------------------
    SOURCE: Amy: listen
Amy: maybe we'll throw Julie a surprise b-day party?
Jessica: Cool idea!
Jessica: on her actual birthday?
Amy: let's see in the calen

Processing Epoch 09: 100%|██████████| 461/461 [03:54<00:00,  1.97it/s, loss=3.657]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Luigi: Geez, Mayweather totally destroyed McGregor! Did you watch gentlemen??
Lorenzo: I'm not very much into boxing, rather prefer MMA but the buzz about this fight made me watch it
Lorenzo: And yeah, the black guy seemed to be only defending himself for a few rounds but at the same time Conor lost much of his stamina
Pablo: Whatta hell are you two talking about??
Pablo: McGregor and boxing?? He's an mma warrior, those R 2 different worlds! Wtf
Pablo: And Mayweather?? He's fuckin 40, he's to fuckin old to fight!
Luigi: Hahaha Pablito :D
Luigi: You follow UFC events and you know mcgregor's a freak and likes to provoke people
Pablo: Aight but those R people from mma that he provokes, not from fuckin boxing!
Pablo: He's fuckin one unpredictable piece of bastard
Lorenzo: You better watch your mouth Pablo cause if Conor finds out what you're sayin, you'll be the next xD
Pablo: Sheit dudes, that's a

Processing Epoch 10: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=3.417]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: A: Hi Tom, are you busy tomorrow’s afternoon?
B: I’m pretty sure I am. What’s up?
A: Can you go with me to the animal shelter?.
B: What do you want to do?
A: I want to get a puppy for my son.
B: That will make him so happy.
A: Yeah, we’ve discussed it many times. I think he’s ready now.
B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) 
A: I'll get him one of those little dogs.
B: One that won't grow up too big;-)
A: And eat too much;-))
B: Do you know which one he would like?
A: Oh, yes, I took him there last Monday. He showed me one that he really liked.
B: I bet you had to drag him away.
A: He wanted to take it home right away ;-).
B: I wonder what he'll name it.
A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))
    TARGET: A will go to the animal shelter tomorrow to get a puppy for her son. They already visited the shelter last 

Processing Epoch 11: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=3.418]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Sean: Hi! How is it going?
Louis: hi, everything's fine, you?
Sean: Fine as well. How is Petersburg?
Louis: Strange. Very strange.
Sean: why?
Louis: I mean it's beautiful but people are somehow strange.
Sean: any examples?
Louis: Yesterday I was in an Internet café at there was a guy openly masturbating.
Sean: LOL. Hahaha
Louis: Yes, and nobody said anything.
Sean: How awkward! 
Louis: Very, they are also quite cold in general, rarely smile.
Sean: That's so common in Eastern Europe.
Louis: Seems so.
Sean: Anyway, let me know when you're back, I'll be glad to listen to your stories of Russia.
Louis: I will!
Sean: Take care!
    TARGET: Luis thinks that people in Sankt Petersburg are strange. There was a person masturbating in an Internet café. Sean thinks that people in Eastern Europe are generally cold. Sean wants Louis to let him know when he is back to tell him stories about Russia.
 PREDICTE

Processing Epoch 12: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=3.396]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Sadie: can i borrow your bike again please?
Chloe: when?
Sadie: on thursday, i need to go to the dentist quickly after work
Chloe: sure, let me know when you want to pick it up
Sadie: wednesday evening will be good?
Chloe: sure, come over and please remember to lock it properly!!
    TARGET: Sadie will borrow Chloe's bike on Wednesday evening. She has a dentist appointment on Thursday after work.
 PREDICTED: Chloe ' s bike is on Thursday , so she will pick her up .
--------------------------------------------------------------------------------
    SOURCE: Guy: Hey, I just got the news - how's Kenny?
Kristine: Not good. He's back from surgery, but the doctors don't know if he'll pull through. 
Guy: He's gonna be okay - he has you to get back to :)
Kristine: He's got another surgery scheduled tomorrow, but they might have to reschedule it depending on his condition.
Guy: And how are you?
Kristin

Processing Epoch 13: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=3.109]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Heidi: Could you take the things away from the balcony? I forgot about them and it's going to rain today.
Noah: I'll do it as soon as I am back home.
Heidi: And close all the windows in case of a storm.
Noah: of course
    TARGET: Heidi wants Noah to take items away from the balcony and close all the windows.
 PREDICTED: Heidi ' s left the bus . Noah will take the bus home .
--------------------------------------------------------------------------------
    SOURCE: Derek: hey
Derek: yo??
Derek: ???
Danny: let me sleep for once
Derek: :/
    TARGET: Danny would like to be left to sleep. 
 PREDICTED: Derek is busy and will talk to Derek .
--------------------------------------------------------------------------------


Processing Epoch 14: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=3.009]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Eve: Getting ready!
Nancy: Wow! Lovely picture!
Ben: you look awesome girl!
Mark: any special occasion? 
Eve: just a party with my work girls
Mark: you look fab!
Eve: thanks! ;)
Tom: have a great party! 
Gina: OMG! you look so glam!
Nancy: so jealous i can't go out with u! xxx
Eve: xxx
    TARGET: Eve is going to a party with some girls from her work. Nancy would like to join but she cannot.
 PREDICTED: Eve is having a party with the girls she has .
--------------------------------------------------------------------------------
    SOURCE: Nathan: can i borrow your plaid flannel shirt?
Nathan: i look like a mess and i'm meeting Kaylee in half an hour
Lucas: sure
Lucas: say hi to her from me :)
    TARGET: Lucas will let Nathan wear his plain flannel shirt for Nathan's meeting with Kaylee.
 PREDICTED: Nathan will lend Lucas his number .
----------------------------------------------------------

Processing Epoch 15: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=2.842]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Sandra: Help!
Sandra: Little one has a diarrhoea :(
Mark: oh dear
Karen: Take him to the hospital
Rita: Hospital? Because of a diarrhoea?
Sandra: My sister's going to kill me
Karen: How old is he? 4?
Sandra: Yeah
Rita: Calm down, you need to keep him hydrated
Mark: Give him some pills
Sandra: I did, he's drinking and I gave him pills but they're not working
Rita: Give him one more, sometimes it doesn't work from the beginning
Karen: If it doesn't stop you should go to the hospital
Sandra: oh god
Sandra: he's vomiting now
Mark: you sure it's not stomach flu?
Sandra: don't even say that
Sandra:  I'm going to the hospital
    TARGET: Sandra takes care of a 4 year old. He has diarrhea and is vomiting. She gave him pills and water. They are going to the hospital.
 PREDICTED: Sandra is at the hospital . Mark has an operation that is . She thinks it ' s a better deal for her sister .
-----------------

Processing Epoch 16: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=2.835]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Amanda: Claudette is such a bitch!!
Lillian: What did she do?
Petra: You didn't see the pictures???
Petra: I thought everyone got them...
Amanda: <photo_file>
Amanda: <photo_file>
Amanda: <photo_file>
Amanda: And this one is the best: 
Amanda: <photo_file> 
Lillian: OMG
Lillian: Is that Thomas?
Amanda: Himself...
Lillian: This is so wrong
Lillian: How did you get these pictures??
Lillian: They are VERY intimate
Amanda: Dunno everybody is passing them around
Lillian: I don't like it at all
Amanda: But she's a bitch she deserves it
Lillian: Why? 
Lillian: Nobody deserves to be exposed like that
Lillian: And Thomas?? Nobody calls him a bitch
Lillian: After all, Thomas is the one who is in a relationship and was caught having sex with her
Amanda: She seduced him
Lillian: He should think with his brain and not only with his dick
    TARGET: Amanda sent intimate photos of Thomas and Claudette having 

Processing Epoch 17: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=2.507]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Adam: Should we try to climb the mountain tomorrow?
Jasmine: Let me check the weather forecast
Aaron: it is a very nice idea, I would love to leave the city for a day
Jasmine: the forecast is amazing: sunny but not too hot
Adam: so let's try
Aaron: how much time does it take to get to the peak?
Adam: about 4 hours from the first mountain hut
Jasmine: 4-5h I believe, really depends
Adam: right, and there are some truly beautiful views on the way, so we may want to stop sometimes
Aaron: that sounds amazing
Adam: we should leave the city about 7AM I think
Rebecca: I agree, or even 6, so we can start walking at 7 and be on the peak before the hottest hours
Adam: that would be very responsible, but also nice
Aaron: so let's do it!
    TARGET: Adam, Jasmine, Rebecca and Aaron are planning to climb the mountain tomorrow.  They'll meet at 6 am. The weather forecast is good for tomorrow, sunny but not t

Processing Epoch 18: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=2.521]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Fala: Rafal needs braces to straighten out his teeth
Evline: Why? Mostly in his age people don't need braces
Fala: His pangs are sharpy, when you see him you saw them, right?
Fala: I thought it is genetic but it turned out that his other teeth are not in place and he didn't use his fangs
Fala: That's why
Fala: He asked the dentists if there is any other way not to have braces but all of the dentist he met said the same thing 
Fala: First braces to support and his teeth in place for 2 and half years
Fala: And after that additionally the other one for 1 and half years
Fala: He said
Evline: That sounds like long time. 
Evline: And it must be uncomfortable
Fala: That's what I was thinking about..
Fala: And it costs a lot
Fala: 2000 dollars and +
Evline: Dentists make money everywhere
Fala: And I became worried about my teeth
Evline: Your teeth?
Fala: Cause I feel a bit pain in my teeth
Evline: Then

Processing Epoch 19: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=2.624]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Magda: can you take my car today?
Lena: what for?
Magda: i think its broken but my dad thinks its not
Magda: and i want opinion of someone else
Lena: but i dont know anything about cars
Magda: you have driving licence
Magda: so you can feel something
Lena: cant you just take it to service?
Magda: it will cost some money
Lena: but you will be sure
Lena: and they can repair it
Magda: maybe your right
Lena: of course i am :)
    TARGET: Following Lena's advice, Magda will take her car to the service to see if it's broken.
 PREDICTED: Lena needs to take his car without his car . The car is due next month . Lena will take care of the car .
--------------------------------------------------------------------------------
    SOURCE: Frank: i owe you one btw!
Judy: haha, you owe me two😜
Frank: okay then, two dates it is. haha
Judy: lol
    TARGET: Frank owes Judy two dates.
 PREDICTED: Frank bought 3 $

Processing Epoch 20: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=2.310]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Veronica: Jason and I broke up.
Madeline: Yeey! Well done! Finally 👏
Veronica: Are you kidding? You can’t imagine how sad I am 😭
Kendall: Don’t be. He was a total asshole 😕. We need to celebrate it! I’m so happy for you 🍺🍺
    TARGET: Jason and Veronica broke up. 
 PREDICTED: Veronica broke up with Veronica ' s girlfriend .
--------------------------------------------------------------------------------
    SOURCE: Simon: Howdy mate! RU done?
Joe: Need half an hour still.
Simon: !? B/C?
Joe: B/C I'm not ready yet FFS!
Simon: Don't get worked up! Just asked.
Joe: I'll text you when done.
Simon: Got it.
Joe: Simon, 10 more mins, ok?
Joe: Simon I'm ready.
Joe: ?? You there? Where RU??
Simon: Hi mate! I'm in Half Moon. Couldn't wait any longer.
Joe: How the fuck am I gonna to get there now?
Simon: A taxi?
Simon: Joking!! 
Joe: You prat! Can never rely on you!
Simon: Look. I've told Masher to pick y

Processing Epoch 21: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=2.219]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Jaime: Guys! Next month is Rebecca's birthday! Gift ideas?
Emily: Oh god, totally forgot, shame on me :x
Patrick: Hm, a trip?
Julia: Cool idea, but I'm pretty sure Rebecca's boss may object...
Patrick: Oh, yeah, right, that guy is such a dick. Book then?
Emily: We got her a book last year.
Jaime: Pretty sure there are more books to read out there, Ems :D
Emily: Hahaha, very funny. Sorry for trying to make all of you more creative :P
Patrick: Do we want to buy one awesome gift or maybe make something like a box with presents?
Julia: I'd prefer a box.
Jaime: One big.
Emily: Hahahaha, this is priceless :D We're on fire.
Patrick: Love to work with you all. So, box?
Jaime: Yeah, sure. So maybe everyone can buy one thing and then we can book a table at Angelo's?
Emily: Cool, but please share what you bought her so we don't double.
Julia: Ok, calling dibs on perfumes.
Patrick: I'll take a risk and buy

Processing Epoch 22: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=2.059]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Jean: dears
Jean: wanna grab a beer? I'd like to go out
Jackie: sure but when? now?
Philip: I can go now :)
Jean: why not, we can go NOW :)
Jackie: sign me up!
    TARGET: Jackie, Jean, and Philip are going out.
 PREDICTED: Jean wants to go for a beer with Jackie , but she can ' t go for a beer .
--------------------------------------------------------------------------------
    SOURCE: Julia: What is your biggest dream
Julia: I mean the kind that can be achieved
James: Everyone say I have nice voice
James: My mom liked very much when I was reading outloud
James: I've had this dream for some time now, to become a voice actor
James: Be a part of cartoon or video game as a voice actor reading a character
Julia: Wow. Nice one.
Julia: Btw you do have a nice voice
Julia: I could listen to you as a radio speaker.
James: Thanks
James: I've worked in radio, but it was during college so I had little ti

Processing Epoch 23: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=2.011]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Haleema: So are you up for yoga in the park this Saturday?
Haleema: We could follow the routine from this video
Haleema: <file_video>
Dakkota: Yes! I'm really looking forward to it 😊
Haleema: I always wanted to do a yoga session in the park
Haleema: Hopefully we won't have a large audience haha
Dakkota: Hahahaha
Dakkota: <file_gif>
Dakkota: Oh who cares, they can join us 🤣
Haleema: Indeed
Haleema: It's a big park with plenty of open spaces :)
Dakkota: Yoga in the sun, superb 😍
Haleema: <file_gif>
Dakkota: Hahahaha
Haleema: 😋
Haleema: Shall we grab a tea and a snack afterwards? 
Dakkota: Yes sure, why not!
Haleema: 💗
    TARGET: Haleema and Dakkota are planning to do open-air yoga in the park. They are having a drink and a bite to eat afterwards. 
 PREDICTED: and Claudia are going to meet on Saturday at 6 pm to play music . They do yoga and do yoga ice cream . will book the table is good . They 

Processing Epoch 24: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.997]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Claire: <file_other>
Claire: FATM just released a new single! You need to check it out!
Sophia: Wow! Amazing! Give me a minute
Claire: Let me know when you're done, can't wait for your opinion
Sophia: i'm done
Sophia: and dead
Sophia: it was magnificent
Claire: It is, right?! I love her so much <3
Sophia: It's amazing, because... it's different, but I knew it was her from the very beginning
Claire: Exactly! her music really is divine, peaceful and powerful at the same time
Sophia: When's the album coming out?
Claire: hm, not sure, probably next year
Sophia: I hope she's going to keep the style of this single
Claire: me too, I want her to grow etc., but at the same time I'm afraid she's going to experiment too much
Sophia: same thing happened with Mumford and Sons, can't listen to them any more :(
    TARGET: FATM released a new single. Sophia and Claire like it very much. 
 PREDICTED: Claire is

Processing Epoch 25: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.840]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Rita: didn't take breakfast with me, is there sth in the cafeteria?
Lina: blee, the food there is awfull, I will give u a sandwich
Rita: really? thanks!
Lina: sure, have 2 of them :)
    TARGET: Lina will give Rita one of her 2 sandwiches.
 PREDICTED: Rita will book a pizza for himself , Rita recommends .
--------------------------------------------------------------------------------
    SOURCE: Andrew: I had to call in sick.
Daniel: Are you ok?
Andrew: Well, it’s just a cold but I feel awful!
Daniel: How did that happen? You don’t usually get bad colds.. Are you sure it’s not a flu?
Andrew: Pretty certain. I can never guess the weather right you know?!
Daniel: I see. That’s true it’s either warm or freezing cold.
Andrew: If I wear warm clothes, it’s hot. If I don’t, there’s a cold wind! 
Daniel: Sorry to hear that. Is there anything I can help you with?
Andrew: Can you grab some stuff from th

Processing Epoch 26: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=1.885]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Jane: Let the holiday begin! xxx
Harry: again?!
Jane: get lost! x
Richard: Enjoy!
Mark: Where're you going?
Jane: Sunny Turkey ;)
Mark: i hate you!
Jane: ;)
Mary: have a lovely time! 
Victoria: post some pics! x
    TARGET: Jane is going for holiday again, this time to Turkey. 
 PREDICTED: Jane is going to Turkey for holiday .
--------------------------------------------------------------------------------
    SOURCE: Leo: I'll be late, sorry mate
Nathan: no worries, everything all right?
Leo: Got in a fight with michel.
Nathan: oh
Leo: the usual. I need a beer
Nathan: Let me know when you're nearby, I will order one for you
Leo: Cool!
    TARGET: Leo will be late, he got into a fight with Michel. Nathan will order a beer for him.
 PREDICTED: Leo is having a fight with Nathan . Leo will join him for a beer .
--------------------------------------------------------------------------------


Processing Epoch 27: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.928]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Trevor: So, no news?
Amanda: No, not yet.
Trevor: How are you doing?
Amanda: Bad.
Trevor: Want me to come over?
Amanda: Please.
    TARGET: Trevor will come to Amanda's place.
 PREDICTED: Trevor doesn ' t have any news . Trevor will come over .
--------------------------------------------------------------------------------
    SOURCE: Rob: can you man the calls today
Sue: fine all day?
Rob: well I have the meeting in Plymouth so will probably be out of the office most of the day
Sue: ok thats fine, what time is the meeting?
Rob: need to be in the centre of Plymouth for about 11.30
Sue: no worries.. let me know how you get on
Rob: will do should be a good little job ongoing
Sue: great will we get Graham Mason on that?
Rob: yes I think so we can start after the new yer
    TARGET: Rob has a meeting in Plymouth at 11.30 and asks Sue to man the calls today. Rob expects a good job, engage Graham Ma

Processing Epoch 28: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.750]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Sean: morning! 
Sean: sorry I got caught up yesterday
Sean: how are things?
Nadya: hey
Nadya: my neighbours are to loud 😒
Nadya: my head is pounding
Nadya: I wish I could sleep more
Nadya: I think they have a cleaner that arrives very early..
Sean: oh man that's bad..
Nadya: when the baby arrives I will leave all my windows open
Nadya: payback time.. 😈
Sean: hahahah
Sean: 👏👏👏
Sean: good idea.. lol
Nadya: have you had breakfast yet?
Sean: no not yet
Sean: I'm going to do some yoga first ❤️
Nadya: sounds nice
Nadya: <file_gif>
Nadya: my anger levels this am are high
Nadya: 🔥
Sean: take a deep breath dear and stay calm, that's the best 😉
Nadya: do you feel that you have advanced through your practise?
Sean: yes a great deal actually
Nadya: great 💜
Sean: 👏🙏
    TARGET: Nadya has a headache because her neighbours' cleaner is being loud. She is expecting a baby. Sean will have breakfast after he does

Processing Epoch 29: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=1.754]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Audrey: Did you see Anne today?
Kate: Yes, why?
Audrey: And?
Kate: What do you mean?
Audrey: You didn’t… notice her dress?
Kate: Oh. I DID.
Audrey: Exactly. Someone needs to tell this poor thing yellow does not make her any good.
Kate: Well, it’s not going to be me.
Audrey: Me neither. But looking at it is a misery.
    TARGET: Anne is wearing a yellow dress today. Neither Audrey nor Kate want to tell her it doesn't make her any good.
 PREDICTED: Audrey did a dress today . Kate and Kate think it ' s expensive .
--------------------------------------------------------------------------------
    SOURCE: Guto: Guys, be careful when talking to Richard
Guto: he's not with Peter anymore
Stephen: oh no, what happened?
Ross: I know, he mentioned it
Ross: but I've no idea why
Han: very unexpected
Ross: he said something like: "Peter didn't like who I am"
Han: sad, sounds serious
Stephen: yes, like this

Processing Epoch 30: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=1.714]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Rita: didn't take breakfast with me, is there sth in the cafeteria?
Lina: blee, the food there is awfull, I will give u a sandwich
Rita: really? thanks!
Lina: sure, have 2 of them :)
    TARGET: Lina will give Rita one of her 2 sandwiches.
 PREDICTED: Rita wants Rita to take a breakfast for her breakfast . Rita recommends her website .
--------------------------------------------------------------------------------
    SOURCE: Aisha: I woke up at 6
Vicky: Why so early?
Aisha: Benefits of having a little baby ...
    TARGET: Aisha woke up at 6 because of her baby. 
 PREDICTED: Aisha is sick and she cannot sleep at 6 .
--------------------------------------------------------------------------------


Processing Epoch 31: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=1.781]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Eva: Jim, meet me in my office. 
Jim: Everything alright ma'am..
Eva: There are new clients that needs to be provided with details of our policy
Jim: Sure ma'am .. I am coming in 10 mins..
Eva: Kindly bring two of your colleagues with you..
Jim: Ok ma'am .. I am bring Michael and Alan...
Eva: Just make sure that they are well familiar with our policy..
Jim: Don't worry ma'am
    TARGET: Jim will bring Michael and Alan to Eva's office. They will discuss details of their policy with new clients.
 PREDICTED: Jim will come to Jim ' s office in 10 minutes to prepare the new project . Eva is going to bring his knowledge on the topic of the company .
--------------------------------------------------------------------------------
    SOURCE: Marcin: in Poland we actually don't celebrate Halloween and don't do all this dressing up, pumpking curving and so on. 
Marcin: on November 1st we just go to the 

Processing Epoch 32: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=1.744]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Franklin: Mum, our child is crying
Faye: Yes, my dear, it's often like that
Franklin: But why is he crying?
Faye: You know, it is the only way he can tell you what's going on with him, maybe he's hungry, maybe he wants to be changed, maybe he's having pain, stomach ache, toothache?
Franklin: But what should I do?
Faye: Try to give him a bottle, if he doesn't want, change his diapers, if not, take him in your arms, it often works
Franklin: Thanks, mum, I will try. Goodnight
    TARGET: Franklin's and Faye's child is crying. Franklin doesn't know what to do, so he will follow Faye's advice.
 PREDICTED: Franklin is enjoying her a lot of time . His daughter has never been advised .
--------------------------------------------------------------------------------
    SOURCE: Stuart: So, have you decide where are we going tonight?
Warner: we were thinking of this club near the central bus station
Ivor

Processing Epoch 33: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.713]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Zoe: Hi! I've seen you're interested in Design STH event?
Gracie: Hi, yes! Wanna come with?
Zoe: You going there alone?
Gracie: No, with my bestie. would be cool to have bigger team and get creative 😀
Zoe: Sounds awesome 😊
Gracie: Come to my house and we'll go together. If you're free, we can go grab something to eat afterwards.
Zoe: Sure, why not. I never say no to food.
Gracie: TRUE!
Zoe: I gotta go. See you soon! 
Gracie: Cya
    TARGET: Zoe will go with Gracie and her bestie to Design STH event. She will come to Gracie's house and they'll go together. Afterwards they'll have something to eat. 
 PREDICTED: Gracie ' s house is going to a party . They will meet at 8 pm on Saturday .
--------------------------------------------------------------------------------
    SOURCE: Abigail: Are you online?
Abigail: I need to ask you something
Ava: Yeah, I'm here
Abigail: Do you remember when you came 

Processing Epoch 34: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.702]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Mum:  Did you get that card I posted for your brother?
Sally: Yes mum thanks, it turned up this morning when I was just about to leave for work, I'll ask Simon to fill it in tonight when he gets home x
Mum: Oh good I was getting worried that it had gone missing , you know what the post is like these days
Sally: no its here, stop worrying now, Simon will sort it and take it with him tomoz.. got to go.. l've you spk later xxx
Mum: ok darling have a good day love you too xx
    TARGET: The card that Mum sent to Sally's brother has arrived this morning. Sally will ask Simon to fill it in tonight.
 PREDICTED: Sally forgot to pay her brother to pay back home for the whole morning . Simon can ' t leave home as he has no choice in .
--------------------------------------------------------------------------------
    SOURCE: Pam: Milton, do you know maybe how is the process of getting a Belarusian visa?

Processing Epoch 35: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.621]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Winston: Can you provide me with any details?
Bruno: I’m not sure what you mean.
Winston: The KPR project.
Bruno: Oh of course, I’ll send them to you via e-mail ASAP.
Winston: Thank you, I hope I can help you with that.
Bruno: From what the manager told me, you have quite a lot of experience with such matters so I really think you’ll make a good job 
Winston: Thank you, I hope so.
    TARGET: Bruno will send the details concerning the KPR project to Winston via e-mail. Winston will help Bruno.
 PREDICTED: Bruno needs to fill out a project to his project with his project . He will help him in the future .
--------------------------------------------------------------------------------
    SOURCE: Marianne: <file_photo>
Marianne: what do you girls think about this dress? is it ok for the wedding?
Cassandre: wow, it's gorgeous!
Eveline: it's really nice, how much is it? Cause I don't see any pric

Processing Epoch 36: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=1.643]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Meg: hi baby
Margaret: hello :)
Meg: what are you doin?
Margaret: just had a shower
Meg: wanna go with me for shopping?
Margaret: hmm i need to buy some things
Margaret: so yes :)
Meg: great
Meg: can we meet near our school?
Margaret: sure :)
Margaret: when?
Meg: 1 pm?
Margaret: ok :)
    TARGET: Meg and Margaret are going shopping. They are planning to meet at 1 pm near their school.
 PREDICTED: Meg and Margaret are going shopping for a baby shower . They will meet after school to buy her a baby shower .
--------------------------------------------------------------------------------
    SOURCE: Joanne: girls i am taking a few days off next week
Elisabeth: oh you lucky girl!! are you travelling somewhere?:*
Monica: next week? are you sure? :(
Joanne: what's wrong?
Elisabeth: yeah Monica, is something not okay?
Joanne: i am going to spain with my fiancee
Monica: well we have this two big presen

Processing Epoch 37: 100%|██████████| 461/461 [03:53<00:00,  1.97it/s, loss=1.559]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Jim: is it forest school tomorrow?
Nia: only for group 3 and 4, Lindsay is in group 2
Jim: thanks
Sadie: Jim, Elsa is in group 2 too
    TARGET: Forest school is tomorrow for group 3 and 4. Lindsay and Elsa are in group 2.
 PREDICTED: Nia ' s teacher , and Jason are going to school tomorrow .
--------------------------------------------------------------------------------
    SOURCE: Grayson: Hey, dad! :) Could you give me a lift to the airport?
Harrison: when?
Grayson: Tomorrow.  I have to check in two hours before the flight, so I should be in the airport around 4 pm.
Harrison: i'm at work at this time
Harrison: ask your mother
Harrison: where are you flying to? for how long?
Grayson: I'm going to Spain for 2 weeks. :)
Harrison: have fun and don't forget to send me a postcard :-]
Harrison: don't drink too much
Grayson: Thanks, dad, I won't! :)
    TARGET: Grayson is going to Spain for two wee

Processing Epoch 38: 100%|██████████| 461/461 [03:52<00:00,  1.98it/s, loss=1.608]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Peter: Hi! How was your work?
Amy: Hi. Well, the day was weird.
Peter: Y?
Amy: Managing a team is hard!
Peter: I know. What happened?
Amy: Well, there's this Mark. And his attitude is simply horrible.
Peter: What did he do?
Amy: Actually nothing much, but it's the general picture that matters.
Peter: What do you mean?
Amy: That's a long story.
Peter: Don't worry. Got time.
    TARGET: Amy had a weird day at work managing her team. Amy dislikes the attitude of one of her coworkers, Mark.
 PREDICTED: Amy ' s company got into and he ' s really into the team . Peter is worried about the team that the team will probably be difficult to talk about this time .
--------------------------------------------------------------------------------
    SOURCE: Charles: Hi! Would you like to go to the opera next month.
Carol: Anything in particular?
Charles: Carmen
Carol: I've always wanted to see it! Let's go


Processing Epoch 39: 100%|██████████| 461/461 [03:53<00:00,  1.98it/s, loss=1.522]
stty: 'standard input': Inappropriate ioctl for device


--------------------------------------------------------------------------------
    SOURCE: Guto: Guys, be careful when talking to Richard
Guto: he's not with Peter anymore
Stephen: oh no, what happened?
Ross: I know, he mentioned it
Ross: but I've no idea why
Han: very unexpected
Ross: he said something like: "Peter didn't like who I am"
Han: sad, sounds serious
Stephen: yes, like this realisation you're not accepted by your partner
Guto: so maybe it's good they broke up
Ross: Which does not change the fact that it may be very difficult for them
Guto: how long did they stay together?
Ross: 3 years at least
Guto: right, a piece of life is gone...
    TARGET: Richard and Peter broke up. They were more than 3 years together. Peter didn't accept Richard fully. Stephen, Guto, Ross and Han will be careful when talking to Richard.
 PREDICTED: After graduating , Peter is upset , because he needs to do her .
--------------------------------------------------------------------------------
    

## Evaluate

In [60]:
import torch
from torchmetrics.text.rouge import ROUGEScore
from pprint import pprint

def run_test(model, test_dataloader, tokenizer_src, tokenizer_tgt, max_len, device):
    model.eval()

    source_texts = []
    expected = []
    predicted = []

    console_width = 80

    # Initialize ROUGEScore to compute only ROUGE-1, ROUGE-2, and ROUGE-L
    rouge = ROUGEScore(rouge_keys=('rouge1', 'rouge2', 'rougeL')).to(device)
    all_rouge_scores = []
    count = 0
    with torch.no_grad():
        for batch in test_dataloader:
            count += 1
            encoder_input = batch["encoder_input"].to(device)  # (b, seq_len)
            encoder_mask = batch["encoder_mask"].to(device)  # (b, 1, 1, seq_len)

            # Check that the batch size is 1
            assert encoder_input.size(0) == 1, "Batch size must be 1 for test"

            model_out = greedy_decode(model, encoder_input, encoder_mask, tokenizer_src, tokenizer_tgt, max_len, device)

            source_text = batch["src_text"][0]
            target_text = batch["tgt_text"][0]
            model_out_text = tokenizer_tgt.decode(model_out.detach().cpu().numpy())

            source_texts.append(source_text)
            expected.append(target_text)
            predicted.append(model_out_text)

            
            

            # Calculate and print ROUGE scores for this example
            rouge_scores = rouge(model_out_text, target_text)
            all_rouge_scores.append(rouge_scores)
            if count % 50 == 0:
                # Print the source, target and model output
                pprint('-' * console_width)
                pprint("ROUGE scores for this example:")
                pprint({k: v.item() for k, v in rouge_scores.items()})

    if all_rouge_scores:
        # Calculate and print average ROUGE scores
        avg_rouge_scores = {key: torch.mean(torch.stack([score[key] for score in all_rouge_scores])).item() for key in all_rouge_scores[0]}
        pprint("Average ROUGE scores:")
        pprint(avg_rouge_scores)
    else:
        pprint("No ROUGE scores were calculated. Please check the test_dataloader and ensure it contains data.")

# Example usage
run_test(model.module, test_dataloader, tokenizer_src, tokenizer_tgt, 128, device)

'--------------------------------------------------------------------------------'
'ROUGE scores for this example:'
{'rouge1_fmeasure': 0.31111112236976624,
 'rouge1_precision': 0.46666666865348816,
 'rouge1_recall': 0.23333333432674408,
 'rouge2_fmeasure': 0.09302325546741486,
 'rouge2_precision': 0.1428571492433548,
 'rouge2_recall': 0.06896551698446274,
 'rougeL_fmeasure': 0.2222222238779068,
 'rougeL_precision': 0.3333333432674408,
 'rougeL_recall': 0.1666666716337204}
'--------------------------------------------------------------------------------'
'ROUGE scores for this example:'
{'rouge1_fmeasure': 0.20000000298023224,
 'rouge1_precision': 0.29411765933036804,
 'rouge1_recall': 0.1515151560306549,
 'rouge2_fmeasure': 0.0,
 'rouge2_precision': 0.0,
 'rouge2_recall': 0.0,
 'rougeL_fmeasure': 0.11999999731779099,
 'rougeL_precision': 0.1764705926179886,
 'rougeL_recall': 0.09090909361839294}
'--------------------------------------------------------------------------------'
'ROUGE 

In [66]:
import torch
from pathlib import Path
from tokenizers import Tokenizer
from datasets import load_dataset

def summarize(sentence: str, model_path: str, tokenizer_src_path: str, tokenizer_tgt_path: str):
    
    # Define the device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load tokenizers
    tokenizer_src = Tokenizer.from_file(tokenizer_src_path)
    tokenizer_tgt = Tokenizer.from_file(tokenizer_tgt_path)
    
    # Load model
    model = build_transformer(tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size(), 128, 128).to(device)
    
    # Load the pretrained weights
    state = torch.load(model_path,weights_only=False)
    model.load_state_dict(state['model_state_dict'])
    
    # Translate the sentence
    model.eval()
    with torch.no_grad():
        # Precompute the encoder output and reuse it for every generation step
        source = tokenizer_src.encode(sentence)
        source = torch.cat([
            torch.tensor([tokenizer_src.token_to_id('[SOS]')], dtype=torch.int64), 
            torch.tensor(source.ids, dtype=torch.int64),
            torch.tensor([tokenizer_src.token_to_id('[EOS]')], dtype=torch.int64),
            torch.tensor([tokenizer_src.token_to_id('[PAD]')] * (seq_len - len(source.ids) - 2), dtype=torch.int64)
        ], dim=0).to(device)
        source_mask = (source != tokenizer_src.token_to_id('[PAD]')).unsqueeze(0).unsqueeze(0).int().to(device)
        encoder_output = model.encode(source, source_mask)
        
        # Initialize the decoder input with the sos token
        decoder_input = torch.empty(1, 1).fill_(tokenizer_tgt.token_to_id('[SOS]')).type_as(source).to(device)
        
        # Generate the translation word by word
        while decoder_input.size(1) < seq_len:
            # Build mask for target and calculate output
            decoder_mask = torch.triu(torch.ones((1, decoder_input.size(1), decoder_input.size(1))), diagonal=1).type(torch.int).type_as(source_mask).to(device)
            out = model.decode(encoder_output, source_mask, decoder_input, decoder_mask)
            
            # Project next token
            prob = model.project(out[:, -1])
            _, next_word = torch.max(prob, dim=1)
            decoder_input = torch.cat([decoder_input, torch.empty(1, 1).type_as(source).fill_(next_word.item()).to(device)], dim=1)
            
            # Break if we predict the end of sentence token
            if next_word == tokenizer_tgt.token_to_id('[EOS]'):
                break
    
    # Convert ids to tokens
    return tokenizer_tgt.decode(decoder_input[0].tolist())


model_path = '/kaggle/working/model_epoch_39.pt'
tokenizer_src_path = '/kaggle/working/tokenizer_file_dialogue.json'
tokenizer_tgt_path = '/kaggle/working/tokenizer_file_summary.json'


Summarized Text: recommends the Formula Formula Formula 1 with the cheese that for the Formula .


## Test 1

In [67]:
sentence = """
Rose: I'm going to the airport
Violet: Have a safe flight!
Rose: Thanks! See you in a few days.
"""
summarized_text = summarize(sentence, model_path, tokenizer_src_path, tokenizer_tgt_path)
print("Summarized Text:", summarized_text)

Summarized Text: Rose is going to the airport in a long - already here is going to see Rose days in a few days in a few days .


# Test 2

In [68]:
sentence = """
 Adam: It's so boring here….
Mandy: Still at class?
Adam: Yes.
Adam: -_-
"""
summarized_text = summarize(sentence, model_path, tokenizer_src_path, tokenizer_tgt_path)
print("Summarized Text:", summarized_text)

Summarized Text: Adam is still at class is still at class .
