<a href="https://colab.research.google.com/github/Pranv45/DL_Assignment3/blob/main/Machine_Transliteration_without_attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# !pip install wandb  # run if wandb not installed; confirm with TA before new libs (allowed)
import os
import math
import random
from pathlib import Path
from typing import List, Tuple, Dict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import wandb
# Pick the CUDA device when PyTorch can see one, otherwise fall back to the CPU.
# `device` is read later by collate_fn, which places every batch on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cpu


#### Read Raw Aksharantar Data from CSV Files
- Function: read_pairs_from_csv_folder
- Reads train/dev/test CSVs and extracts (romanized, native) word pairs.

In [2]:
import csv
from pathlib import Path

def read_pairs_from_csv_folder(folder: str,
                               train_fname="tel_train.csv",
                               dev_fname="tel_valid.csv",
                               test_fname="tel_test.csv",
                               roman_col=0,
                               native_col=1,
                               delimiter=',') -> dict:
    """
    Read header-less train/dev/test CSV files into (roman, native) pairs.

    Args:
        folder: directory that contains the three CSV files.
        train_fname, dev_fname, test_fname: file names inside ``folder``.
        roman_col, native_col: column indices of the romanized and the
            native-script word in every row.
        delimiter: single-character field separator passed to ``csv.reader``.

    Returns:
        ``{'train': [...], 'dev': [...], 'test': [...]}`` where each list holds
        ``(roman, native)`` tuples with surrounding whitespace stripped.
        A missing file only prints a warning and leaves its list empty.
        Rows that are too short, or whose roman/native field is empty after
        stripping, are skipped.
    """
    folder = Path(folder)
    files = {'train': train_fname, 'dev': dev_fname, 'test': test_fname}
    out = {split: [] for split in files}
    # A row must have at least this many fields for both indices to be valid.
    min_cols = max(roman_col, native_col) + 1

    for split, fname in files.items():
        path = folder / fname
        if not path.exists():
            print(f"[warn] file not found: {path}")
            continue

        # 'utf-8-sig' drops a leading byte-order mark; with plain 'utf-8' the BOM
        # would stay attached to the first word because str.strip() does not remove it.
        # newline='' is what the csv module asks for so it can handle line endings itself.
        with open(path, 'r', encoding='utf-8-sig', errors='ignore', newline='') as f:
            for row in csv.reader(f, delimiter=delimiter):
                # skip empty or malformed rows
                if len(row) < min_cols:
                    continue
                roman = row[roman_col].strip()
                native = row[native_col].strip()
                if roman and native:
                    out[split].append((roman, native))

    return out

# Folder holding the three Telugu CSV splits (a mounted Google Drive path here).
data_dir = "/content/drive/MyDrive/tel"   # update path if different
pairs_dict = read_pairs_from_csv_folder(data_dir,
                                       train_fname="tel_train.csv",
                                       dev_fname="tel_valid.csv",
                                       test_fname="tel_test.csv",
                                       # use column indices if CSV has no header
                                       roman_col=0, native_col=1,
                                       delimiter=',')  # must be an explicit character; the reader does not auto-detect
# Unpack the splits into the module-level names the later cells rely on.
train_pairs = pairs_dict['train']
dev_pairs   = pairs_dict['dev']
test_pairs  = pairs_dict['test']
print("Loaded:", len(train_pairs), "train,", len(dev_pairs), "dev,", len(test_pairs), "test")
# show few examples
train_pairs[:5]

Loaded: 51200 train, 4096 dev, 4096 test


[('vargaalavaarine', 'వర్గాలవారినే'),
 ('vastadira', 'వస్తాదిరా'),
 ('factamfos', 'ఫ్యాక్టమ్ఫోస్'),
 ('muutranaalaala', 'మూత్రనాళాల'),
 ('dwipatra', 'ద్విపాత్ర')]

#### Build Vocabularies (Character → Index Mapping)
- Function: build_vocab_from_pairs
- Builds source and target vocab dictionaries with special tokens.

In [3]:
def build_vocab_from_pairs(pairs):
    """
    Build character vocabularies for both sides of a list of word pairs.

    Args:
        pairs: iterable of ``(source_word, target_word)``.

    Returns:
        ``(s2i, i2s, t2i, i2t)``: symbol->index and index->symbol dicts for the
        source and the target side. Indices 0-3 are always
        ``<PAD>``, ``<SOS>``, ``<EOS>``, ``<UNK>``; the observed characters
        follow in sorted order.
    """
    source_alphabet, target_alphabet = set(), set()
    for source_word, target_word in pairs:
        source_alphabet |= set(source_word)
        target_alphabet |= set(target_word)

    # Reserved symbols come first so their indices are the same on both sides.
    reserved = ['<PAD>', '<SOS>', '<EOS>', '<UNK>']
    source_symbols = reserved + sorted(source_alphabet)
    target_symbols = reserved + sorted(target_alphabet)

    s2i = {}
    for position, symbol in enumerate(source_symbols):
        s2i[symbol] = position
    t2i = {}
    for position, symbol in enumerate(target_symbols):
        t2i[symbol] = position

    # Reverse lookups are derived from the forward maps.
    i2s = {position: symbol for symbol, position in s2i.items()}
    i2t = {position: symbol for symbol, position in t2i.items()}
    return s2i, i2s, t2i, i2t

#### Create Transliteration Dataset
- Converts each word pair into integer tensors using vocab mappings

In [4]:
class TransliterationDataset(Dataset):
    """
    Dataset of (source, target) word pairs encoded as index tensors.

    Source sequences are ``chars... <EOS>``; target sequences are
    ``<SOS> chars... <EOS>``. Both are clipped so that, including the special
    tokens, they hold at most ``max_len`` entries. Characters missing from a
    vocabulary map to its ``<UNK>`` index.
    """

    def __init__(self, pairs: List[Tuple[str, str]], src2idx, tgt2idx, max_len=64):
        self.pairs = pairs
        self.src2idx = src2idx
        self.tgt2idx = tgt2idx
        self.max_len = max_len
        # Target-side special ids are cached because every target uses them.
        self.SOS = tgt2idx['<SOS>']
        self.EOS = tgt2idx['<EOS>']
        self.PAD = tgt2idx['<PAD>']

    def __len__(self):
        return len(self.pairs)

    def encode_src(self, s):
        """Return source indices for ``s`` followed by the source ``<EOS>``."""
        table = self.src2idx
        encoded = []
        for ch in s:
            encoded.append(table.get(ch, table['<UNK>']))
        # Leave one slot for the trailing <EOS>.
        clipped = encoded[:self.max_len - 1]
        return clipped + [table['<EOS>']]

    def encode_tgt(self, s):
        """Return ``<SOS>`` + target indices for ``s`` + ``<EOS>``."""
        table = self.tgt2idx
        encoded = []
        for ch in s:
            encoded.append(table.get(ch, table['<UNK>']))
        # Leave two slots: one for <SOS>, one for <EOS>.
        body = encoded[:self.max_len - 2]
        return [self.SOS, *body, self.EOS]

    def __getitem__(self, idx):
        source_word, target_word = self.pairs[idx]
        source_tensor = torch.tensor(self.encode_src(source_word), dtype=torch.long)
        target_tensor = torch.tensor(self.encode_tgt(target_word), dtype=torch.long)
        return source_tensor, target_tensor


#### Define Collate Function for Batching
- Function: collate_fn
- Pads variable-length sequences and prepares uniform batches for training.

In [5]:
def collate_fn(batch):
    """
    Pad a list of ``(src_tensor, tgt_tensor)`` samples into batch tensors.

    Relies on the module-level ``src_vocab``, ``tgt_vocab`` and ``device``.

    Returns:
        ``(src_pad, tgt_pad, src_lens, tgt_lens)``: two ``(batch, max_len)``
        long tensors filled with the respective ``<PAD>`` index, plus the
        unpadded lengths, all created on ``device``.
    """
    source_seqs, target_seqs = zip(*batch)

    def _pad_to_longest(seqs, pad_id):
        # Start from an all-PAD canvas and copy every sequence into its row prefix.
        lengths = [len(seq) for seq in seqs]
        canvas = torch.full((len(seqs), max(lengths)), pad_id, dtype=torch.long, device=device)
        for row, seq in zip(canvas, seqs):
            row[:len(seq)] = seq.to(device)
        return canvas, lengths

    src_pad, source_lengths = _pad_to_longest(source_seqs, src_vocab['<PAD>'])
    tgt_pad, target_lengths = _pad_to_longest(target_seqs, tgt_vocab['<PAD>'])
    src_lens = torch.tensor(source_lengths, device=device)
    tgt_lens = torch.tensor(target_lengths, device=device)
    return src_pad, tgt_pad, src_lens, tgt_lens

#### Build Vocabularies and Create DataLoaders
Combine train and dev data to build a richer vocabulary, then initialize the dataset objects and DataLoaders for all splits (train/dev/test).

- `build_vocab_from_pairs()` → builds vocabularies from combined pairs.

- `TransliterationDataset()` → converts pairs to indexed tensors.

- `DataLoader()` + `collate_fn()` → handles batching and padding during training.

In [6]:
# Build vocab using train + dev (better than just train)
# NOTE(review): test_pairs are not included, so test-only characters map to <UNK>.
vocab_pairs = train_pairs + dev_pairs
src_vocab, idx2src, tgt_vocab, idx2tgt = build_vocab_from_pairs(vocab_pairs)
print("src_vocab_size =", len(src_vocab), "tgt_vocab_size =", len(tgt_vocab))

# Create datasets and loaders
# MAX_LEN caps every encoded sequence, special tokens included.
MAX_LEN = 64
train_dataset = TransliterationDataset(train_pairs, src_vocab, tgt_vocab, max_len=MAX_LEN)
dev_dataset   = TransliterationDataset(dev_pairs, src_vocab, tgt_vocab, max_len=MAX_LEN)
test_dataset  = TransliterationDataset(test_pairs, src_vocab, tgt_vocab, max_len=MAX_LEN)

from torch.utils.data import DataLoader
BATCH = 64
# Only the training loader shuffles; dev/test keep file order.
train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True, collate_fn=collate_fn)
dev_loader   = DataLoader(dev_dataset,   batch_size=BATCH, shuffle=False, collate_fn=collate_fn)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH, shuffle=False, collate_fn=collate_fn)


src_vocab_size = 30 tgt_vocab_size = 66


In [7]:
# Dump both symbol->index maps to confirm the special tokens occupy indices 0-3.
print("source vocabulary: ", src_vocab)
print("target vocabulary: ", tgt_vocab)

source vocabulary:  {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2, '<UNK>': 3, 'a': 4, 'b': 5, 'c': 6, 'd': 7, 'e': 8, 'f': 9, 'g': 10, 'h': 11, 'i': 12, 'j': 13, 'k': 14, 'l': 15, 'm': 16, 'n': 17, 'o': 18, 'p': 19, 'q': 20, 'r': 21, 's': 22, 't': 23, 'u': 24, 'v': 25, 'w': 26, 'x': 27, 'y': 28, 'z': 29}
target vocabulary:  {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2, '<UNK>': 3, 'ం': 4, 'ః': 5, 'అ': 6, 'ఆ': 7, 'ఇ': 8, 'ఈ': 9, 'ఉ': 10, 'ఊ': 11, 'ఋ': 12, 'ఎ': 13, 'ఏ': 14, 'ఐ': 15, 'ఒ': 16, 'ఓ': 17, 'ఔ': 18, 'క': 19, 'ఖ': 20, 'గ': 21, 'ఘ': 22, 'చ': 23, 'ఛ': 24, 'జ': 25, 'ఝ': 26, 'ఞ': 27, 'ట': 28, 'ఠ': 29, 'డ': 30, 'ఢ': 31, 'ణ': 32, 'త': 33, 'థ': 34, 'ద': 35, 'ధ': 36, 'న': 37, 'ప': 38, 'ఫ': 39, 'బ': 40, 'భ': 41, 'మ': 42, 'య': 43, 'ర': 44, 'ఱ': 45, 'ల': 46, 'ళ': 47, 'వ': 48, 'శ': 49, 'ష': 50, 'స': 51, 'హ': 52, 'ా': 53, 'ి': 54, 'ీ': 55, 'ు': 56, 'ూ': 57, 'ృ': 58, 'ె': 59, 'ే': 60, 'ై': 61, 'ొ': 62, 'ో': 63, 'ౌ': 64, '్': 65}


#### Inspect and Verify One DataLoader Batch
- Check that your DataLoader, padding, and vocab decoding work correctly by examining one batch of encoded words and decoding them back to readable text.

In [8]:
# Grab a single batch from the (shuffled) training loader and report its shape.
src_batch, tgt_batch, src_lens, tgt_lens = next(iter(train_loader))
print("src shape:", src_batch.shape, "tgt shape:", tgt_batch.shape)


def decode_src(ids):
    """Turn a 1-D tensor of source indices back into text, dropping <PAD>/<EOS>."""
    pieces = []
    for token_id in ids.tolist():
        if token_id in (src_vocab['<PAD>'], src_vocab['<EOS>']):
            continue
        pieces.append(idx2src[token_id])
    return ''.join(pieces)


def decode_tgt(ids):
    """Turn a 1-D tensor of target indices back into text, dropping <PAD>/<SOS>/<EOS>."""
    pieces = []
    for token_id in ids.tolist():
        if token_id in (tgt_vocab['<PAD>'], tgt_vocab['<SOS>'], tgt_vocab['<EOS>']):
            continue
        pieces.append(idx2tgt[token_id])
    return ''.join(pieces)


# Round-trip the first example of the batch through the decoders.
print("example src:", decode_src(src_batch[0]))
print("example tgt:", decode_tgt(tgt_batch[0]))


src shape: torch.Size([64, 22]) tgt shape: torch.Size([64, 21])
example src: tcas
example tgt: టీసీఏఎస్


### Define Encoder, Decoder, and Seq2Seq Model
**Build the character-level transliteration model.**

- *The EncoderRNN reads the romanized input word.*

- *The DecoderRNN generates the corresponding native-script word.*

- *The Seq2Seq wrapper connects them and handles teacher forcing during training.*

In [9]:
class EncoderRNN(nn.Module):
    """
    Character-level recurrent encoder (RNN / LSTM / GRU).

    Args:
        input_vocab_size: number of source symbols (embedding rows).
        emb_dim: embedding width.
        hidden_dim: total hidden width of the encoder output. For a
            bidirectional encoder each direction gets ``hidden_dim // 2`` so the
            concatenated width still equals ``hidden_dim``.
        num_layers: number of stacked recurrent layers.
        cell_type: ``'rnn'``, ``'lstm'`` or ``'gru'`` (case-insensitive).
        bidirectional: run the recurrence in both directions.
        dropout: dropout between stacked layers. It is only handed to the RNN
            when ``num_layers > 1``; PyTorch applies it after all but the last
            layer, so with a single layer it has no effect and only triggers a
            warning.

    Raises:
        ValueError: if ``cell_type`` is not one of rnn/lstm/gru.
    """

    def __init__(self, input_vocab_size, emb_dim, hidden_dim, num_layers=1,
                 cell_type='gru', bidirectional=False, dropout=0.1):
        super().__init__()
        self.cell_type = cell_type.lower()
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        # if bidirectional, we set per-direction hidden size to hidden_dim // 2
        assert (not bidirectional) or (hidden_dim % 2 == 0), \
            "hidden_dim must be even when using bidirectional encoder"
        per_dir_hidden = hidden_dim // self.num_directions

        rnn_classes = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}
        if self.cell_type not in rnn_classes:
            raise ValueError("cell_type must be one of rnn/lstm/gru")

        self.embedding = nn.Embedding(input_vocab_size, emb_dim)
        # Inter-layer dropout is meaningless for a single layer; passing 0 there
        # keeps the computation unchanged and avoids PyTorch's warning.
        rnn_dropout = dropout if num_layers > 1 else 0.0
        self.rnn = rnn_classes[self.cell_type](
            emb_dim, per_dir_hidden, num_layers,
            batch_first=True, bidirectional=bidirectional, dropout=rnn_dropout)

    def forward(self, src, src_lens=None):
        """
        Encode a batch of index sequences.

        Args:
            src: ``(batch, seq_len)`` long tensor of source indices.
            src_lens: optional per-example lengths; when given, the input is
                packed so padding positions are not fed through the recurrence.

        Returns:
            ``(outputs, hidden)`` exactly as produced by the underlying RNN:
            ``outputs`` is ``(batch, seq_len, hidden_dim)`` and ``hidden`` is the
            final state (a ``(h, c)`` tuple for LSTM).
        """
        # src: (batch, seq_len)
        embedded = self.embedding(src)  # (batch, seq_len, emb_dim)
        # optional pack/pad: more efficient if lengths provided
        if src_lens is not None:
            # pack_padded_sequence needs the lengths on the CPU.
            packed = nn.utils.rnn.pack_padded_sequence(embedded, src_lens.cpu(), batch_first=True, enforce_sorted=False)
            outputs_packed, hidden = self.rnn(packed)
            outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs_packed, batch_first=True)
        else:
            outputs, hidden = self.rnn(embedded)
        # outputs: (batch, seq_len, hidden_dim)  where hidden_dim = per_dir*directions
        return outputs, hidden

class DecoderRNN(nn.Module):
    """
    Single-step recurrent decoder (RNN / LSTM / GRU) with a linear output layer.

    Args:
        output_vocab_size: number of target symbols.
        emb_dim: embedding width.
        hidden_dim: hidden width; must match the (direction-concatenated)
            encoder hidden width because the encoder state seeds the decoder.
        num_layers: number of stacked recurrent layers.
        cell_type: ``'rnn'``, ``'lstm'`` or ``'gru'`` (case-insensitive).
        dropout: dropout between stacked layers; only handed to the RNN when
            ``num_layers > 1`` (it has no effect on a single layer and would
            only trigger a PyTorch warning).

    Raises:
        ValueError: if ``cell_type`` is not one of rnn/lstm/gru. Previously an
            unknown type left ``self.rnn`` as ``None`` and failed only on the
            first forward call.
    """

    def __init__(self, output_vocab_size, emb_dim, hidden_dim, num_layers=1,
                 cell_type='gru', dropout=0.1):
        super().__init__()
        self.cell_type = cell_type.lower()
        rnn_classes = {'rnn': nn.RNN, 'lstm': nn.LSTM, 'gru': nn.GRU}
        if self.cell_type not in rnn_classes:
            raise ValueError("cell_type must be one of rnn/lstm/gru")

        self.embedding = nn.Embedding(output_vocab_size, emb_dim)
        # decoder expects hidden_dim as given (same as full encoder hidden_dim)
        rnn_dropout = dropout if num_layers > 1 else 0.0
        self.rnn = rnn_classes[self.cell_type](
            emb_dim, hidden_dim, num_layers, batch_first=True, dropout=rnn_dropout)
        self.fc_out = nn.Linear(hidden_dim, output_vocab_size)

    def forward(self, input_step, hidden):
        """
        Advance the decoder by one time step.

        Args:
            input_step: ``(batch,)`` long tensor with the previous token ids.
            hidden: recurrent state from the previous step (tuple for LSTM).

        Returns:
            ``(logits, hidden)`` where ``logits`` is ``(batch, vocab)``.
        """
        # input_step: (batch,) token indices
        emb = self.embedding(input_step).unsqueeze(1)  # (batch, 1, emb)
        output, hidden = self.rnn(emb, hidden)
        logits = self.fc_out(output.squeeze(1))  # (batch, vocab)
        return logits, hidden

class Seq2Seq(nn.Module):
    """
    Encoder-decoder wrapper without attention.

    The encoder's final state initialises the decoder, which is then unrolled
    one target position at a time with optional teacher forcing.

    Args:
        encoder: module returning ``(outputs, hidden)`` and exposing a
            ``bidirectional`` attribute.
        decoder: module mapping ``(token_ids, hidden)`` to ``(logits, hidden)``
            and exposing ``fc_out.out_features`` as the vocabulary size.
        device: kept as ``self.device`` for callers that read or set it; the
            forward pass itself allocates on the device of its input.
    """

    def __init__(self, encoder: "EncoderRNN", decoder: "DecoderRNN", device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def _init_decoder_hidden_from_encoder(self, enc_hidden):
        """
        Convert the encoder's final state into the decoder's initial state.

        For every layer the per-direction states are concatenated along the
        feature axis, turning ``(num_layers * num_directions, batch, per_dir)``
        into ``(num_layers, batch, per_dir * num_directions)``. An LSTM
        ``(h, c)`` tuple is converted element-wise.
        """
        # enc_hidden: shape depends on rnn/lstm & bidirectional
        # Cases: GRU/RNN: tensor (num_layers*num_directions, batch, per_dir_hidden)
        # LSTM: tuple(h,c) same shapes.
        num_directions = 2 if self.encoder.bidirectional else 1

        def _concat_directions(h):
            # h: (num_layers * num_directions, batch, per_dir_hidden)
            nl_times_dir, b, per_h = h.size()
            num_layers = nl_times_dir // num_directions
            # reshape -> (num_layers, num_directions, batch, per_h)
            h = h.view(num_layers, num_directions, b, per_h)
            # concat directions -> (num_layers, batch, per_h * num_directions) = (num_layers, batch, hidden_dim)
            return h.permute(0, 2, 1, 3).contiguous().view(num_layers, b, per_h * num_directions)

        if isinstance(enc_hidden, tuple):  # LSTM: (h,c)
            h, c = enc_hidden
            return (_concat_directions(h), _concat_directions(c))
        return _concat_directions(enc_hidden)

    def forward(self, src, trg, src_lens=None, teacher_forcing_ratio=0.5):
        """
        Run the encoder and unroll the decoder over ``trg``'s length.

        Args:
            src: ``(batch, src_len)`` source indices.
            trg: ``(batch, trg_len)`` target indices; column 0 must be the
                start token. Later columns are only read when teacher forcing
                is chosen for that step.
            src_lens: optional source lengths forwarded to the encoder.
            teacher_forcing_ratio: probability, drawn once per time step for
                the whole batch, of feeding the reference token instead of the
                model's own argmax.

        Returns:
            ``(batch, trg_len, vocab)`` logits; position 0 is left as zeros
            because no prediction is made for the start token.
        """
        # src: (batch, src_len), trg: (batch, trg_len)
        batch_size = src.size(0)
        trg_len = trg.size(1)
        vocab_size = self.decoder.fc_out.out_features
        # Allocate next to the inputs instead of on self.device: self.device may be
        # None (or stale) while the inputs are always where the model is executing.
        outputs = torch.zeros(batch_size, trg_len, vocab_size, device=src.device)

        enc_outputs, enc_hidden = self.encoder(src, src_lens)
        dec_hidden = self._init_decoder_hidden_from_encoder(enc_hidden)

        # start token is trg[:,0] (we constructed dataset that way)
        input_tok = trg[:, 0]  # (batch,)
        for t in range(1, trg_len):
            logits, dec_hidden = self.decoder(input_tok, dec_hidden)  # logits: (batch, vocab)
            outputs[:, t, :] = logits
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = logits.argmax(1)
            input_tok = trg[:, t] if teacher_force else top1
        return outputs

#### Training Loop — One Epoch

**Function**: `train_epoch()`
- Handles one full training pass through the dataset, performing forward, backward propagation, gradient clipping, and optimizer steps.

**Greedy Decoding (Evaluation)**
- Function: `evaluate_greedy()`
- Performs simple left-to-right decoding without beam search — at each step, picks the most probable next token.

**Beam Search Decoding**

- Function: `beam_search_decode()`
- Implements beam search inference — keeps top-k candidate sequences at each decoding step for better transliteration quality.

**Evaluation Metrics — Accuracy and BLEU**

- Function: `evaluate_metrics()`
- Computes character-level accuracy, word-level accuracy, and BLEU score to measure transliteration performance.

In [10]:
from torch.optim import Adam
from tqdm.auto import tqdm
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def train_epoch(model, dataloader, optimizer, criterion, clip=1.0, teacher_forcing_ratio=0.5):
    """
    Train ``model`` for one pass over ``dataloader``.

    Each batch is ``(src, tgt, src_lens, tgt_lens)``. The loss is computed on
    target positions 1..end (position 0 is the start token, for which the
    model emits no prediction), gradients are norm-clipped to ``clip`` and the
    optimizer takes one step per batch.

    Returns:
        Mean of the per-batch loss values over the epoch.
    """
    model.train()
    running_loss = 0.0
    progress = tqdm(dataloader, desc="Training", leave=False)

    for src, tgt, src_len, _tgt_len in progress:
        optimizer.zero_grad()
        logits = model(src, tgt, src_len, teacher_forcing_ratio=teacher_forcing_ratio)

        # Flatten (batch, time) so the criterion sees one row per predicted token.
        n_classes = logits.size(-1)
        flat_logits = logits[:, 1:, :].reshape(-1, n_classes)
        flat_targets = tgt[:, 1:].reshape(-1)
        batch_loss = criterion(flat_logits, flat_targets)

        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()

        loss_value = batch_loss.item()
        running_loss += loss_value
        progress.set_postfix(loss=f"{loss_value:.4f}")

    return running_loss / len(dataloader)

def evaluate_greedy(model, sentence: str, src2idx, idx2tgt, max_len=64):
    """
    Greedily transliterate one source word.

    Args:
        model: callable as ``model(src, trg, teacher_forcing_ratio=...)`` and
            returning ``(1, max_len, vocab)`` logits.
        sentence: source word; unknown characters map to ``<UNK>``.
        src2idx: source symbol->index map.
        idx2tgt: target index->symbol map. The ``<PAD>``/``<SOS>``/``<EOS>``
            ids are looked up from this map, so the function no longer depends
            on a module-level ``tgt_vocab``.
        max_len: cap on the encoded source length and number of decoded steps.

    Returns:
        The decoded string, cut at the first predicted ``<EOS>``; ids missing
        from ``idx2tgt`` are rendered as ``'?'``.
    """
    tgt2idx = {symbol: index for index, symbol in idx2tgt.items()}
    pad_id, sos_id, eos_id = tgt2idx['<PAD>'], tgt2idx['<SOS>'], tgt2idx['<EOS>']

    # Build the inputs on the model's device; CPU tensors fail when the model is on a GPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        src_ids = [src2idx.get(ch, src2idx['<UNK>']) for ch in sentence][:max_len-1]
        src_ids.append(src2idx['<EOS>'])
        src_tensor = torch.tensor([src_ids], dtype=torch.long, device=run_device)
        # Only column 0 (<SOS>) is read because teacher forcing is off; the rest
        # just fixes how many steps the model unrolls.
        trg_dummy = torch.tensor([[sos_id] + [pad_id] * (max_len - 1)],
                                 dtype=torch.long, device=run_device)
        outputs = model(src_tensor, trg_dummy, teacher_forcing_ratio=0.0)
        ids = outputs.argmax(-1).squeeze(0).tolist()

    res = []
    for id_ in ids:
        if id_ == eos_id:
            break
        if id_ in (sos_id, pad_id):
            continue
        res.append(idx2tgt.get(id_, '?'))
    return ''.join(res)

# simple beam search
import heapq
def beam_search_decode(model, src_sentence, src2idx, idx2tgt, beam_size=4, max_len=64):
    """
    Transliterate one source word with beam search.

    Args:
        model: object exposing ``encoder``, ``decoder`` and
            ``_init_decoder_hidden_from_encoder``.
        src_sentence: source word; unknown characters map to ``<UNK>``.
        src2idx: source symbol->index map.
        idx2tgt: target index->symbol map; the special-token ids are looked up
            from it, so no module-level ``tgt_vocab`` is needed.
        beam_size: number of hypotheses kept per step (capped at the
            vocabulary size when expanding).
        max_len: cap on the encoded source length and on decoding steps.

    Returns:
        The highest-scoring finished hypothesis (sum of token log-probs, no
        length normalisation) as a string; if nothing finished, the best
        unfinished hypothesis.

    Raises:
        ValueError: if ``beam_size`` is smaller than 1.
    """
    if beam_size < 1:
        raise ValueError("beam_size must be at least 1")

    tgt2idx = {symbol: index for index, symbol in idx2tgt.items()}
    pad_id, sos_id, eos_id = tgt2idx['<PAD>'], tgt2idx['<SOS>'], tgt2idx['<EOS>']

    # Build inputs on the model's device; CPU tensors fail when the model is on a GPU.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        src_ids = [src2idx.get(ch, src2idx['<UNK>']) for ch in src_sentence][:max_len-1]
        src_ids.append(src2idx['<EOS>'])
        src_tensor = torch.tensor([src_ids], dtype=torch.long, device=run_device)

        _, enc_hidden = model.encoder(src_tensor, None)
        dec_hidden = model._init_decoder_hidden_from_encoder(enc_hidden)

        # Each hypothesis is (cumulative log-prob, token ids, decoder state).
        beam = [(0.0, [sos_id], dec_hidden)]
        completed = []

        for _ in range(max_len):
            new_beam = []
            for score, tokens, hidden in beam:
                last = tokens[-1]
                if last == eos_id:
                    completed.append((score, tokens))
                    continue
                input_tok = torch.tensor([last], dtype=torch.long, device=run_device)
                logits, new_hidden = model.decoder(input_tok, hidden)
                logp = F.log_softmax(logits, dim=-1).squeeze(0)
                # topk raises if asked for more entries than the vocabulary holds.
                k = min(beam_size, logp.size(-1))
                topk_logp, topk_idx = torch.topk(logp, k)
                for cand_logp, cand_tok in zip(topk_logp.tolist(), topk_idx.tolist()):
                    # The state is never modified in place, so siblings can share it.
                    new_beam.append((score + cand_logp, tokens + [cand_tok], new_hidden))
            beam = sorted(new_beam, key=lambda x: x[0], reverse=True)[:beam_size]
            if len(completed) >= beam_size:
                break

        # Hypotheses that produced <EOS> in the last expanded step are still in
        # `beam`; without this they would be ignored whenever `completed` is non-empty.
        completed.extend((score, tokens) for score, tokens, _ in beam if tokens[-1] == eos_id)
        if not completed:
            completed = beam
        best = max(completed, key=lambda x: x[0])

    out_chars = []
    for id_ in best[1]:
        if id_ in (sos_id, pad_id):
            continue
        if id_ == eos_id:
            break
        out_chars.append(idx2tgt.get(id_, '?'))
    return ''.join(out_chars)


# ------------------------------------------------------------
# Evaluation metrics: char accuracy, word accuracy, BLEU
# ------------------------------------------------------------
def evaluate_metrics(model, dataloader, idx2tgt, tgt_vocab):
    """
    Score ``model`` on ``dataloader`` with teacher forcing disabled.

    Predictions are compared with the reference only at positions where the
    reference holds a real character (between ``<SOS>`` and the first
    ``<EOS>``/``<PAD>``); whatever the model emits beyond that is not looked at.

    Returns:
        ``{"char_acc", "word_acc", "bleu"}``: position-wise character accuracy,
        exact-match word accuracy, and the mean smoothed character-level
        sentence BLEU. The same numbers are printed.
    """
    model.eval()
    chars_seen = chars_right = 0
    words_seen = words_right = 0
    per_word_bleu = []
    smoother = SmoothingFunction().method1

    with torch.no_grad():
        for src_batch, tgt_batch, src_lens, tgt_lens in tqdm(dataloader, desc="Evaluating", leave=False):
            logits = model(src_batch, tgt_batch, src_lens, teacher_forcing_ratio=0.0)
            predicted = logits.argmax(-1)  # (batch, seq_len)

            for pred_row, gold_row in zip(predicted, tgt_batch):
                # Collect (prediction, reference) at every real reference position.
                aligned = []
                for p, t in zip(pred_row.tolist(), gold_row.tolist()):
                    if t == tgt_vocab['<PAD>']:
                        break
                    if t == tgt_vocab['<SOS>']:
                        continue
                    if t == tgt_vocab['<EOS>']:
                        break
                    aligned.append((p, t))

                chars_right += sum(1 for p, t in aligned if p == t)
                chars_seen += len(aligned)

                specials = (tgt_vocab['<PAD>'], tgt_vocab['<SOS>'], tgt_vocab['<EOS>'])
                # Special ids predicted inside the word are dropped from the string.
                pred_str = ''.join(idx2tgt[p] for p, _ in aligned if p not in specials)
                true_str = ''.join(idx2tgt[t] for _, t in aligned)

                words_seen += 1
                if pred_str == true_str:
                    words_right += 1
                if true_str:
                    per_word_bleu.append(
                        sentence_bleu([list(true_str)], list(pred_str), smoothing_function=smoother))

    char_acc = chars_right / chars_seen if chars_seen > 0 else 0
    word_acc = words_right / words_seen if words_seen > 0 else 0
    bleu = sum(per_word_bleu) / len(per_word_bleu) if per_word_bleu else 0

    print(f"Char Acc: {char_acc:.4f} | Word Acc: {word_acc:.4f} | BLEU: {bleu:.4f}")
    return {"char_acc": char_acc, "word_acc": word_acc, "bleu": bleu}


#### Define Sweep configurations

In [11]:
# Hyperparameter sweep definition handed to wandb.
# The objective is to maximise the logged `val_bleu` metric.
sweep_config = {
  'method': 'bayes',
  'metric': {'name': 'val_bleu', 'goal': 'maximize'},
  'parameters': {
      # Embedding width shared by encoder and decoder.
      'emb_dim': {'values': [16,32,64,256]},
      # Every candidate is even, which the bidirectional encoder requires.
      'hidden_dim': {'values': [16,32,64,128,256]},
      'num_layers': {'values': [1,2,3]},
      'cell_type': {'values': ['gru','lstm','rnn']},
      'bidirectional': {'values': [False, True]},
      'dropout': {'values': [0.0, 0.2, 0.3]},
      # Continuous range rather than a discrete list.
      'learning_rate': {'min': 1e-4, 'max': 1e-2},
      'batch_size': {'values': [32,64,128]},
      'teacher_forcing_ratio': {'values': [0.5, 0.7, 1.0]},
      'beam_size': {'values': [1,2,4]},
      'weight_decay': {'values':[0.0, 1e-5, 1e-4]}
  }
}

In [12]:
! pip install pytorch_lightning

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.5-py3-none-any.whl.metadata (20 kB)
Collecting torchmetrics>0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading pytorch_lightning-2.5.5-py3-none-any.whl (832 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m832.4/832.4 kB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch_lightning
Successfully installed lightning-utilities-0.15.2 pytorch_lightning-2.5.5 torchmetrics-1.8.2


#### Define a custom callback

In [13]:
from pytorch_lightning.callbacks import Callback
import json

class DynamicBleuCutoff(Callback):
    """
    Stop a run early when its validation BLEU lags far behind the best BLEU
    recorded by earlier runs of the same wandb project.

    Args:
        patience_epochs: the cutoff is not applied before this epoch index.
        fraction: a run is stopped when ``val_bleu < fraction * global_best``.
        project: wandb project whose runs are scanned for ``val_bleu``.
        entity: wandb entity (user or team) that owns the project.
    """

    def __init__(self, patience_epochs=10, fraction=0.7, project="Machine Transliteration",
                 entity="me21b118-iit-madras"):
        super().__init__()
        self.patience_epochs = patience_epochs
        self.fraction = fraction
        self.project = project
        self.entity = entity
        self.best_bleu_global = None

    @staticmethod
    def _as_score(value):
        """Return ``value`` as a finite float, or ``None`` if it is not a usable number."""
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return None
        value = float(value)
        # NaN fails the self-comparison; infinities are rejected explicitly.
        if value != value or value in (float("inf"), float("-inf")):
            return None
        return value

    def on_fit_start(self, trainer, pl_module):
        """Fetch the best ``val_bleu`` over earlier runs; fall back to 0.0 on any failure."""
        import wandb
        try:
            # Creating the API client can itself fail (e.g. not logged in), so it is
            # covered by the same best-effort handler as the query.
            api = wandb.Api()
            runs = api.runs(f"{self.entity}/{self.project}")
            best = 0.0
            for r in runs:
                if r.state in ("failed", "crashed"):
                    continue

                # --- Handle both dict and string cases ---
                metrics = r.summary_metrics
                if isinstance(metrics, str):
                    try:
                        metrics = json.loads(metrics)
                    except Exception as e:
                        print(f"⚠️ Failed to parse metrics for run {r.id}: {e}")
                        metrics = {}
                if not hasattr(metrics, "get"):
                    continue

                # One run with a missing or non-numeric BLEU must not discard
                # the scores collected from all the other runs.
                bleu = self._as_score(metrics.get("val_bleu"))
                if bleu is not None:
                    best = max(best, bleu)

            self.best_bleu_global = best
            print(f"🌍 Global best BLEU so far: {best:.4f}")

        except Exception as e:
            print(f"⚠️ Could not fetch global best BLEU: {e}")
            self.best_bleu_global = 0.0

    def on_validation_end(self, trainer, pl_module):
        """Request a stop when this run's ``val_bleu`` is below the cutoff."""
        # The pre-training sanity validation does not reflect a trained model.
        if getattr(trainer, "sanity_checking", False):
            return

        epoch = trainer.current_epoch
        # No default here: a metric that was never logged must not be treated as
        # a BLEU of 0.0, which would always fall under the cutoff.
        val_bleu = trainer.callback_metrics.get("val_bleu")
        if val_bleu is None:
            return
        val_bleu = val_bleu.item() if hasattr(val_bleu, "item") else float(val_bleu)

        # --- Only start cutoff check after some epochs ---
        if epoch >= self.patience_epochs and self.best_bleu_global:
            cutoff = self.best_bleu_global * self.fraction
            if val_bleu < cutoff:
                print(f"⛔ BLEU {val_bleu:.3f} < {cutoff:.3f} (cutoff) → stopping this run early")
                trainer.should_stop = True


In [14]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

class LitSeq2Seq(pl.LightningModule):
    """
    LightningModule wrapper around Seq2Seq for training and validation.

    Holds the optimizer hyperparameters and a cross-entropy criterion that
    ignores ``pad_idx``. ``validation_step`` also reads the module-level
    ``tgt_vocab`` and ``idx2tgt`` to decode predictions.
    """

    def __init__(self, encoder, decoder, lr, pad_idx, teacher_forcing_ratio=0.5, weight_decay=0.0):
        super().__init__()
        # device=None here; the real device is assigned in on_fit_start.
        self.model = Seq2Seq(encoder, decoder, device=None)
        self.lr = lr
        self.pad_idx = pad_idx
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.weight_decay = weight_decay
        self.criterion = nn.CrossEntropyLoss(ignore_index=self.pad_idx)

    def forward(self, src, tgt, src_lens):
        """Run the wrapped model with the configured teacher-forcing ratio."""
        return self.model(src, tgt, src_lens, teacher_forcing_ratio=self.teacher_forcing_ratio)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch."""
        src, tgt, src_lens, _ = batch
        outputs = self(src, tgt, src_lens)
        vocab = outputs.size(-1)
        # Position 0 is the start token, so loss covers positions 1..end only.
        loss = self.criterion(outputs[:, 1:, :].reshape(-1, vocab), tgt[:, 1:].reshape(-1))
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """
        Compute loss, character accuracy, word accuracy and BLEU for one batch.

        Decoding runs with teacher forcing disabled. Predictions are compared
        with the reference only at positions where the reference holds a real
        character (up to its first <EOS>/<PAD>).
        """
        src, tgt, src_lens, _ = batch
        outputs = self.model(src, tgt, src_lens, teacher_forcing_ratio=0.0)
        vocab = outputs.size(-1)
        loss = self.criterion(outputs[:, 1:, :].reshape(-1, vocab), tgt[:, 1:].reshape(-1))

        # --- Compute char, word, BLEU metrics ---
        preds = outputs.argmax(-1)
        correct_chars = total_chars = 0
        correct_words = total_words = 0
        bleu_scores = []
        smooth = SmoothingFunction().method1

        for pred_seq, true_seq in zip(preds, tgt):
            pred_chars, true_chars = [], []
            for p, t in zip(pred_seq.tolist(), true_seq.tolist()):
                if t == self.pad_idx: break
                if t == tgt_vocab['<SOS>']: continue
                if t == tgt_vocab['<EOS>']: break
                pred_chars.append(p)
                true_chars.append(t)

            # pred_chars and true_chars are appended together, so their lengths match.
            min_len = min(len(pred_chars), len(true_chars))
            correct_chars += sum(p == t for p, t in zip(pred_chars[:min_len], true_chars[:min_len]))
            total_chars += len(true_chars)

            # Special ids predicted inside the word are dropped from the decoded string.
            pred_str = ''.join(idx2tgt[i] for i in pred_chars if i not in (tgt_vocab['<PAD>'], tgt_vocab['<SOS>'], tgt_vocab['<EOS>']))
            true_str = ''.join(idx2tgt[i] for i in true_chars)

            if pred_str == true_str:
                correct_words += 1
            total_words += 1
            if len(true_str) > 0:
                bleu_scores.append(sentence_bleu([list(true_str)], list(pred_str), smoothing_function=smooth))

        # These are per-batch values; NOTE(review): epoch-level aggregation is left
        # to Lightning's logging defaults — confirm that is the intended averaging.
        char_acc = correct_chars / total_chars if total_chars else 0
        word_acc = correct_words / total_words if total_words else 0
        bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0

        self.log_dict({
            "val_loss": loss,
            "val_char_acc": char_acc,
            "val_word_acc": word_acc,
            "val_bleu": bleu
        }, prog_bar=True)

        return {"val_loss": loss, "char_acc": char_acc, "word_acc": word_acc, "bleu": bleu}

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with the stored lr and weight decay."""
        return torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)

    def on_fit_start(self):
        """Move the wrapped model to this module's device and record it on the model."""
        self.model.to(self.device)
        self.model.device = self.device   # ensure Seq2Seq.forward() allocates on correct device


# ----------------------------------------------------------
# sweep-compatible run_train using Lightning
# ----------------------------------------------------------
def run_train(config=None,
              checkpoint_dir="/content/drive/MyDrive/seq2seq_checkpoints/",
              max_epochs=30,
              max_len=64):
    """Train one Seq2Seq configuration inside a W&B run (sweep-agent entry point).

    Opens a W&B run, builds the train/validation loaders, the encoder/decoder
    and the Lightning wrapper from ``wandb.config``, fits with checkpointing,
    early stopping and the dynamic BLEU cutoff, then uploads the best
    checkpoint if one was written.

    Relies on names defined elsewhere in the notebook: ``train_pairs``,
    ``dev_pairs``, ``src_vocab``, ``tgt_vocab``, ``collate_fn``,
    ``TransliterationDataset``, ``EncoderRNN``, ``DecoderRNN``, ``LitSeq2Seq``
    and ``DynamicBleuCutoff``.

    Args:
        config: Optional config forwarded to ``wandb.init``. When launched via
            ``wandb.agent`` this stays ``None``.
        checkpoint_dir: Directory where the best checkpoint is written. It is
            created if missing. Defaults to the Colab Drive path.
        max_epochs: Upper bound on training epochs passed to the Trainer.
        max_len: ``max_len`` passed to both ``TransliterationDataset`` objects.

    Returns:
        None. Results are logged to W&B and the checkpoint is saved to disk.
    """
    with wandb.init(config=config) as run:
        cfg = wandb.config

        # --- Build dataset and dataloaders ---
        train_data = TransliterationDataset(train_pairs, src_vocab, tgt_vocab, max_len=max_len)
        val_data = TransliterationDataset(dev_pairs, src_vocab, tgt_vocab, max_len=max_len)
        train_loader = DataLoader(train_data, batch_size=cfg.batch_size, shuffle=True, collate_fn=collate_fn)
        val_loader = DataLoader(val_data, batch_size=cfg.batch_size, shuffle=False, collate_fn=collate_fn)

        # --- Build model components ---
        enc = EncoderRNN(len(src_vocab), cfg.emb_dim, cfg.hidden_dim, cfg.num_layers,
                         cfg.cell_type, cfg.bidirectional, cfg.dropout)
        dec = DecoderRNN(len(tgt_vocab), cfg.emb_dim, cfg.hidden_dim, cfg.num_layers,
                         cfg.cell_type, cfg.dropout)

        model = LitSeq2Seq(
            encoder=enc,
            decoder=dec,
            lr=cfg.learning_rate,
            pad_idx=tgt_vocab['<PAD>'],
            teacher_forcing_ratio=cfg.teacher_forcing_ratio,
            weight_decay=cfg.weight_decay
        )

        # --- Setup loggers and checkpoint callbacks ---
        # Attach the logger to the run opened above. `project` must be the
        # project *name* (a string), not the Run object itself; handing the
        # run over via `experiment` makes the reuse explicit.
        wandb_logger = WandbLogger(project=run.project, experiment=run, log_model=True)
        os.makedirs(checkpoint_dir, exist_ok=True)

        # NOTE(review): the checkpoint is selected on val_loss while early
        # stopping below tracks val_bleu — confirm this mismatch is intended.
        checkpoint_callback = ModelCheckpoint(
            dirpath=checkpoint_dir,
            filename=f"best_model_{run.id}",
            monitor="val_loss",
            save_top_k=1,
            mode="min",
            save_weights_only=True
        )

        # --- Early Stopping ---
        early_stop_callback = EarlyStopping(
            monitor="val_bleu",   # or 'val_loss' if BLEU is noisy early on
            mode="max",
            patience=5,           # stop after 5 epochs of no improvement
            min_delta=0.002,      # require +0.002 BLEU improvement
            verbose=True
        )

        # --- Trainer setup ---
        trainer = pl.Trainer(
            max_epochs=max_epochs,
            logger=wandb_logger,
            callbacks=[checkpoint_callback,
                       early_stop_callback,
                       DynamicBleuCutoff(patience_epochs=10, fraction=0.8)],
            precision="16-mixed",  # fp16 for faster training
            accelerator="auto",
            devices=1,
            enable_progress_bar=True,
            log_every_n_steps=10
        )

        # --- Fit model ---
        trainer.fit(model, train_loader, val_loader)

        best_path = checkpoint_callback.best_model_path
        print(f"Best checkpoint saved at: {best_path}")
        # Upload only when a checkpoint file was actually written.
        if best_path and os.path.exists(best_path):
            wandb.save(best_path)


In [53]:
# Register the sweep definition under the W&B project and keep its ID.
sweep_id = wandb.sweep(sweep=sweep_config, project="Machine Transliteration")
# Launch an agent that calls `run_train` for each sampled configuration;
# `count` is the number of runs this agent should execute.
wandb.agent(sweep_id=sweep_id, function=run_train, count=1)


Create sweep with ID: 74czrt6f
Sweep URL: https://wandb.ai/me21b118-iit-madras/Machine%20Transliteration/sweeps/74czrt6f


[34m[1mwandb[0m: Agent Starting Run: gly52viq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: False
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 256
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.003859307450740208
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7
[34m[1mwandb[0m: 	weight_decay: 0


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_gly52viq.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
train_loss,█▇▅██▅▄▆▆▅▄▄▄▇▄▄▃▅▂▂▄▂▃▂▄▃▁▂▁▂▁▂▂▂▂▁▁▁▃▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
val_bleu,▁▂▃▃▃▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇█████████
val_char_acc,▁▂▃▃▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇█████████
val_loss,█▇▆▆▅▅▄▄▄▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁
val_word_acc,▁▁▂▂▂▃▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇█████

0,1
epoch,29.0
train_loss,0.94421
trainer/global_step,23999.0
val_bleu,0.5142
val_char_acc,0.63607
val_loss,1.38032
val_word_acc,0.23901


In [54]:
# Re-attach to the existing sweep via its "entity/project/sweep" path and
# run five more configurations through `run_train`.
sweep_id = "me21b118-iit-madras/Machine Transliteration/74czrt6f"
wandb.agent(sweep_id=sweep_id, function=run_train, count=5)

[34m[1mwandb[0m: Agent Starting Run: vaf5k2l6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 32
[34m[1mwandb[0m: 	hidden_dim: 16
[34m[1mwandb[0m: 	learning_rate: 0.0031900564338289807
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:751: Checkpoint directory /content/drive/MyDrive/seq2seq_checkpoints exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supp

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_vaf5k2l6.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██████
train_loss,█▆▄▄▄▃▃▄▄▄▃▄▃▂▃▂▂▂▃▂▃▂▂▁▂▃▂▂▂▂▂▂▂▁▂▁▁▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇█████
val_bleu,▁▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇█▇█▇██████
val_char_acc,▁▂▃▄▄▅▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████
val_loss,█▇▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁
val_word_acc,▁▁▂▂▂▂▃▃▃▄▄▆▅▆▇▆▅▆▆▇▇▇▇▇▆▇███▇

0,1
epoch,29.0
train_loss,1.76615
trainer/global_step,23999.0
val_bleu,0.22156
val_char_acc,0.46224
val_loss,1.7266
val_word_acc,0.02979


[34m[1mwandb[0m: Agent Starting Run: 5m7w40es with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 32
[34m[1mwandb[0m: 	learning_rate: 0.0023306119835993415
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 31.4 K | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
31.4 K    Trainable params
0         Non-trainable params
31.4 K    Total params
0.125     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_5m7w40es.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇██████
train_loss,█▇▆▅▅▄▄▄▅▃▃▄▃▄▃▂▂▃▃▁▃▃▃▂▂▂▂▄▂▂▂▁▂▂▂▂▂▂▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇▇██
val_bleu,▁▂▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇██████████
val_char_acc,▁▃▄▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇███████████
val_loss,█▆▅▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_word_acc,▁▁▂▃▃▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█

0,1
epoch,29.0
train_loss,0.93816
trainer/global_step,11999.0
val_bleu,0.45021
val_char_acc,0.62441
val_loss,1.28209
val_word_acc,0.17236


[34m[1mwandb[0m: Agent Starting Run: 497hedrv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002272694116188844
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 589 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
589 K     Trainable params
0         Non-trainable params
589 K     Total params
2.359     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_497hedrv.ckpt


0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▇▇▇▇▇████
train_loss,█▆▅▅▄▃▃▃▃▃▂▂▂▂▂▂▃▂▁▂▂▂▂▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇█████
val_bleu,▁▅▆▇▇▇▇▇██████████████████████
val_char_acc,▁▅▆▇▇▇▇▇██████████████████████
val_loss,█▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▁▁▂▁▂▂
val_word_acc,▁▄▅▆▆▇▇▇▇▇▇███████████████████

0,1
epoch,29.0
train_loss,0.13149
trainer/global_step,11999.0
val_bleu,0.75192
val_char_acc,0.84166
val_loss,0.78937
val_word_acc,0.54126


[34m[1mwandb[0m: Agent Starting Run: lgsuqulk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 32
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004429174821457923
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 2.3 M  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
2.3 M     Trainable params
0         Non-trainable params
2.3 M     Total params
9.304     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


In [None]:
# Re-attach to the existing sweep via its "entity/project/sweep" path and
# run three more configurations through `run_train`.
sweep_id = "me21b118-iit-madras/Machine Transliteration/74czrt6f"
wandb.agent(sweep_id=sweep_id, function=run_train, count=3)

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Agent Starting Run: rnqzshg0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 32
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00014619920835259658
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.0001
[34m[1mwandb[0m: Currently logged in as: [33mme21b118[0m ([33mme21b118-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:751: Checkpoint directory /content/drive/MyDrive/seq2seq_checkpoints exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supp

⚠️ Could not fetch global best BLEU: string indices must be integers, not 'str'


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.022


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.014 >= min_delta = 0.002. New best score: 0.037


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.006 >= min_delta = 0.002. New best score: 0.043


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.050


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.053


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.057


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.005 >= min_delta = 0.002. New best score: 0.062


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.069


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.011 >= min_delta = 0.002. New best score: 0.081


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.021 >= min_delta = 0.002. New best score: 0.102


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.029 >= min_delta = 0.002. New best score: 0.131
[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7e839fcf9700>> (for post_run_cell):
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/IPython/core/events.py", line 89, in trigger
    func(*args, **kwargs)
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/wandb_init.py", line 595, in _post_run_cell_hook
    self.backend.interface.publish_resume()
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/interface/interface.py", line 818, in publish_resume
    self._publish_resume(resume)
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/interface/interface_shared.py", line 296, in _publish_resume
    self._publish(rec)
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/interface/interface_sock.py", line 43, in _publish
    self._asyncer.run(lambda: self._client.publish(request))
  File "/usr/local/lib/python3.12/dist-packages/wandb/sdk/lib/asyncio_manager.py", line 136,

In [19]:
# Re-attach to the existing sweep via its "entity/project/sweep" path and
# run five more configurations through `run_train`.
sweep_id = "me21b118-iit-madras/Machine Transliteration/74czrt6f"
wandb.agent(sweep_id=sweep_id, function=run_train, count=5)

[34m[1mwandb[0m: Agent Starting Run: hqw57qh7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 16
[34m[1mwandb[0m: 	learning_rate: 0.009562490670605252
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:751: Checkpoint directory /content/drive/MyDrive/seq2seq_checkpoints exists and is not empty.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.12/dist-packages/pytorch_lightning/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supp

🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.077


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.007 >= min_delta = 0.002. New best score: 0.084


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.087


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.090


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.093


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.097


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.006 >= min_delta = 0.002. New best score: 0.103


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.005 >= min_delta = 0.002. New best score: 0.108


⛔ BLEU 0.108 < 0.602 (cutoff) → stopping this run early




Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_hqw57qh7.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇█
train_loss,█▇▆▇▆▄▅▃▃▄▃▄▃▃▃▃▃▁▃▄▂▂▄▂▃▂▃▃▁▃▁▁▂▂▂▂▂▃▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
val_bleu,▁▃▃▃▄▄▅▆▄▇█
val_char_acc,▁▄▄▄▅▆▆▆▅▇█
val_loss,▁▃▁▇▇▂▃▆█▆▅
val_word_acc,▁▁▁▄▁▃▃▄▃▆█

0,1
epoch,10.0
train_loss,2.13238
trainer/global_step,4399.0
val_bleu,0.1081
val_char_acc,0.28262
val_loss,3.73184
val_word_acc,0.00269


[34m[1mwandb[0m: Agent Starting Run: 99zyxflo with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0040299696863160634
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 616 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
616 K     Trainable params
0         Non-trainable params
616 K     Total params
2.464     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.231


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.063 >= min_delta = 0.002. New best score: 0.294


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.294. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_99zyxflo.ckpt


0,1
epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▇▆▄▃▂▂▂▂▂▁▂▁▂▁▂▁▁▂▁▂▁▂▁▂▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂
trainer/global_step,▁▁▁▂▂▃▃▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇█
val_bleu,▁█▇▇▇▅▃
val_char_acc,▁█▇█▇▅▄
val_loss,█▁▁▂▃▅▆
val_word_acc,▁█▅▇▇▅▃

0,1
epoch,6.0
train_loss,1.70022
trainer/global_step,2799.0
val_bleu,0.24796
val_char_acc,0.45119
val_loss,2.13153
val_word_acc,0.03369


[34m[1mwandb[0m: Agent Starting Run: x6a9gf9o with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 256
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005112865700460457
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	teacher_forcing_ratio: 1
[34m[1mwandb[0m: 	weight_decay: 0


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 185 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
185 K     Trainable params
0         Non-trainable params
185 K     Total params
0.742     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.294


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.079 >= min_delta = 0.002. New best score: 0.373


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.042 >= min_delta = 0.002. New best score: 0.415


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.014 >= min_delta = 0.002. New best score: 0.428


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.012 >= min_delta = 0.002. New best score: 0.441


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.011 >= min_delta = 0.002. New best score: 0.451


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.020 >= min_delta = 0.002. New best score: 0.471


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.014 >= min_delta = 0.002. New best score: 0.485


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.489


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.497


⛔ BLEU 0.497 < 0.602 (cutoff) → stopping this run early
Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_x6a9gf9o.ckpt


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇███
train_loss,█▆▄▄▄▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇██
val_bleu,▁▄▅▆▆▆▇████
val_char_acc,▁▄▅▆▆▇▇████
val_loss,█▃▂▃▃▃▁▂▄▃▃
val_word_acc,▁▃▄▅▅▆▇▇▇▇█

0,1
epoch,10.0
train_loss,0.6254
trainer/global_step,4399.0
val_bleu,0.49686
val_char_acc,0.60919
val_loss,2.50472
val_word_acc,0.24292


[34m[1mwandb[0m: Agent Starting Run: r3bpm8uk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 1
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0060966253090196385
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 616 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
616 K     Trainable params
0         Non-trainable params
616 K     Total params
2.464     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.194


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.039 >= min_delta = 0.002. New best score: 0.233


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.044 >= min_delta = 0.002. New best score: 0.277


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.277. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_r3bpm8uk.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇▇█████
train_loss,█▅▄▃▃▃▂▂▂▂▂▁▂▂▃▂▁▂▂▁▂▂▂▁▂▂▁▂▂▂▂▂▂▁▁▂▁▁▁▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇██
val_bleu,▁▄█▆▇▆▇▅
val_char_acc,▁▅█▆▇▇█▅
val_loss,█▄▁▁▂▃▁▄
val_word_acc,▁▄█▅▇▄▆▄

0,1
epoch,7.0
train_loss,1.7335
trainer/global_step,3199.0
val_bleu,0.23611
val_char_acc,0.42814
val_loss,2.21561
val_word_acc,0.02686


[34m[1mwandb[0m: Agent Starting Run: r46lt1aw with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0022620053374418597
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 586 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
586 K     Trainable params
0         Non-trainable params
586 K     Total params
2.347     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.285


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.092 >= min_delta = 0.002. New best score: 0.377


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.040 >= min_delta = 0.002. New best score: 0.417


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.049 >= min_delta = 0.002. New best score: 0.466


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.036 >= min_delta = 0.002. New best score: 0.502


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.011 >= min_delta = 0.002. New best score: 0.514


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.021 >= min_delta = 0.002. New best score: 0.535


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.538


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.012 >= min_delta = 0.002. New best score: 0.551


Validation: |          | 0/? [00:00<?, ?it/s]

⛔ BLEU 0.532 < 0.602 (cutoff) → stopping this run early
Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_r46lt1aw.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇██████
train_loss,█▇▆▅▆▅▄▅▄▄▃▃▃▂▂▃▂▂▃▃▃▂▃▃▃▂▂▂▃▂▂▂▁▂▂▂▂▂▁▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇█
val_bleu,▁▃▄▆▇▇█▇███
val_char_acc,▁▄▅▆▇▇█▇███
val_loss,█▅▄▃▂▂▂▂▁▁▁
val_word_acc,▁▃▄▅▆▇█▇▇█▇

0,1
epoch,10.0
train_loss,1.13068
trainer/global_step,4399.0
val_bleu,0.53236
val_char_acc,0.68429
val_loss,1.11187
val_word_acc,0.23022


In [None]:
# Launch a W&B sweep agent: it pulls hyperparameter configurations for the
# sweep `sweep_id` and calls `run_train` once per configuration, stopping
# after at most 10 runs. Both names must already be defined in the kernel
# (they are not defined in this cell).
wandb.agent(
    sweep_id,
    function=run_train,
    count=10,
)

[34m[1mwandb[0m: Agent Starting Run: 327yj21x with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	beam_size: 2
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: rnn
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00389604775987926
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 586 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
586 K     Trainable params
0         Non-trainable params
586 K     Total params
2.347     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.206


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.037 >= min_delta = 0.002. New best score: 0.243


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.013 >= min_delta = 0.002. New best score: 0.256


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.025 >= min_delta = 0.002. New best score: 0.281


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.281. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_327yj21x.ckpt


0,1
epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▆▇▇▇███████
train_loss,█▇▆▇▆▃▃▃▃▄▄▂▃▄▃▁▂▂▂▂▃▃▄▂▂▂▃▃▂▃▄▃▂▅▂▁▂▄▃▂
trainer/global_step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇████
val_bleu,▁▄▆█▅▆▇▆▇
val_char_acc,▁▅▅█▆▇▇▇▇
val_loss,█▅▅▂▃▃▂▂▁
val_word_acc,▁▄▅█▅▅▅▅▅

0,1
epoch,8.0
train_loss,1.74178
trainer/global_step,7199.0
val_bleu,0.27153
val_char_acc,0.4681
val_loss,1.99348
val_word_acc,0.03345


[34m[1mwandb[0m: Agent Starting Run: 83ee8a72 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 32
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0009258789438124792
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 376 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
376 K     Trainable params
0         Non-trainable params
376 K     Total params
1.505     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.156


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.211 >= min_delta = 0.002. New best score: 0.366


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.137 >= min_delta = 0.002. New best score: 0.503


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.057 >= min_delta = 0.002. New best score: 0.560


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.048 >= min_delta = 0.002. New best score: 0.608


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.030 >= min_delta = 0.002. New best score: 0.638


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.019 >= min_delta = 0.002. New best score: 0.657


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.017 >= min_delta = 0.002. New best score: 0.674


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.009 >= min_delta = 0.002. New best score: 0.683


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.009 >= min_delta = 0.002. New best score: 0.692


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.012 >= min_delta = 0.002. New best score: 0.704


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.006 >= min_delta = 0.002. New best score: 0.710


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.010 >= min_delta = 0.002. New best score: 0.720


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.723


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.732


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.732. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_83ee8a72.ckpt


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train_loss,██▅▃▄▃▃▄▂▂▂▂▂▂▂▂▁▂▂▂▂▂▁▁▁▁▂▂▁▁▂▁▁▁▁▁▁▁▁▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
val_bleu,▁▄▅▆▆▇▇▇▇███████████████
val_char_acc,▁▄▅▆▇▇▇▇▇█▇█████████████
val_loss,█▅▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_word_acc,▁▂▄▅▅▆▆▇▇▇▇▇▇▇▇█████████

0,1
epoch,23.0
train_loss,0.32339
trainer/global_step,9599.0
val_bleu,0.73284
val_char_acc,0.81924
val_loss,0.79848
val_word_acc,0.50146


[34m[1mwandb[0m: Agent Starting Run: 9ytt1qij with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003972081174932431
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 411 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
411 K     Trainable params
0         Non-trainable params
411 K     Total params
1.648     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.497


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.139 >= min_delta = 0.002. New best score: 0.637


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.038 >= min_delta = 0.002. New best score: 0.675


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.021 >= min_delta = 0.002. New best score: 0.695


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.016 >= min_delta = 0.002. New best score: 0.711


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.009 >= min_delta = 0.002. New best score: 0.721


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.725


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.733


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.741


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.002 >= min_delta = 0.002. New best score: 0.743


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.745


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.749


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.749. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_9ytt1qij.ckpt


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇██████
train_loss,█▆▄▄▂▂▂▂▂▂▂▂▁▂▂▂▂▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇████
val_bleu,▁▅▆▆▇▇▇▇█▇████████████
val_char_acc,▁▅▆▆▇▇▇▇▇▇▇███████████
val_loss,█▅▃▃▃▂▂▂▂▁▂▂▂▂▂▂▁▂▁▁▂▁
val_word_acc,▁▄▅▆▇▇▇▇▇▇████████████

0,1
epoch,21.0
train_loss,0.26885
trainer/global_step,8799.0
val_bleu,0.746
val_char_acc,0.83836
val_loss,0.67213
val_word_acc,0.51831


[34m[1mwandb[0m: Agent Starting Run: obhfeka8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004867845417808631
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.0001


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 2.3 M  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
2.3 M     Trainable params
0         Non-trainable params
2.3 M     Total params
9.167     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.540


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.105 >= min_delta = 0.002. New best score: 0.644


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.034 >= min_delta = 0.002. New best score: 0.678


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.019 >= min_delta = 0.002. New best score: 0.697


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.007 >= min_delta = 0.002. New best score: 0.704


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.009 >= min_delta = 0.002. New best score: 0.713


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.717


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.005 >= min_delta = 0.002. New best score: 0.721


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.005 >= min_delta = 0.002. New best score: 0.726


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.003 >= min_delta = 0.002. New best score: 0.730


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.730. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_obhfeka8.ckpt


0,1
epoch,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇████
train_loss,█▆▅▅▅▃▄▂▂▂▂▃▂▂▁▁▂▂▁▂▄▁▂▂▁▂▁▂▂▂▁▂▂▂▁▁▃▂▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇█
val_bleu,▁▅▆▇▇▇▇▇██▇█████▇███
val_char_acc,▁▅▆▇▇▇█▇██▇█████████
val_loss,█▄▃▂▃▃▂▂▁▂▂▁▁▂▂▂▂▁▁▂
val_word_acc,▁▄▅▆▇▇▇▇▇█▇█████▇███

0,1
epoch,19.0
train_loss,0.27431
trainer/global_step,7999.0
val_bleu,0.72505
val_char_acc,0.81807
val_loss,0.71969
val_word_acc,0.47192


[34m[1mwandb[0m: Agent Starting Run: dwxza5ma with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005376031075885679
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 1.4 M  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
1.4 M     Trainable params
0         Non-trainable params
1.4 M     Total params
5.481     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7519


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.605


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.088 >= min_delta = 0.002. New best score: 0.693


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.023 >= min_delta = 0.002. New best score: 0.716


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.016 >= min_delta = 0.002. New best score: 0.733


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.736


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.006 >= min_delta = 0.002. New best score: 0.742


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.008 >= min_delta = 0.002. New best score: 0.750


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.005 >= min_delta = 0.002. New best score: 0.756


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.756. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_dwxza5ma.ckpt


0,1
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█
train_loss,█▂▂▁▂▂▁▁▁▂▂▁▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█████
val_bleu,▁▅▆▇▇▇▇▇████████
val_char_acc,▁▅▆▇▇▇▇▇██████▇█
val_loss,█▃▃▂▁▂▁▂▂▁▂▁▂▂▂▁
val_word_acc,▁▅▆▇▇▇▇▇████████

0,1
epoch,15.0
train_loss,0.33063
trainer/global_step,6399.0
val_bleu,0.7531
val_char_acc,0.84244
val_loss,0.68297
val_word_acc,0.53491


[34m[1mwandb[0m: Agent Starting Run: hdjd4tu0 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: lstm
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	emb_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006937185467144609
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.7
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 643 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
643 K     Trainable params
0         Non-trainable params
643 K     Total params
2.574     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7531


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.536


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.120 >= min_delta = 0.002. New best score: 0.656


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.032 >= min_delta = 0.002. New best score: 0.688


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.009 >= min_delta = 0.002. New best score: 0.698


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.020 >= min_delta = 0.002. New best score: 0.718


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.010 >= min_delta = 0.002. New best score: 0.728


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.010 >= min_delta = 0.002. New best score: 0.738


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.005 >= min_delta = 0.002. New best score: 0.743


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.007 >= min_delta = 0.002. New best score: 0.750


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric val_bleu did not improve in the last 5 records. Best score: 0.750. Signaling Trainer to stop.


Best checkpoint saved at: /content/drive/MyDrive/seq2seq_checkpoints/best_model_hdjd4tu0.ckpt


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train_loss,█▃▃▄▄▂▃▂▂▂▂▂▂▂▂▁▂▂▁▁▁▂▂▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇█
val_bleu,▁▅▆▆▇▇▇▇▇▇███████████
val_char_acc,▁▅▆▆▇▇▇▇▇▇▇▇█████████
val_loss,█▅▃▃▃▃▂▃▃▂▂▂▂▂▂▂▂▁▂▂▂
val_word_acc,▁▄▅▆▇▇▇▇▇▇▇▇▇████▇███

0,1
epoch,20.0
train_loss,0.18121
trainer/global_step,8399.0
val_bleu,0.74586
val_char_acc,0.83189
val_loss,0.82632
val_word_acc,0.52466


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7teebzqe with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beam_size: 4
[34m[1mwandb[0m: 	bidirectional: True
[34m[1mwandb[0m: 	cell_type: gru
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	emb_dim: 32
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005250545074459772
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	teacher_forcing_ratio: 0.5
[34m[1mwandb[0m: 	weight_decay: 1e-05


INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | model     | Seq2Seq          | 1.7 M  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
1.7 M     Trainable params
0         Non-trainable params
1.7 M     Total params
6.998     Total estimated model params size (MB)
9         Modules in train mode
0         Modules in eval mode


🌍 Global best BLEU so far: 0.7531


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved. New best score: 0.626


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.038 >= min_delta = 0.002. New best score: 0.663


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.004 >= min_delta = 0.002. New best score: 0.667


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric val_bleu improved by 0.016 >= min_delta = 0.002. New best score: 0.684


In [15]:
import wandb
import json

api = wandb.Api()

# W&B project path in "<entity>/<project>" form, exactly as it appears in the
# project URL: wandb.ai/<entity>/<project>.
project_name = "me21b118-iit-madras/Machine Transliteration"


def _parse_if_json_string(payload, label, run_id):
    """Return `payload` unchanged unless it is a str, in which case JSON-decode it.

    Prints a warning and returns None when the string is not valid JSON.
    """
    if not isinstance(payload, str):
        return payload
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        print(f"Warning: Could not parse {label} string for run {run_id}")
        return None


def _unwrap_config_value(value):
    """Strip the {'value': ...} wrapper that the API may put around config entries."""
    if isinstance(value, dict) and "value" in value:
        return value["value"]
    return value


try:
    # Fetch every run and pick the best one in Python: summary_metrics can come
    # back as a JSON string for some runs, so the comparison is done locally
    # after normalising each run's summary.
    runs = api.runs(project_name)

    best_run = None
    best_bleu = -1.0  # lower than any possible BLEU score

    for run in runs:
        # Only finished runs that actually reported a summary are candidates.
        if run.state != "finished" or run.summary_metrics is None:
            continue

        metrics = _parse_if_json_string(run.summary_metrics, "summary_metrics", run.id)
        if metrics is None or not hasattr(metrics, "get"):
            # Unparseable or non-mapping summary: skip this run, keep scanning.
            continue

        # NOTE: this is the summary value of val_bleu (the value at the end of
        # the run), which may be slightly below the best epoch of that run.
        val_bleu = metrics.get("val_bleu")
        if val_bleu is not None and isinstance(val_bleu, (int, float)):
            if val_bleu > best_bleu:
                best_bleu = val_bleu
                best_run = run

    if best_run is not None:
        print(f"Found best run: {best_run.id} with val_bleu: {best_bleu:.4f}")
        print("\nBest Model Configuration:")

        config = _parse_if_json_string(best_run.config, "config", best_run.id)
        if config is None:
            config = {}  # fall back to an empty config if it can't be parsed

        for key, value in config.items():
            # Exclude wandb internal keys (prefixed with an underscore).
            if not key.startswith('_'):
                print(f"  {key}: {_unwrap_config_value(value)}")
    else:
        print("No finished runs with valid 'val_bleu' metric found in the project.")

except Exception as e:
    # Best-effort lookup: report API/network failures instead of aborting the notebook.
    print(f"An error occurred: {e}")

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Currently logged in as: [33mme21b118[0m ([33mme21b118-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Found best run: dwxza5ma with val_bleu: 0.7531

Best Model Configuration:
  dropout: {'value': 0}
  emb_dim: {'value': 16}
  beam_size: {'value': 4}
  cell_type: {'value': 'lstm'}
  batch_size: {'value': 128}
  hidden_dim: {'value': 256}
  num_layers: {'value': 2}
  weight_decay: {'value': 1e-05}
  bidirectional: {'value': True}
  learning_rate: {'value': 0.005376031075885679}
  teacher_forcing_ratio: {'value': 0.5}


#### Evaluate the best model on the test set

In [20]:
import torch
from torch.utils.data import DataLoader
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from tqdm.auto import tqdm

# ----------------------------------------------------
# 1️⃣ Extract config values
# ----------------------------------------------------
# Hyperparameters of the best sweep run (dwxza5ma), copied by hand from the
# W&B lookup output; they must match the architecture stored in the checkpoint.
cfg = {
    "dropout": 0.0,
    "emb_dim": 16,
    "beam_size": 4,
    "cell_type": "lstm",
    "batch_size": 128,
    "hidden_dim": 256,
    "num_layers": 2,
    "weight_decay": 1e-5,
    "bidirectional": True,
    "learning_rate": 0.005376031075885679,
    "teacher_forcing_ratio": 0.5
}

# ----------------------------------------------------
# 2️⃣ Build encoder-decoder model using best config
# ----------------------------------------------------
enc = EncoderRNN(
    len(src_vocab),
    cfg["emb_dim"],
    cfg["hidden_dim"],
    cfg["num_layers"],
    cfg["cell_type"],
    cfg["bidirectional"],
    cfg["dropout"]
)
dec = DecoderRNN(
    len(tgt_vocab),
    cfg["emb_dim"],
    cfg["hidden_dim"],
    cfg["num_layers"],
    cfg["cell_type"],
    cfg["dropout"]
)

# ----------------------------------------------------
# 3️⃣ Load the trained checkpoint
# ----------------------------------------------------
# Resolve the device first so the checkpoint tensors can be mapped straight
# onto it (a checkpoint saved on GPU can then be restored on a CPU runtime).
device = "cuda" if torch.cuda.is_available() else "cpu"

best_ckpt_path = "/content/drive/MyDrive/seq2seq_checkpoints/best_model_dwxza5ma.ckpt"  # update

# load_from_checkpoint builds the LightningModule itself, so no separate
# LitSeq2Seq(...) instance is constructed beforehand.
model = LitSeq2Seq.load_from_checkpoint(
    best_ckpt_path,
    map_location=device,
    encoder=enc,
    decoder=dec,
    lr=cfg["learning_rate"],
    pad_idx=tgt_vocab["<PAD>"],
    teacher_forcing_ratio=0.0,  # no teacher forcing for test
    weight_decay=cfg["weight_decay"]
)

model.to(device)
model.eval()

# ----------------------------------------------------
# 4️⃣ Prepare test dataset and dataloader
# ----------------------------------------------------
test_data = TransliterationDataset(test_pairs, src_vocab, tgt_vocab, max_len=64)
# shuffle=False keeps the test examples in file order.
test_loader = DataLoader(test_data, batch_size=cfg["batch_size"], shuffle=False, collate_fn=collate_fn)

# ----------------------------------------------------
# 5️⃣ Evaluate on test set (reusing validation logic)
# ----------------------------------------------------
def evaluate_test(model, dataloader, idx2tgt, tgt_vocab):
    """Score `model` on `dataloader` with greedy (argmax) decoding.

    Args:
        model: module exposing `.model(src, tgt, src_lens, teacher_forcing_ratio=...)`
            whose output is arg-maxed over the last axis to get token ids.
        dataloader: iterable of `(src_batch, tgt_batch, src_lens, tgt_lens)` tuples.
        idx2tgt: mapping from target token id to its string.
        tgt_vocab: mapping containing the "<PAD>", "<SOS>" and "<EOS>" ids.

    Returns:
        A 3-tuple `(metrics, pred_str, true_str)`. `metrics` is a dict with keys
        "char_acc", "word_acc" and "bleu". `pred_str` / `true_str` are the
        decoded strings of the LAST example processed (empty strings when the
        dataloader yields nothing).

    NOTE(review): predictions are compared position-by-position and scanning
    stops at the reference's <EOS>/<PAD>, so anything the model emits beyond
    the reference length is ignored by all three metrics.
    """
    model.eval()

    # Look the special-token ids up once instead of on every token.
    pad_idx = tgt_vocab["<PAD>"]
    sos_idx = tgt_vocab["<SOS>"]
    eos_idx = tgt_vocab["<EOS>"]
    special_ids = (pad_idx, sos_idx, eos_idx)

    total_chars = correct_chars = 0
    total_words = correct_words = 0
    bleu_scores = []
    smooth = SmoothingFunction().method1
    device = next(model.parameters()).device

    # Pre-bind so the return statement is valid even for an empty dataloader.
    pred_str = true_str = ""

    with torch.no_grad():
        for src_batch, tgt_batch, src_lens, tgt_lens in tqdm(dataloader, desc="Testing"):
            src_batch, tgt_batch = src_batch.to(device), tgt_batch.to(device)
            outputs = model.model(src_batch, tgt_batch, src_lens, teacher_forcing_ratio=0.0)
            preds = outputs.argmax(-1)

            for pred_seq, true_seq in zip(preds, tgt_batch):
                pred_chars, true_chars = [], []
                for p, t in zip(pred_seq.tolist(), true_seq.tolist()):
                    if t == pad_idx or t == eos_idx:
                        break  # end of the reference word
                    if t == sos_idx:
                        continue  # skip the start marker position
                    pred_chars.append(p)
                    true_chars.append(t)

                # Both lists are filled in lock-step, so they always align 1:1.
                correct_chars += sum(p == t for p, t in zip(pred_chars, true_chars))
                total_chars += len(true_chars)

                # Special tokens predicted inside the word are dropped from the string.
                pred_str = ''.join(idx2tgt[i] for i in pred_chars if i not in special_ids)
                true_str = ''.join(idx2tgt[i] for i in true_chars)

                if pred_str == true_str:
                    correct_words += 1
                total_words += 1
                if len(true_str) > 0:
                    # Character-level BLEU: each character is treated as a token.
                    bleu_scores.append(sentence_bleu([list(true_str)], list(pred_str), smoothing_function=smooth))

    char_acc = correct_chars / total_chars if total_chars else 0
    word_acc = correct_words / total_words if total_words else 0
    bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0

    print(f"\n📊 Test Results → Char Acc: {char_acc:.4f} | Word Acc: {word_acc:.4f} | BLEU: {bleu:.4f}")
    return {"char_acc": char_acc, "word_acc": word_acc, "bleu": bleu}, pred_str, true_str

# ----------------------------------------------------
# 6️⃣ Run evaluation
# ----------------------------------------------------
# Unpack the metrics dict plus the decoded prediction/reference strings that
# evaluate_test returns; the two strings belong to the last example processed,
# not to the whole test set.
test_metrics, pred_str, true_str = evaluate_test(model, test_loader, idx2tgt, tgt_vocab)

Testing:   0%|          | 0/32 [00:00<?, ?it/s]


📊 Test Results → Char Acc: 0.8229 | Word Acc: 0.4983 | BLEU: 0.7188
