In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import wandb
from tqdm.notebook import tqdm
import random
import matplotlib.pyplot as plt
import copy

# Set random seeds for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Download data (if needed)
!wget https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
!tar -xf dakshina_dataset_v1.0.tar

# Define paths
train_path = "dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.train.tsv"
val_path = "dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.dev.tsv"
test_path = "dakshina_dataset_v1.0/hi/lexicons/hi.translit.sampled.test.tsv"

# Load data
train_df = pd.read_csv(train_path, delimiter='\t', names=['hi', 'en', '_'])
val_df = pd.read_csv(val_path, delimiter='\t', names=['hi', 'en', '_'])
test_df = pd.read_csv(test_path, delimiter='\t', names=['hi', 'en', '_'])

print(f"Train samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Test samples: {len(test_df)}")

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

--2025-05-20 16:48:18--  https://storage.googleapis.com/gresearch/dakshina/dakshina_dataset_v1.0.tar
Resolving storage.googleapis.com (storage.googleapis.com)... 173.194.174.207, 74.125.203.207, 108.177.97.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|173.194.174.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2008340480 (1.9G) [application/x-tar]
Saving to: ‘dakshina_dataset_v1.0.tar’


2025-05-20 16:49:22 (30.1 MB/s) - ‘dakshina_dataset_v1.0.tar’ saved [2008340480/2008340480]

Train samples: 44204
Validation samples: 4358
Test samples: 4502
Using device: cuda


In [2]:
# Create character mappings with safety checks
def create_vocab(texts, special_tokens=True):
    chars = set()
    for text in texts:
        for char in str(text):
            chars.add(char)
    
    # Create vocabulary dictionary
    if special_tokens:
        vocab = {'<PAD>': 0, '< SOS >': 1, '<EOS>': 2, '<UNK>': 3}
    else:
        vocab = {}
        
    for i, char in enumerate(sorted(list(chars))):
        vocab[char] = i + 4
    
    return vocab

# Convert text to indices with better error handling
def text_to_indices(text, vocab):
    indices = [vocab['< SOS >']]
    for char in str(text):
        if char in vocab:
            indices.append(vocab[char])
        elif char.lower() in vocab:
            indices.append(vocab[char.lower()])
        else:
            indices.append(vocab['<UNK>'])
    indices.append(vocab['<EOS>'])
    return indices

# Create vocabularies
src_vocab = create_vocab(train_df['en'])
tgt_vocab = create_vocab(train_df['hi'])

# Create reverse mappings for visualization
idx2src = {idx: char for char, idx in src_vocab.items()}
idx2tgt = {idx: char for char, idx in tgt_vocab.items()}

print(f"Source vocabulary size: {len(src_vocab)}")
print(f"Target vocabulary size: {len(tgt_vocab)}")

# Check max sequence lengths
src_max_len = max([len(str(text)) for text in train_df['en']])
tgt_max_len = max([len(str(text)) for text in train_df['hi']])
print(f"Max source sequence length: {src_max_len}")
print(f"Max target sequence length: {tgt_max_len}")

Source vocabulary size: 30
Target vocabulary size: 67
Max source sequence length: 20
Max target sequence length: 19


In [3]:
# Custom Dataset
class TransliterationDataset(Dataset):
    def __init__(self, dataframe, src_vocab, tgt_vocab):
        self.dataframe = dataframe
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab
        
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        src_text = self.dataframe.iloc[idx]['en']
        tgt_text = self.dataframe.iloc[idx]['hi']
        
        src_indices = text_to_indices(src_text, self.src_vocab)
        tgt_indices = text_to_indices(tgt_text, self.tgt_vocab)
        
        return torch.tensor(src_indices), torch.tensor(tgt_indices)

# Collate function with safety checks
def collate_fn(batch):
    src_batch, tgt_batch = [], []
    for src, tgt in batch:
        # Safety check for index bounds
        src = torch.clamp(src, 0, len(src_vocab)-1)
        tgt = torch.clamp(tgt, 0, len(tgt_vocab)-1)
        
        src_batch.append(src)
        tgt_batch.append(tgt)
    
    src_batch = pad_sequence(src_batch, batch_first=True, padding_value=src_vocab['<PAD>'])
    tgt_batch = pad_sequence(tgt_batch, batch_first=True, padding_value=tgt_vocab['<PAD>'])
    
    return src_batch, tgt_batch

In [4]:
class Encoder(nn.Module):
    def __init__(self, input_vocab_size, embedding_dim, hidden_dim, num_layers, dropout, cell_type):
        super().__init__()
        self.embedding = nn.Embedding(input_vocab_size, embedding_dim)
        self.cell_type = cell_type.lower()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        if self.cell_type == "lstm":
            self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, 
                              dropout=dropout if num_layers > 1 else 0, batch_first=True)
        elif self.cell_type == "gru":
            self.rnn = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, 
                             dropout=dropout if num_layers > 1 else 0, batch_first=True)
        else:  # rnn
            self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=num_layers, 
                             dropout=dropout if num_layers > 1 else 0, batch_first=True)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src):
        # src: [batch_size, src_len]
        embedded = self.dropout(self.embedding(src))  # [batch_size, src_len, emb_dim]
        
        if self.cell_type == "lstm":
            outputs, (hidden, cell) = self.rnn(embedded)
            return outputs, hidden, cell
        else:
            outputs, hidden = self.rnn(embedded)
            return outputs, hidden, None

In [5]:
class Decoder(nn.Module):
    def __init__(self, output_vocab_size, embedding_dim, hidden_dim, num_layers, dropout, cell_type):
        super().__init__()
        self.output_vocab_size = output_vocab_size
        self.embedding = nn.Embedding(output_vocab_size, embedding_dim)
        self.cell_type = cell_type.lower()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        if self.cell_type == "lstm":
            self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, 
                              dropout=dropout if num_layers > 1 else 0, batch_first=True)
        elif self.cell_type == "gru":
            self.rnn = nn.GRU(embedding_dim, hidden_dim, num_layers=num_layers, 
                             dropout=dropout if num_layers > 1 else 0, batch_first=True)
        else:  # rnn
            self.rnn = nn.RNN(embedding_dim, hidden_dim, num_layers=num_layers, 
                             dropout=dropout if num_layers > 1 else 0, batch_first=True)
        
        self.fc_out = nn.Linear(hidden_dim, output_vocab_size)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input, hidden, cell=None):
        # input: [batch_size]
        input = input.unsqueeze(1)  # [batch_size, 1]
        embedded = self.dropout(self.embedding(input))  # [batch_size, 1, emb_dim]
        
        if self.cell_type == "lstm":
            output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        else:
            output, hidden = self.rnn(embedded, hidden)
            cell = None
            
        prediction = self.fc_out(output.squeeze(1))  # [batch_size, output_vocab_size]
        
        return prediction, hidden, cell

In [6]:
class Seq2Seq(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder = Encoder(
            config['input_vocab_size'],
            config['embedding_dim'],
            config['hidden_dim'],
            config['num_encoding_layers'],
            config['dropout'],
            config['cell_type']
        )
        self.decoder = Decoder(
            config['output_vocab_size'],
            config['embedding_dim'],
            config['hidden_dim'],
            config['num_decoding_layers'],
            config['dropout'],
            config['cell_type']
        )
        self.device = config.get('device', device)
        self.teacher_forcing_ratio = config.get('teacher_forcing_ratio', 0.5)
        self.cell_type = config['cell_type'].lower()
        self.config = config
        
        # Apply weight initialization
        self.apply(self._init_weights)
        
    def _init_weights(self, m):
        if isinstance(m, nn.GRU) or isinstance(m, nn.LSTM) or isinstance(m, nn.RNN):
            for name, param in m.named_parameters():
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(param.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(param.data)  # Orthogonal initialization for recurrent weights
                elif 'bias' in name:
                    param.data.fill_(0)
        elif isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0)
        elif isinstance(m, nn.Embedding):
            nn.init.uniform_(m.weight.data, -0.1, 0.1)
        
    def forward(self, src, trg):
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        trg_vocab_size = self.decoder.output_vocab_size
        
        # Tensor to store decoder outputs
        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(self.device)
        
        # Encode source sequence
        encoder_outputs, hidden, cell = self.encoder(src)
        
        # Adjust hidden state dimensions if needed
        enc_layers = self.config['num_encoding_layers']
        dec_layers = self.config['num_decoding_layers']
        hidden_size = self.config['hidden_dim']
        
        if enc_layers != dec_layers:
            if self.cell_type != 'lstm':
                # Case 1: Encoder has more layers - take only what we need
                if enc_layers > dec_layers:
                    hidden = hidden[:dec_layers]
                # Case 2: Decoder has more layers - pad with zeros
                else:
                    padding = torch.zeros(dec_layers - enc_layers, batch_size, hidden_size).to(self.device)
                    hidden = torch.cat([hidden, padding], dim=0)
            else:  # LSTM case
                if enc_layers > dec_layers:
                    hidden = hidden[:dec_layers]
                    cell = cell[:dec_layers]
                else:
                    padding = torch.zeros(dec_layers - enc_layers, batch_size, hidden_size).to(self.device)
                    hidden = torch.cat([hidden, padding], dim=0)
                    cell = torch.cat([cell, padding], dim=0)
        
        # First input to decoder is < SOS > token
        input = trg[:, 0]
        
        for t in range(1, trg_len):
            # Get decoder output
            output, hidden, cell = self.decoder(input, hidden, cell)
            
            # Store prediction
            outputs[:, t, :] = output
            
            # Teacher forcing
            teacher_force = random.random() < self.teacher_forcing_ratio
            
            # Get highest predicted token
            top1 = output.argmax(1)
            
            # Next input is either ground truth or predicted token
            input = trg[:, t] if teacher_force else top1
            
        return outputs

In [7]:
def train_epoch(model, dataloader, optimizer, criterion, clip=1.0):
    model.train()
    epoch_loss = 0
    
    for src, trg in tqdm(dataloader, desc="Training", leave=False):
        src, trg = src.to(model.device), trg.to(model.device)
        
        optimizer.zero_grad()
        
        output = model(src, trg)
        
        # Exclude < SOS > token
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        trg = trg[:, 1:].reshape(-1)
        
        loss = criterion(output, trg)
        loss.backward()
        
        # Clip gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        epoch_loss += loss.item()
        
    return epoch_loss / len(dataloader)

In [8]:
def evaluate(model, dataloader, criterion):
    model.eval()
    epoch_loss = 0
    
    # For exact match accuracy
    exact_match_correct = 0
    exact_match_total = 0
    
    # For character-level accuracy
    char_correct = 0
    char_total = 0
    
    with torch.no_grad():
        for src, trg in tqdm(dataloader, desc="Evaluating", leave=False):
            src, trg = src.to(model.device), trg.to(model.device)
            
            output = model(src, trg)
            
            # For loss calculation
            output_dim = output.shape[-1]
            output_flat = output[:, 1:].reshape(-1, output_dim)
            trg_flat = trg[:, 1:].reshape(-1)
            
            loss = criterion(output_flat, trg_flat)
            epoch_loss += loss.item()
            
            # Get predictions
            predictions = output.argmax(dim=2)
            
            # Calculate exact match accuracy (sequence level)
            for i in range(len(predictions)):
                pred_seq = predictions[i, 1:].cpu().numpy()  # Skip < SOS >
                target_seq = trg[i, 1:].cpu().numpy()  # Skip < SOS >
                
                # Get valid sequence (remove padding)
                valid_length = (target_seq != tgt_vocab['<PAD>']).sum()
                pred_clean = pred_seq[:valid_length]
                target_clean = target_seq[:valid_length]
                
                # Check exact match
                if np.array_equal(pred_clean, target_clean):
                    exact_match_correct += 1
                exact_match_total += 1
                
                # Calculate character-level accuracy
                for j in range(valid_length):
                    if pred_seq[j] == target_seq[j]:
                        char_correct += 1
                    char_total += 1
    
    # Calculate metrics
    exact_match_accuracy = exact_match_correct / exact_match_total if exact_match_total > 0 else 0
    char_accuracy = char_correct / char_total if char_total > 0 else 0
    
    return {
        'loss': epoch_loss / len(dataloader),
        'exact_match_accuracy': exact_match_accuracy,
        'char_accuracy': char_accuracy
    }

In [9]:
def translate_sentence(model, sentence, src_vocab, tgt_vocab, idx2tgt, max_len=50):
    model.eval()
    
    # Convert to indices and add < SOS > and <EOS>
    indices = text_to_indices(sentence, src_vocab)
    src_tensor = torch.LongTensor(indices).unsqueeze(0).to(model.device)
    
    # Get encoder outputs
    with torch.no_grad():
        encoder_outputs, hidden, cell = model.encoder(src_tensor)
    
    # Adjust hidden state dimensions if needed
    enc_layers = model.config['num_encoding_layers']
    dec_layers = model.config['num_decoding_layers']
    hidden_size = model.config['hidden_dim']
    
    if enc_layers != dec_layers:
        batch_size = 1  # Since we're translating one sentence
        if model.cell_type != 'lstm':
            if enc_layers > dec_layers:
                hidden = hidden[:dec_layers]
            else:
                padding = torch.zeros(dec_layers - enc_layers, batch_size, hidden_size).to(model.device)
                hidden = torch.cat([hidden, padding], dim=0)
        else:  # LSTM case
            if enc_layers > dec_layers:
                hidden = hidden[:dec_layers]
                cell = cell[:dec_layers]
            else:
                padding = torch.zeros(dec_layers - enc_layers, batch_size, hidden_size).to(model.device)
                hidden = torch.cat([hidden, padding], dim=0)
                cell = torch.cat([cell, padding], dim=0)
    
    # Start with < SOS > token
    trg_idx = [tgt_vocab['< SOS >']]
    
    for _ in range(max_len):
        trg_tensor = torch.LongTensor([trg_idx[-1]]).to(model.device)
        
        with torch.no_grad():
            output, hidden, cell = model.decoder(trg_tensor, hidden, cell)
        
        pred_token = output.argmax(1).item()
        
        # Stop if <EOS> token
        if pred_token == tgt_vocab['<EOS>']:
            break
        
        trg_idx.append(pred_token)
    
    # Convert indices to characters
    trg_tokens = [idx2tgt[i] for i in trg_idx if i not in [tgt_vocab['< SOS >'], tgt_vocab['<EOS>'], tgt_vocab['<PAD>']]]
    
    return ''.join(trg_tokens)

In [10]:
!pip install wandb -q

import wandb
wandb.login(key='130161b8988911058327a18dbbdfb663c58411b2')

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mda24m005[0m ([33mda24m005-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [11]:
# Early stopping setup
class EarlyStopping:
    def __init__(self, patience=10, min_delta=0.0):
        """
        Args:
            patience: Number of epochs to wait if no improvement and then stop training
            min_delta: Minimum change in monitored value to qualify as improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        
    def __call__(self, val_loss):
        score = -val_loss  # Higher score is better (i.e., lower validation loss)
        
        if self.best_score is None:
            self.best_score = score
        elif score < self.best_score + self.min_delta:  # No improvement
            self.counter += 1
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:  # Improvement
            self.best_score = score
            self.counter = 0
            
        return self.early_stop

In [12]:
def train_best_model(config, epochs=40):
    # Initialize wandb run for final training
    run = wandb.init(project="seq2seq-transliteration-final", name="best_model_final_training")
    
    # Log the configuration
    wandb.config.update(config)
    
    # Create datasets
    train_dataset = TransliterationDataset(train_df, src_vocab, tgt_vocab)
    val_dataset = TransliterationDataset(val_df, src_vocab, tgt_vocab)
    test_dataset = TransliterationDataset(test_df, src_vocab, tgt_vocab)
    
    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        shuffle=True,
        collate_fn=collate_fn
    )
    
    val_loader = DataLoader(
        val_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        collate_fn=collate_fn
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        collate_fn=collate_fn
    )
    
    # Initialize model with weight initialization
    model = Seq2Seq(config).to(device)
    
    # Apply weight initialization
    def init_weights(m):
        if isinstance(m, nn.GRU) or isinstance(m, nn.LSTM) or isinstance(m, nn.RNN):
            for name, param in m.named_parameters():
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(param.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(param.data)
                elif 'bias' in name:
                    param.data.fill_(0)
        elif isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0)
        elif isinstance(m, nn.Embedding):
            nn.init.uniform_(m.weight.data, -0.1, 0.1)
    
    model.apply(init_weights)
    
    # Optimizer and criterion
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.CrossEntropyLoss(ignore_index=tgt_vocab['<PAD>'])
    
    # Initialize early stopping
    early_stopping = EarlyStopping(patience=10)
    
    # Training loop
    best_exact_match = 0
    best_model_state = None
    
    for epoch in range(epochs):
        # Train
        train_loss = train_epoch(model, train_loader, optimizer, criterion)
        
        # Evaluate on validation set
        eval_metrics = evaluate(model, val_loader, criterion)
        
        # Log metrics
        wandb.log({
            'epoch': epoch,
            'train_loss': train_loss,
            'val_loss': eval_metrics['loss'],
            'val_exact_match': eval_metrics['exact_match_accuracy'],
            'val_char_accuracy': eval_metrics['char_accuracy']
        })
        
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {eval_metrics['loss']:.4f}")
        print(f"Exact Match Accuracy: {eval_metrics['exact_match_accuracy']:.4f}, Char Accuracy: {eval_metrics['char_accuracy']:.4f}")
        
        # Save best model
        if eval_metrics['exact_match_accuracy'] > best_exact_match:
            best_exact_match = eval_metrics['exact_match_accuracy']
            best_model_state = model.state_dict().copy()
            torch.save(model.state_dict(), 'best_model_final.pt')
            wandb.save('best_model_final.pt')
        
        # Check early stopping condition
        if early_stopping(eval_metrics['loss']):
            print("Early stopping triggered!")
            break
    
    # Load best model for testing
    model.load_state_dict(best_model_state)
    
    # Evaluate on test set
    test_metrics = evaluate(model, test_loader, criterion)
    
    # Log final test metrics
    wandb.run.summary['test_loss'] = test_metrics['loss']
    wandb.run.summary['test_exact_match'] = test_metrics['exact_match_accuracy']
    wandb.run.summary['test_char_accuracy'] = test_metrics['char_accuracy']
    
    print(f"\nFinal Test Results:")
    print(f"Test Loss: {test_metrics['loss']:.4f}")
    print(f"Test Exact Match Accuracy: {test_metrics['exact_match_accuracy']:.4f}")
    print(f"Test Character Accuracy: {test_metrics['char_accuracy']:.4f}")
    
    # Generate predictions for test examples
    predictions = []
    for i in range(len(test_df)):
        src_text = test_df.iloc[i]['en']
        tgt_text = test_df.iloc[i]['hi']
        pred_text = translate_sentence(model, src_text, src_vocab, tgt_vocab, idx2tgt)
        
        predictions.append({
            'Source': src_text,
            'Target': tgt_text,
            'Prediction': pred_text,
            'Correct': pred_text == tgt_text
        })
    
    # Create a table for predictions
    prediction_table = wandb.Table(dataframe=pd.DataFrame(predictions))
    wandb.log({"test_predictions": prediction_table})
    
    # Save predictions to file for assignment submission
    pd.DataFrame(predictions).to_csv('predictions_vanilla.csv', index=False)
    wandb.save('predictions_vanilla.csv')
    
    # Create error analysis
    error_examples = [p for p in predictions if not p['Correct']]
    error_table = wandb.Table(dataframe=pd.DataFrame(error_examples[:20]))
    wandb.log({"error_analysis": error_table})
    
    # Close wandb run
    wandb.finish()
    
    return model, pd.DataFrame(predictions)

In [13]:
def get_best_config(project_name, entity="da24m005-iit-madras"):
    """
    Retrieves the best configuration from a wandb sweep based on validation accuracy.
    
    Args:
        project_name (str): The name of your wandb project
        entity (str, optional): Your wandb username/entity
    
    Returns:
        dict: The configuration of the best performing run
    """
    api = wandb.Api()
    path = f"{entity}/{project_name}" if entity else project_name
    runs = api.runs(path)
    
    best_run = None
    best_metric = float('-inf')
    
    for run in runs:
        if run.state == "finished":
            # Check different possible metric names
            val_metric = (run.summary.get('val_exact_match') or 
                         run.summary.get('val_accuracy') or 
                         run.summary.get('best_val_accuracy'))
            
            if val_metric is not None and val_metric > best_metric:
                best_metric = val_metric
                best_run = run
    
    if best_run:
        print(f"Best run ID: {best_run.id}")
        print(f"Best run name: {best_run.name}")
        print(f"Best validation metric: {best_metric:.4f}")
        
        # Create a clean config dictionary
        best_config = {
            'input_vocab_size': len(src_vocab),
            'output_vocab_size': len(tgt_vocab),
            'embedding_dim': best_run.config['embedding_dim'],
            'hidden_dim': best_run.config['hidden_dim'],
            'num_encoding_layers': best_run.config['num_encoding_layers'],
            'num_decoding_layers': best_run.config['num_decoding_layers'],
            'dropout': best_run.config['dropout'],
            'cell_type': best_run.config['cell_type'],
            'teacher_forcing_ratio': best_run.config['teacher_forcing_ratio'],
            'learning_rate': best_run.config['learning_rate'],
            'batch_size': best_run.config['batch_size'],
            'device': device
        }
        
        print(f"Best config: {best_config}")
        return best_config
    else:
        print("No finished runs found.")
        return None

best_config = get_best_config('seq2seq-transliteration')

Best run ID: zanpt1wh
Best run name: trim-sweep-8
Best validation metric: 0.4190
Best config: {'input_vocab_size': 30, 'output_vocab_size': 67, 'embedding_dim': 512, 'hidden_dim': 256, 'num_encoding_layers': 2, 'num_decoding_layers': 3, 'dropout': 0.3, 'cell_type': 'lstm', 'teacher_forcing_ratio': 0.9, 'learning_rate': 0.0006954403380443931, 'batch_size': 128, 'device': device(type='cuda')}


In [14]:
# Train the best model for 40 epochs
best_model, predictions = train_best_model(best_config, epochs=40)

Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 1/40, Train Loss: 3.0708, Val Loss: 2.7767
Exact Match Accuracy: 0.0000, Char Accuracy: 0.2610


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 2/40, Train Loss: 2.4477, Val Loss: 2.1535
Exact Match Accuracy: 0.0009, Char Accuracy: 0.3920


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 3/40, Train Loss: 1.8246, Val Loss: 1.4178
Exact Match Accuracy: 0.0425, Char Accuracy: 0.5928


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 4/40, Train Loss: 1.1829, Val Loss: 0.9346
Exact Match Accuracy: 0.1613, Char Accuracy: 0.7262


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 5/40, Train Loss: 0.8396, Val Loss: 0.7208
Exact Match Accuracy: 0.2565, Char Accuracy: 0.7891


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 6/40, Train Loss: 0.6873, Val Loss: 0.6312
Exact Match Accuracy: 0.2969, Char Accuracy: 0.8119


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 7/40, Train Loss: 0.5810, Val Loss: 0.5969
Exact Match Accuracy: 0.3346, Char Accuracy: 0.8238


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 8/40, Train Loss: 0.5225, Val Loss: 0.5278
Exact Match Accuracy: 0.3522, Char Accuracy: 0.8396


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 9/40, Train Loss: 0.4675, Val Loss: 0.5016
Exact Match Accuracy: 0.3690, Char Accuracy: 0.8495


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 10/40, Train Loss: 0.4357, Val Loss: 0.5094
Exact Match Accuracy: 0.3825, Char Accuracy: 0.8470
EarlyStopping counter: 1 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 11/40, Train Loss: 0.3944, Val Loss: 0.5023
Exact Match Accuracy: 0.3873, Char Accuracy: 0.8511
EarlyStopping counter: 2 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 12/40, Train Loss: 0.3697, Val Loss: 0.5028
Exact Match Accuracy: 0.3885, Char Accuracy: 0.8509
EarlyStopping counter: 3 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 13/40, Train Loss: 0.3374, Val Loss: 0.4763
Exact Match Accuracy: 0.3915, Char Accuracy: 0.8572


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 14/40, Train Loss: 0.3225, Val Loss: 0.4862
Exact Match Accuracy: 0.4009, Char Accuracy: 0.8572
EarlyStopping counter: 1 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 15/40, Train Loss: 0.2959, Val Loss: 0.4953
Exact Match Accuracy: 0.4006, Char Accuracy: 0.8568
EarlyStopping counter: 2 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 16/40, Train Loss: 0.2808, Val Loss: 0.5078
Exact Match Accuracy: 0.3988, Char Accuracy: 0.8572
EarlyStopping counter: 3 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 17/40, Train Loss: 0.2669, Val Loss: 0.4511
Exact Match Accuracy: 0.4013, Char Accuracy: 0.8657


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 18/40, Train Loss: 0.2480, Val Loss: 0.4643
Exact Match Accuracy: 0.4130, Char Accuracy: 0.8669
EarlyStopping counter: 1 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 19/40, Train Loss: 0.2336, Val Loss: 0.5135
Exact Match Accuracy: 0.4167, Char Accuracy: 0.8601
EarlyStopping counter: 2 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 20/40, Train Loss: 0.2242, Val Loss: 0.4946
Exact Match Accuracy: 0.4080, Char Accuracy: 0.8621
EarlyStopping counter: 3 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 21/40, Train Loss: 0.2119, Val Loss: 0.5359
Exact Match Accuracy: 0.4114, Char Accuracy: 0.8587
EarlyStopping counter: 4 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 22/40, Train Loss: 0.2018, Val Loss: 0.5139
Exact Match Accuracy: 0.4066, Char Accuracy: 0.8619
EarlyStopping counter: 5 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 23/40, Train Loss: 0.1937, Val Loss: 0.5237
Exact Match Accuracy: 0.4101, Char Accuracy: 0.8595
EarlyStopping counter: 6 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 24/40, Train Loss: 0.1863, Val Loss: 0.4839
Exact Match Accuracy: 0.4087, Char Accuracy: 0.8669
EarlyStopping counter: 7 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 25/40, Train Loss: 0.1781, Val Loss: 0.5435
Exact Match Accuracy: 0.4121, Char Accuracy: 0.8577
EarlyStopping counter: 8 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 26/40, Train Loss: 0.1719, Val Loss: 0.4950
Exact Match Accuracy: 0.4133, Char Accuracy: 0.8662
EarlyStopping counter: 9 out of 10


Training:   0%|          | 0/346 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/35 [00:00<?, ?it/s]

Epoch 27/40, Train Loss: 0.1620, Val Loss: 0.5338
Exact Match Accuracy: 0.4045, Char Accuracy: 0.8617
EarlyStopping counter: 10 out of 10
Early stopping triggered!


Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]


Final Test Results:
Test Loss: 0.5370
Test Exact Match Accuracy: 0.4096
Test Character Accuracy: 0.8609


0,1
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
train_loss,█▇▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_char_accuracy,▁▃▅▆▇▇█████████████████████
val_exact_match,▁▁▂▄▅▆▇▇▇▇█████████████████
val_loss,█▆▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,26.0
test_char_accuracy,0.86092
test_exact_match,0.4096
test_loss,0.53696
train_loss,0.16198
val_char_accuracy,0.86169
val_exact_match,0.40454
val_loss,0.53378


In [None]:
predictions.to_csv('predictions_vanilla.csv')

In [17]:
print("hello")

hello


In [18]:
predictions.to_csv('preds.csv')