## Base

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

from collections import Counter
import numpy as np
from pathlib import Path
from collections import Counter
from typing import Dict, List, Optional, Tuple, Union
import time
import os

In [2]:
from src.data_prep import load_dakshina_data 
from src.seq2seq_model import Seq2SeqTransliteration
from src.data_prep import create_data_loaders

In [None]:
# Code to load the Dakshina dataset (replace with your own data loading code)

train_lines, val_lines, test_lines = load_dakshina_data()

# Create data loaders
train_loader, val_loader, test_loader, src_vocab, tgt_vocab = create_data_loaders(
    train_lines,
    batch_size=64,
    min_freq=1,
    val_lines=val_lines,
    test_lines=test_lines
)

# Initialize model
model = Seq2SeqTransliteration(
    src_vocab_size=len(src_vocab),
    tgt_vocab_size=len(tgt_vocab),
    embedding_dim=32,
    hidden_size=256,
    encoder_layers=2,
    decoder_layers=2,
    dropout=0.2,
    cell_type="gru",
    learning_rate=0.001,
)

print(f'Using device: {"gpu" if torch.cuda.is_available() else "cpu"}')
# Create trainer
trainer = pl.Trainer(
    max_epochs=20,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    callbacks=[
        pl.callbacks.EarlyStopping(monitor='val_loss', patience=3),
        pl.callbacks.ModelCheckpoint(monitor='val_char_acc', mode='max')
    ]
)

# Train model
trainer.fit(model, train_loader, val_loader)

trainer.save_checkpoint(f'./data/final_model_{time.strftime(r"%m_%d__%H_%M_%S")}')

# Total number of parameters
total_params = model.compute_parameters()
print(f"Total number of parameters: {total_params}")

Loaded 44204 training examples
Loaded 4358 validation examples
Loaded 4502 test examples


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | encoder   | Encoder            | 619 K  | train
1 | decoder   | Decoder            | 626 K  | train
2 | train_acc | MulticlassAccuracy | 0      | train
3 | val_acc   | MulticlassAccuracy | 0      | train
4 | test_acc  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.983     Total estimated model params size (MB)
12        Modules in train mode
0         Modules in eval mode


Using device: gpu


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Total number of parameters: 454654


In [10]:
# Test model
trainer.test(model, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\DELL\.conda\envs\DL\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.4956575334072113,
  'test_acc': 0.8310022354125977,
  'test_char_acc': 0.8295981287956238}]

In [11]:
from src.sweeper import save_predictions

my_pred_arr = save_predictions(model, test_loader, src_vocab, tgt_vocab, output_path=f'./data/predictions_{time.strftime(r"%m_%d__%H_%M_%S")}.tsv')

Saved predictions to ./data/predictions_05_17__16_46_03.tsv


## Sweep

In [None]:
from src.data_prep import load_dakshina_data
from src.sweeper import run_wandb_sweep

In [None]:
# Load Hindi data from Dakshina dataset
train_lines, val_lines, test_lines = load_dakshina_data()

# Or run a hyperparameter sweep
run_wandb_sweep(train_lines, val_lines, test_lines, num_runs=1, cont_id='bullseye2608-indian-institute-of-technology-madras/hindi-transliteration/6bo8mal9')

NameError: name 'load_dakshina_data' is not defined

In [8]:
import time

## Attention

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics
import pytorch_lightning as pl
from typing import Dict, List, Optional, Tuple, Union

In [None]:
from src.data_prep import create_data_loaders, load_dakshina_data
from src.seq2seq_attention_model import Seq2SeqAttentionTransliteration

In [None]:


train_lines, val_lines, test_lines = load_dakshina_data(base_path_data='./dataset/dakshina_dataset_v1.0/')

# Create data loaders
train_loader, val_loader, test_loader, src_vocab, tgt_vocab = create_data_loaders(
    train_lines,
    batch_size=64,
    min_freq=1,
    val_lines=val_lines,
    test_lines=test_lines
)

# Initialize model
model = Seq2SeqAttentionTransliteration(
    src_vocab_size=len(src_vocab),
    tgt_vocab_size=len(tgt_vocab),
    embedding_dim=32,
    attention_method='general',
    hidden_size=256,
    encoder_layers=2,
    decoder_layers=2,
    dropout=0.2,
    cell_type="gru",
    learning_rate=0.001,
)


Loaded 44204 training examples
Loaded 4358 validation examples
Loaded 4502 test examples


In [None]:

print(f'Using device: {"cuda" if torch.cuda.is_available() else "cpu"}') # Corrected "gpu" to "cuda" for torch
# Create trainer
trainer = pl.Trainer(
    max_epochs=15, # For testing, increase for actual training
    accelerator='cuda' if torch.cuda.is_available() else 'cpu',
    callbacks=[
        pl.callbacks.EarlyStopping(monitor='val_loss', patience=3, verbose=True),
        pl.callbacks.ModelCheckpoint(monitor='val_char_acc', mode='max', filename='best_model-{epoch:02d}-{val_char_acc:.2f}')
    ],
    # precision="16-mixed" # Optional: for mixed precision training
)

# Train model
trainer.fit(model, train_loader, val_loader)

trainer.save_checkpoint(f'./data/final_model_{time.strftime(r"%m_%d__%H_%M_%S")}')


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | encoder   | Encoder            | 1.8 M  | train
1 | decoder   | AttentionDecoder   | 1.2 M  | train
2 | train_acc | MulticlassAccuracy | 0      | train
3 | val_acc   | MulticlassAccuracy | 0      | train
4 | test_acc  | MulticlassAccuracy | 0      | train
---------------------------------------------------------
2.9 M     Trainable params
0         Non-trainable params
2.9 M     Total params
11.714    Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Using device: cuda


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 1.020


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.172 >= min_delta = 0.0. New best score: 0.849


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.156 >= min_delta = 0.0. New best score: 0.693


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.681


In [None]:

# Test model (using the best checkpoint automatically by default if checkpoint_callback=True)
trainer.test(model, test_loader)

In [None]:
from src.sweeper import save_predictions

my_pred_arr = save_predictions(model, test_loader, src_vocab, tgt_vocab, output_path=f'./data/predictions_{time.strftime(r"%m_%d__%H_%M_%S")}.tsv')