# In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np
import pandas as pd
import time

import wandb

# Authenticate with Weights & Biases.
# SECURITY: an API key must never be committed to source. The key that used to
# be embedded here should be treated as leaked and revoked. Provide the key via
# the WANDB_API_KEY environment variable (or a prior `wandb login`); with no
# explicit key, wandb.login() picks up those credentials or prompts for one.
wandb.login()

# Pick the compute device: CUDA when PyTorch reports it available, else CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Vocabulary indices reserved for the start-of-sequence / end-of-sequence markers
SOS_token, EOS_token = 0, 1

# Language class to handle vocabulary
class Language:
    """Character vocabulary for one language.

    Despite the ``word2*`` attribute names, entries are individual
    characters: ``addWord`` splits each word and registers every character.
    Indices 0 and 1 are reserved for the SOS/EOS markers, so the first real
    character receives index 2.
    """

    def __init__(self, name):
        self.name = name
        # character -> index, and character -> number of times it was seen
        self.word2index = {}
        self.word2count = {}
        # index -> character, pre-seeded with the two special tokens
        self.index2word = {SOS_token: "<SOS>", EOS_token: "<EOS>"}
        # Next free index; starts at 2 because SOS and EOS are already present
        self.n_words = 2

    def addWord(self, word):
        """Register every character of ``word`` in the vocabulary."""
        for symbol in word:
            self.addChar(symbol)

    def addChar(self, char):
        """Add ``char`` to the vocabulary, or bump its count if already known."""
        if char in self.word2index:
            self.word2count[char] += 1
            return

        new_index = self.n_words
        self.word2index[char] = new_index
        self.word2count[char] = 1
        self.index2word[new_index] = char
        self.n_words = new_index + 1

# Function to load and preprocess data
def load_data(language, data_type, root="/kaggle/input/akshantar-data/aksharantar_sampled"):
    """Read one split of the transliteration dataset as a list of rows.

    Args:
        language: Language code used both as sub-directory and file prefix.
        data_type: Split name appended to the file name (e.g. "train", "valid").
        root: Directory containing one sub-directory per language. Defaults to
            the Kaggle dataset location the script was written for.

    Returns:
        A list of rows, each row a list of strings taken from the header-less
        CSV at ``{root}/{language}/{language}_{data_type}.csv``.
    """
    path = f"{root}/{language}/{language}_{data_type}.csv"
    # Every cell is a word, so read everything as text and switch off pandas'
    # NA detection: otherwise legitimate words such as "nan", "null" or "NA"
    # come back as float NaN and break the per-character vocabulary code.
    df = pd.read_csv(path, header=None, dtype=str, keep_default_na=False)
    return df.values.tolist()

# Function to prepare language objects and data
def prepare_data(language):
    """Load the training split and build both character vocabularies.

    The source vocabulary is always named 'eng'; the target vocabulary is
    named after ``language``. Column 0 of each row feeds the source
    vocabulary and column 1 the target vocabulary.

    Returns:
        ``(input_lang, output_lang, pairs)`` where ``pairs`` is the raw list
        of rows returned by ``load_data(language, "train")``.
    """
    pairs = load_data(language, "train")
    input_lang, output_lang = Language('eng'), Language(language)

    for row in pairs:
        source_word, target_word = row[0], row[1]
        input_lang.addWord(source_word)
        output_lang.addWord(target_word)

    return input_lang, output_lang, pairs

# Function to get cell type for RNN
def get_cell(cell_type):
    """Return the torch.nn recurrent layer class named by ``cell_type``.

    Accepts "LSTM", "GRU" or "RNN" (case-sensitive).

    Raises:
        ValueError: for any other value.
    """
    supported = (("LSTM", nn.LSTM), ("GRU", nn.GRU), ("RNN", nn.RNN))
    for label, layer_cls in supported:
        if cell_type == label:
            return layer_cls
    raise ValueError("Invalid cell type")

# Function to get optimizer
def get_optimizer(optimizer):
    """Return the torch.optim class named by ``optimizer``.

    Only "Adam" is recognised.

    Raises:
        ValueError: for any other value.
    """
    if optimizer != "Adam":
        raise ValueError("Invalid optimizer")
    return optim.Adam

# Encoder class
class Encoder(nn.Module):
    """Recurrent encoder that consumes one input index per forward call.

    Args:
        input_size: Number of entries in the source vocabulary.
        embed_size: Dimension of the character embedding.
        hidden_size: Hidden-state dimension of the recurrent layer.
        cell_type: "LSTM", "GRU" or "RNN" (resolved through ``get_cell``).
        num_layers: Number of stacked recurrent layers.
        dropout: Dropout probability between recurrent layers.
    """

    def __init__(self, input_size, embed_size, hidden_size, cell_type, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, embed_size)
        self.rnn = get_cell(cell_type)(
            input_size=embed_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            # Recurrent dropout only acts between stacked layers; passing a
            # non-zero value with a single layer just triggers a warning.
            dropout=dropout if num_layers > 1 else 0,
        )

    def forward(self, input, hidden):
        """Encode a single index.

        The embedding is reshaped to (1, 1, embed_size), i.e. one time step
        and a batch of one, before being fed to the recurrent layer.

        Returns:
            ``(output, hidden)`` exactly as produced by the recurrent layer.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.rnn(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial state for a batch of one.

        For an LSTM this is an ``(h_0, c_0)`` tuple, because nn.LSTM rejects a
        bare tensor as its state; for GRU/RNN it is a single tensor. Each
        tensor has shape (num_layers, 1, hidden_size) and is created on the
        same device as the module's parameters.
        """
        param_device = next(self.parameters()).device
        shape = (self.rnn.num_layers, 1, self.hidden_size)
        h_0 = torch.zeros(*shape, device=param_device)
        if isinstance(self.rnn, nn.LSTM):
            return h_0, torch.zeros(*shape, device=param_device)
        return h_0

# Decoder class
class Decoder(nn.Module):
    """Recurrent decoder that emits log-probabilities for one step per call.

    Args:
        output_size: Number of entries in the target vocabulary.
        embed_size: Dimension of the character embedding.
        hidden_size: Hidden-state dimension of the recurrent layer.
        cell_type: "LSTM", "GRU" or "RNN" (resolved through ``get_cell``).
        num_layers: Number of stacked recurrent layers.
        dropout: Dropout probability between recurrent layers.
    """

    def __init__(self, output_size, embed_size, hidden_size, cell_type, num_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, embed_size)
        self.rnn = get_cell(cell_type)(
            input_size=embed_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            # Recurrent dropout only acts between stacked layers; passing a
            # non-zero value with a single layer just triggers a warning.
            dropout=dropout if num_layers > 1 else 0,
        )
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one decoding step.

        The embedded input is reshaped to (1, 1, embed_size), passed through
        ReLU and the recurrent layer, then projected to vocabulary size.

        Returns:
            ``(log_probs, hidden)`` where ``log_probs`` is the log-softmax
            over the vocabulary for this step (first axis is the single time
            step removed by ``output[0]``) and ``hidden`` is the new state.
        """
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.rnn(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial state for a batch of one.

        For an LSTM this is an ``(h_0, c_0)`` tuple, because nn.LSTM rejects a
        bare tensor as its state; for GRU/RNN it is a single tensor. Each
        tensor has shape (num_layers, 1, hidden_size) and is created on the
        same device as the module's parameters.
        """
        param_device = next(self.parameters()).device
        shape = (self.rnn.num_layers, 1, self.hidden_size)
        h_0 = torch.zeros(*shape, device=param_device)
        if isinstance(self.rnn, nn.LSTM):
            return h_0, torch.zeros(*shape, device=param_device)
        return h_0

# Function to convert word to tensor of indices
def tensorFromWord(lang, word):
    """Encode ``word`` as a column tensor of vocabulary indices ending in EOS.

    Each character is looked up in ``lang.word2index`` (a character missing
    from the vocabulary raises KeyError). The result is a long tensor of
    shape (len(word) + 1, 1) placed on the module-level ``device``.
    """
    char_to_index = lang.word2index
    token_ids = [char_to_index[symbol] for symbol in word] + [EOS_token]
    flat = torch.tensor(token_ids, dtype=torch.long, device=device)
    return flat.view(-1, 1)

# Function to prepare tensors from pairs
def tensorsFromPair(input_lang, output_lang, pair):
    """Encode a (source, target) row as a pair of index tensors.

    ``pair[0]`` is encoded with ``input_lang`` and ``pair[1]`` with
    ``output_lang``, both through ``tensorFromWord``.
    """
    source_word, target_word = pair[0], pair[1]
    return (
        tensorFromWord(input_lang, source_word),
        tensorFromWord(output_lang, target_word),
    )

# Function to train the model
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=50):
    """Run one optimisation step on a single (source, target) example.

    The encoder reads the source one index at a time; its final state seeds
    the decoder. No teacher forcing is used: each decoder step is fed the
    decoder's own top-1 prediction from the previous step, and decoding stops
    early once that prediction is EOS. ``max_length`` is accepted but not
    used by this function.

    Returns:
        The summed step losses divided by the full target length (also when
        decoding stopped early).
    """
    for optimizer in (encoder_optimizer, decoder_optimizer):
        optimizer.zero_grad()

    target_length = target_tensor.size(0)

    # Encode: only the final recurrent state is kept
    state = encoder.initHidden()
    for source_index in input_tensor:
        _, state = encoder(source_index, state)

    # Decode greedily, starting from the SOS marker
    previous_token = torch.tensor([[SOS_token]], device=device)
    loss = 0
    for expected_index in target_tensor:
        log_probs, state = decoder(previous_token, state)
        _, best_index = log_probs.topk(1)
        # Detach so the next input carries no gradient history
        previous_token = best_index.squeeze().detach()

        loss += criterion(log_probs, expected_index)
        if previous_token.item() == EOS_token:
            break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

# Function to evaluate the model
def evaluate(encoder, decoder, word, input_lang, output_lang, max_length=50):
    """Greedily transliterate ``word`` and return the predicted string.

    Args:
        encoder: Encoder module exposing ``initHidden()``.
        decoder: Decoder module returning ``(log_probs, hidden)`` per step.
        word: Source word; every character must be in ``input_lang``.
        input_lang: Source vocabulary used to index ``word``.
        output_lang: Target vocabulary used to map indices back to characters.
        max_length: Maximum number of decoding steps.

    Returns:
        The decoded characters up to (not including) the first predicted EOS,
        or up to ``max_length`` characters if EOS is never predicted.
    """
    # torch.no_grad() does not disable dropout, so switch both modules to
    # eval mode for inference and put them back the way they were afterwards.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromWord(input_lang, word)
            encoder_hidden = encoder.initHidden()

            for ei in range(input_tensor.size(0)):
                _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

            decoder_input = torch.tensor([[SOS_token]], device=device)
            decoder_hidden = encoder_hidden

            decoded_chars = []
            for _ in range(max_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                _, topi = decoder_output.topk(1)
                predicted_index = topi.item()

                if predicted_index == EOS_token:
                    break
                decoded_chars.append(output_lang.index2word[predicted_index])

                # Feed the prediction back in as the next decoder input
                decoder_input = topi.squeeze().detach()

            return "".join(decoded_chars)
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

# Function to test the model
def test(encoder, decoder, language, data_type, input_lang=None, output_lang=None):
    """Return exact-match accuracy of the model on one dataset split.

    Args:
        encoder: Trained encoder.
        decoder: Trained decoder.
        language: Language code of the split to load.
        data_type: Split name (e.g. "valid").
        input_lang: Source vocabulary. If omitted, a module-level variable
            named ``input_lang`` is used (legacy notebook call style).
        output_lang: Target vocabulary. If omitted, a module-level variable
            named ``output_lang`` is used.

    Returns:
        Fraction of rows whose decoded output equals the reference string;
        0.0 when the split is empty.

    Raises:
        ValueError: if a vocabulary is neither passed nor defined globally.
    """
    # The vocabularies used to be read from globals that the training path
    # never defines; accept them explicitly and keep globals only as fallback.
    if input_lang is None:
        input_lang = globals().get("input_lang")
    if output_lang is None:
        output_lang = globals().get("output_lang")
    if input_lang is None or output_lang is None:
        raise ValueError("test() requires input_lang and output_lang vocabularies")

    pairs = load_data(language, data_type)
    if not pairs:
        return 0.0

    correct = sum(
        evaluate(encoder, decoder, pair[0], input_lang, output_lang) == pair[1]
        for pair in pairs
    )
    return correct / len(pairs)

# Main training function
def train_model(input_lang, output_lang, pairs, config):
    """Train an encoder/decoder pair and log metrics to Weights & Biases.

    Args:
        input_lang: Source vocabulary (its ``n_words`` sizes the encoder).
        output_lang: Target vocabulary (its ``n_words`` sizes the decoder and
            its ``name`` selects the validation split).
        pairs: Training rows; column 0 is the source, column 1 the target.
        config: Object exposing ``embed_size``, ``hidden_size``, ``cell_type``,
            ``num_layers``, ``dropout``, ``optimizer``, ``learning_rate`` and
            ``epochs`` as attributes.
    """
    # Initialize encoder and decoder
    encoder = Encoder(input_lang.n_words, config.embed_size, config.hidden_size, config.cell_type, config.num_layers, config.dropout).to(device)
    decoder = Decoder(output_lang.n_words, config.embed_size, config.hidden_size, config.cell_type, config.num_layers, config.dropout).to(device)

    # Define optimizer and criterion
    optimizer_cls = get_optimizer(config.optimizer)
    encoder_optimizer = optimizer_cls(encoder.parameters(), lr=config.learning_rate)
    decoder_optimizer = optimizer_cls(decoder.parameters(), lr=config.learning_rate)
    criterion = nn.NLLLoss()

    # Training loop: one optimisation step per training pair
    for epoch in range(config.epochs):
        total_loss = 0
        for pair in pairs:
            input_tensor, target_tensor = tensorsFromPair(input_lang, output_lang, pair)
            total_loss += train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)

        # Log both values in a single call so they land on the same wandb
        # step; separate calls would each advance the step counter.
        mean_loss = total_loss / len(pairs) if pairs else 0.0
        wandb.log({"epoch": epoch + 1, "Training_Loss": mean_loss})

    # Evaluate on the validation split using the vocabularies built for training
    validation_accuracy = test(
        encoder,
        decoder,
        output_lang.name,
        "valid",
        input_lang=input_lang,
        output_lang=output_lang,
    )
    wandb.log({"validation_accuracy": validation_accuracy})

# Define configuration for hyperparameter sweep
# Bayesian search maximising the "validation_accuracy" metric that
# train_model logs. Entries with "values" are searched; entries with
# "value" are held fixed.
sweep_config = {
    "method": "bayes",
    "name": "without attention sweep",
    "metric": {"name": "validation_accuracy", "goal": "maximize"},
    "parameters": {
        "embed_size": {"values": [16, 32, 64]},
        "hidden_size": {"values": [128, 256, 512]},
        "cell_type": {"values": ["LSTM", "GRU"]},
        "num_layers": {"values": [1, 2, 3]},
        "dropout": {"values": [0, 0.1, 0.2]},
        "learning_rate": {"value": 0.001},
        "optimizer": {"value": "Adam"},
        "epochs": {"value": 5}
    },
}

# NOTE: the sweep is registered exactly once, at the bottom of the file where
# the agent is launched. Registering it here as well created a second, unused
# sweep (with no project) whose id was discarded.

# Main function to train models using sweep
def train_sweep():
    """Entry point for one sweep trial.

    Starts a wandb run, reads the hyperparameters from ``wandb.config``,
    builds the vocabularies and training pairs for 'hin', and trains a model
    with them.
    """
    # A run must be active before its config can be read
    wandb.init()
    hyperparameters = wandb.config

    # (input_lang, output_lang, pairs) for the Hindi training split
    languages_and_pairs = prepare_data('hin')

    train_model(*languages_and_pairs, hyperparameters)

# Register the sweep under the assignment project, then run up to 50 trials
# of train_sweep against it from this process
wandb_id = wandb.sweep(sweep=sweep_config, project="CS6910_Assignment_3")
wandb.agent(sweep_id=wandb_id, function=train_sweep, count=50)
