# Assignment 3 

### Importing Libraries

In [42]:
import matplotlib
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import csv
import time
import math
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Helper Functions

In [43]:
import time
import math


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    The minute count is the floor of ``s / 60`` and the remainder is reported
    as seconds. Both parts are rendered with ``%d``, so any fractional part
    is dropped from the text.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a job that is partly done.

    Args:
        since: Start timestamp, on the same clock as ``time.time()``.
        percent: Fraction of the work completed so far; the elapsed time is
            divided by it to extrapolate the total duration.

    Returns:
        A string built from two ``asMinutes`` renderings: the time elapsed
        since ``since`` and the extrapolated time still remaining.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / percent
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))
    
def showPlot(points):
    """Plot a sequence of values (e.g. averaged losses) on a new figure.

    Args:
        points: Sequence of numbers; plotted against their index.

    The y axis gets a major tick at every multiple of 0.2.
    """
    # plt.subplots() already creates the figure. A separate plt.figure() call
    # beforehand would only leave an extra, empty figure open on every call.
    _fig, ax = plt.subplots()
    # This locator puts ticks at regular intervals.
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
    ax.plot(points)

### Preparing Data

In [44]:
# Reading the data
# Open the train / test / validation CSV files as UTF-8 text. The paths are
# relative, so they are resolved against the current working directory.
# NOTE(review): no close() call for these handles is visible in this part of
# the file - confirm they are closed once their contents have been read.
train_file = open('tel_train.csv', encoding='utf-8')
test_file = open('tel_test.csv', encoding='utf-8')
val_file = open('tel_valid.csv', encoding='utf-8')

def read_data(file):
    """Read comma-separated (English, Telugu) word pairs from an open text file.

    Args:
        file: An iterable of text lines (for example an open file object),
            one ``english,telugu`` record per line.

    Returns:
        A list with one ``[english, telugu]`` list of fields per record, with
        the line terminator and other trailing whitespace removed.

    Raises:
        ValueError: If a non-blank line has fewer than two comma-separated
            fields.
    """
    pairs = []
    for line_number, line in enumerate(file, start=1):
        # Strip the newline from the whole record, not just the second field.
        record = line.rstrip()
        if not record:
            # Blank lines (typically a trailing empty line) carry no pair and
            # would otherwise fail when the second field is accessed.
            continue
        fields = record.split(',')
        if len(fields) < 2:
            raise ValueError(
                'line %d: expected "english,telugu", got %r' % (line_number, record))
        pairs.append(fields)
    return pairs

# Parse each split into a list of [English, Telugu] pairs.
train_pairs = read_data(train_file)
test_pairs = read_data(test_file)
val_pairs = read_data(val_file)

# Converting
# Indices 0 and 1 are reserved for the start-of-sequence and end-of-sequence
# markers; Lang starts numbering real characters at 2.
SOS_token = 0
EOS_token = 1

class Lang:
    """Character vocabulary: maps characters to integer indices and back.

    Attributes are plain dictionaries:
      * char2index - character -> index
      * char2count - character -> number of times it has been added
      * index2char - index -> character (0 and 1 are "SOS" and "EOS")
      * n_chars    - next free index, i.e. the vocabulary size so far
    """

    def __init__(self):
        self.index2char = {0: "SOS", 1: "EOS"}
        self.n_chars = 2  # SOS and EOS already occupy indices 0 and 1
        self.char2index = {}
        self.char2count = {}

    def addword(self, word):
        """Register every character of ``word`` in order."""
        for symbol in word:
            self.addchar(symbol)

    def addchar(self, char):
        """Count ``char``, assigning it the next free index on first sight."""
        if char in self.char2index:
            self.char2count[char] += 1
            return

        new_index = self.n_chars
        self.char2index[char] = new_index
        self.char2count[char] = 1
        self.index2char[new_index] = char
        self.n_chars = new_index + 1

def prep_data(pairs):
    """Build the input and output character vocabularies from word pairs.

    Args:
        pairs: Iterable of indexable records; element 0 feeds the input
            vocabulary and element 1 the output vocabulary.

    Returns:
        ``(input_lang, output_lang)`` as two ``Lang`` instances. The size of
        each vocabulary is printed as a side effect.
    """
    source_vocab, target_vocab = Lang(), Lang()
    for entry in pairs:
        source_vocab.addword(entry[0])
        target_vocab.addword(entry[1])

    print("Counted Chars:")
    for vocab in (source_vocab, target_vocab):
        print(vocab.n_chars)
    return source_vocab, target_vocab

# Build both vocabularies from the training pairs only.
input_lang, output_lang = prep_data(train_pairs)
# Print one random training pair as a quick sanity check.
print(random.choice(train_pairs))

# Default for train()'s max_length, i.e. the number of rows allocated for the
# encoder outputs.
MAX_LENGTH = 20


Counted Chars:
28
64
['sarayevo', 'సరయేవో']


### The Encoder and Decoder

In [40]:
class EncoderRNN(nn.Module):
    """Recurrent encoder that embeds and consumes one input index per call.

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
        hidden_size: Hidden state size of the recurrent cell.
        n_layers: Number of stacked recurrent layers.
        drop: Dropout probability between recurrent layers. Only passed on
            when ``n_layers > 1``; with a single layer there is nothing to
            apply it to, and PyTorch would merely emit a warning.
        bidir: Whether the recurrent cell is bidirectional.
        cell_type: One of ``'rnn'``, ``'gru'`` or ``'lstm'``.
        embed_size: Embedding dimension fed into the recurrent cell.

    Raises:
        ValueError: If ``cell_type`` is not one of the supported names.
    """

    def __init__(self, input_size, hidden_size, n_layers=1, drop=0.1, bidir=True, cell_type='lstm', embed_size=300):
        super(EncoderRNN, self).__init__()
        # A lookup table selects exactly one cell class. The former
        # `if 'rnn' ... if 'gru' ... else` chain let the `else` branch replace
        # a freshly built nn.RNN with an nn.LSTM.
        cell_classes = {'rnn': nn.RNN, 'gru': nn.GRU, 'lstm': nn.LSTM}
        if cell_type not in cell_classes:
            raise ValueError(
                "cell_type must be one of 'rnn', 'gru' or 'lstm', got %r" % (cell_type,))

        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.num_directions = 2 if bidir else 1
        self.cell_type = cell_type

        self.embedding = nn.Embedding(input_size, embed_size)
        self.cell = cell_classes[cell_type](
            embed_size, hidden_size, n_layers,
            dropout=drop if n_layers > 1 else 0.0,
            bidirectional=bidir)

    def forward(self, input, hidden, cell=None):
        """Run one step of the encoder.

        Args:
            input: Index tensor, expected to hold a single index; its
                embedding is reshaped to ``(1, 1, -1)``.
            hidden: Hidden state, e.g. from ``initHidden()`` or a previous call.
            cell: LSTM cell state. Ignored (and returned unchanged) for the
                other cell types; zeros are used when it is ``None``.

        Returns:
            ``(output, hidden, cell)``.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        if self.cell_type == 'lstm':
            if cell is None:
                cell = torch.zeros_like(hidden)
            # nn.LSTM takes its state as a single (h, c) tuple.
            output, (hidden, cell) = self.cell(embedded, (hidden, cell))
        else:
            output, hidden = self.cell(embedded, hidden)
        return output, hidden, cell

    def initHidden(self):
        """Return an all-zero initial state for a batch of one.

        The leading dimension is ``n_layers * num_directions``, as the
        recurrent cell requires. The tensor is created on the device that
        holds the module's weights so both can be used together.
        """
        return torch.zeros(self.n_layers * self.num_directions, 1, self.hidden_size,
                           device=self.embedding.weight.device)

### Decoder

In [None]:
class DecoderRNN(nn.Module):
    """Recurrent decoder (no attention) that emits one output step per call.

    Args:
        hidden_size: Hidden state size of the recurrent cell.
        output_size: Size of the output vocabulary (embedding rows and number
            of log-probabilities produced).
        n_layers: Number of stacked recurrent layers.
        drop: Dropout probability between recurrent layers. Only passed on
            when ``n_layers > 1``; with a single layer there is nothing to
            apply it to, and PyTorch would merely emit a warning.
        bidir: Whether the recurrent cell is bidirectional.
        cell_type: One of ``'rnn'``, ``'gru'`` or ``'lstm'``.
        embed_size: Embedding dimension fed into the recurrent cell.

    Raises:
        ValueError: If ``cell_type`` is not one of the supported names.
    """

    def __init__(self, hidden_size, output_size, n_layers=1, drop=0.1, bidir=True, cell_type='lstm', embed_size=300):
        super(DecoderRNN, self).__init__()
        # A lookup table selects exactly one cell class. The former
        # `if 'rnn' ... if 'gru' ... else` chain let the `else` branch replace
        # a freshly built nn.RNN with an nn.LSTM.
        cell_classes = {'rnn': nn.RNN, 'gru': nn.GRU, 'lstm': nn.LSTM}
        if cell_type not in cell_classes:
            raise ValueError(
                "cell_type must be one of 'rnn', 'gru' or 'lstm', got %r" % (cell_type,))

        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.num_directions = 2 if bidir else 1
        # forward() branches on the cell type, so it has to be remembered.
        self.cell_type = cell_type

        self.embedding = nn.Embedding(output_size, embed_size)
        # The recurrent cell consumes the embedding, so its input width is
        # embed_size (not hidden_size).
        self.cell = cell_classes[cell_type](
            embed_size, hidden_size, n_layers,
            dropout=drop if n_layers > 1 else 0.0,
            bidirectional=bidir)
        # A bidirectional cell concatenates both directions in its output.
        self.out = nn.Linear(hidden_size * self.num_directions, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden, cell=None, enc_output=None):
        """Run one decoding step.

        Args:
            input: Index tensor, expected to hold a single index; its
                embedding is reshaped to ``(1, 1, -1)``.
            hidden: Hidden state from the previous step (or the encoder).
            cell: LSTM cell state. Ignored (and returned unchanged) for the
                other cell types; zeros are used when it is ``None``.
            enc_output: Accepted only so that attention and non-attention
                decoders can share one call signature; not used here.

        Returns:
            ``(output, hidden, attn_weights, cell)`` where ``output`` holds
            log-probabilities over the output vocabulary and ``attn_weights``
            is the placeholder ``0``.
        """
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)

        if self.cell_type == 'lstm':
            if cell is None:
                cell = torch.zeros_like(hidden)
            # nn.LSTM takes its state as a single (h, c) tuple.
            output, (hidden, cell) = self.cell(output, (hidden, cell))
        else:
            output, hidden = self.cell(output, hidden)
        output = self.softmax(self.out(output[0]))

        # Dummy attention weights for same train function
        attn_weights = 0

        return output, hidden, attn_weights, cell

    def initHidden(self):
        """Return an all-zero initial state for a batch of one.

        The leading dimension is ``n_layers * num_directions``, as the
        recurrent cell requires. The tensor is created on the device that
        holds the module's weights so both can be used together.
        """
        return torch.zeros(self.n_layers * self.num_directions, 1, self.hidden_size,
                           device=self.embedding.weight.device)

### Data Preprocessing to Tensors

In [41]:
def indexesFromWord(lang, word):
    """Translate each character of ``word`` to its index in ``lang.char2index``.

    A character missing from the vocabulary raises ``KeyError``.
    """
    indexes = []
    for symbol in word:
        indexes.append(lang.char2index[symbol])
    return indexes

def tensorFromWord(lang, word):
    """Encode ``word`` as a column tensor of indices terminated by EOS.

    Returns a ``torch.long`` tensor on ``device``, reshaped with
    ``view(-1, 1)`` so that each row holds one index.
    """
    char_ids = indexesFromWord(lang, word) + [EOS_token]
    flat = torch.tensor(char_ids, dtype=torch.long, device=device)
    return flat.view(-1, 1)

def tensorsFromPair(pair):
    """Convert one word pair into ``(input_tensor, target_tensor)``.

    ``pair[0]`` is encoded with the module-level ``input_lang`` vocabulary and
    ``pair[1]`` with ``output_lang``.
    """
    source_word, target_word = pair[0], pair[1]
    return (tensorFromWord(input_lang, source_word),
            tensorFromWord(output_lang, target_word))

### Training the model

In [None]:
# Threshold compared against random.random() in train(): a sample is trained
# with teacher forcing when the draw falls below this value.
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step on a single (input, target) pair.

    Args:
        input_tensor: Source indices; iterated along dimension 0, one element
            per encoder step.
        target_tensor: Target indices; iterated along dimension 0, one element
            per decoder step.
        encoder: Module called as ``encoder(input, hidden, cell)`` returning
            ``(output, hidden, cell)`` and providing ``initHidden()``.
        decoder: Module called as ``decoder(input, hidden, cell, encoder_outputs)``
            returning ``(output, hidden, attn_weights, cell)``.
        encoder_optimizer: Optimizer for the encoder parameters.
        decoder_optimizer: Optimizer for the decoder parameters.
        criterion: Loss applied to each decoder output and target element.
        max_length: Number of rows in the encoder-output buffer handed to the
            decoder. Encoder steps beyond it are still run but not stored.

    Returns:
        The summed loss divided by the target length, as a Python float.
    """
    encoder_hidden = encoder.initHidden()
    # An LSTM needs a cell state next to the hidden state; start it at zero
    # with the same shape. It is simply threaded through the calls below.
    encoder_cell = torch.zeros_like(encoder_hidden)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # Allocated on the first encoder step so the width follows the encoder's
    # real output size (a bidirectional cell emits twice hidden_size).
    encoder_outputs = None

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden, encoder_cell = encoder(
            input_tensor[ei], encoder_hidden, encoder_cell)
        if encoder_outputs is None:
            encoder_outputs = torch.zeros(
                max_length, encoder_output.size(-1), device=device)
        if ei < max_length:
            encoder_outputs[ei] = encoder_output[0, 0]

    if encoder_outputs is None:
        # Empty input: keep the buffer well-formed for the decoder call.
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    decoder_input = torch.tensor([[SOS_token]], device=device)

    # The decoder starts from the encoder's final state.
    decoder_hidden = encoder_hidden
    decoder_cell = encoder_cell

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        # Unpack in the order the decoder returns:
        # (output, hidden, attn_weights, cell).
        decoder_output, decoder_hidden, _attention, decoder_cell = decoder(
            decoder_input, decoder_hidden, decoder_cell, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            decoder_input = target_tensor[di]
        else:
            # Without teacher forcing: use its own prediction as the next input.
            _topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [None]:
def trainIters(encoder, decoder, n_iters, train_pairs, val_pairs, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train the encoder/decoder on ``n_iters`` randomly drawn training pairs.

    Args:
        encoder: Encoder module passed through to ``train``.
        decoder: Decoder module passed through to ``train``.
        n_iters: Number of single-pair training steps to run.
        train_pairs: Sequence of word pairs to sample from (with replacement).
        val_pairs: Validation pairs. Accepted for interface compatibility;
            not used by this function.
        print_every: Print the running average loss every this many steps.
        plot_every: Record an averaged loss point every this many steps.
        learning_rate: Learning rate for both SGD optimizers.

    The recorded loss points are passed to ``showPlot`` at the end.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # Sample from the train_pairs argument; the previous code read an
    # undefined name `pairs` here.
    training_pairs = [tensorsFromPair(random.choice(train_pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step, (input_tensor, target_tensor) in enumerate(training_pairs, start=1):
        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))

        if step % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses)