In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# import other libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [27]:
train_data = pd.read_csv('aksharantar_sampled/hin/hin_train.csv')
test_data = pd.read_csv('aksharantar_sampled/hin/hin_test.csv')
valid_data = pd.read_csv('aksharantar_sampled/hin/hin_valid.csv')

# rename columns ['input_seq', 'target_seq']
train_data.columns = ['input_seq', 'target_seq']
test_data.columns = ['input_seq', 'target_seq']
valid_data.columns = ['input_seq', 'target_seq']

In [8]:
train_data.head()

Unnamed: 0,input_seq,target_seq
0,bindhya,बिन्द्या
1,kirankant,किरणकांत
2,yagyopaveet,यज्ञोपवीत
3,ratania,रटानिया
4,vaganyache,वागण्याचे


In [37]:
# define Lang
class Lang:
    def __init__(self, wordList):
        self.char2index = {}
        self.char2count = {}
        self.index2char = {0: 'A', 1: 'Z'}
        self.n_chars = 2

        for word in wordList:
            self.addWord(word)

    def addWord(self, word):
        for char in word:
            self.addChar(char)

    def addChar(self, char):
        if char not in self.char2index:
            self.char2index[char] = self.n_chars
            self.char2count[char] = 1
            self.index2char[self.n_chars] = char
            self.n_chars += 1
        else:
            self.char2count[char] += 1

    def encode(self, word):
        embedded = []
        for i in range(len(word)):
            embedded.append([self.char2index[word[i]]])
        return Variable(torch.LongTensor(embedded))

    def one_hot_encode(self, word):
        one_hot = torch.zeros(len(word), self.n_chars)
        for i in range(len(word)):
            one_hot[i][self.char2index[word[i]]] = 1
        return one_hot
    
    def decode(self, word):
        decoded = ''
        for i in range(len(word)):
            decoded += self.index2char[word[i]]
        return decoded
    
    def decode_one_hot(self, word):
        decoded = ''
        for i in range(len(word)):
            decoded += self.index2char[word[i].argmax()]
        return decoded

In [38]:
# create Lang objects
eng = Lang(train_data['input_seq'])
hin = Lang(train_data['target_seq'])

In [48]:
# create a seq2seq model using 2 RNNs
class Seq2Seq(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(Seq2Seq, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        # encoder and decoder
        self.encoder = nn.RNN(input_size, hidden_size, n_layers)
        self.decoder = nn.RNN(hidden_size, hidden_size, n_layers)

        # linear layer to get output
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        # encoder
        output, hidden = self.encoder(input, hidden)
        
        # decoder
        output, hidden = self.decoder(output, hidden)
        
        # get output
        output = self.linear(output)
        return output, hidden
    
    def init_hidden(self, batch_size):
        return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))

In [73]:
model = Seq2Seq(eng.n_chars, 128, hin.n_chars, 1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# train 1 input
def train(input_variable, target_variable):
    # zero gradients
    optimizer.zero_grad()
    
    # initialize hidden layer
    hidden = model.init_hidden(1)
    
    # get output
    output, hidden = model.forward(input_variable, hidden)

    print(output)
    print(target_variable)
    
    # calculate loss for only 1 input
    loss = criterion(output.squeeze(1), target_variable.squeeze(1))
        
    # backpropagate
    loss.backward()
    
    # update weights
    optimizer.step()
    
    return loss.data[0] / len(input_variable)

In [75]:
input = eng.one_hot_encode(train_data['input_seq'][0])
target = hin.one_hot_encode(train_data['target_seq'][0])

input = Variable(input).unsqueeze(1)
# input = input.view(len(input), 1, -1)

train(input, target)

tensor([[[ 5.8422e-02, -3.1012e-02, -1.5374e-02,  1.1672e-01, -3.1614e-02,
           6.2741e-02,  9.4655e-02, -8.6708e-02,  1.1314e-01, -1.6554e-02,
           1.9975e-02, -2.9220e-02, -3.2056e-02,  3.8242e-02, -1.9582e-01,
          -5.0839e-02,  1.4054e-02, -4.0073e-02,  7.3279e-02, -6.1698e-02,
           1.2384e-02, -3.4292e-02, -4.4332e-02, -8.5935e-02, -2.3491e-02,
           4.6409e-03,  1.1455e-02,  6.5165e-02,  9.0315e-02, -4.7754e-04,
           5.6276e-02,  2.0614e-02, -7.6677e-02,  5.3173e-03,  1.7214e-02,
           2.4812e-02, -9.5182e-02, -6.4312e-02, -6.1101e-02, -4.0227e-02,
           4.6829e-02,  4.3193e-02,  9.2885e-02,  5.7450e-02, -5.5630e-03,
          -3.7661e-02,  4.2154e-02, -3.7428e-02, -2.3863e-02, -4.5701e-02,
          -2.2759e-02,  8.2863e-02,  4.4256e-02, -2.5558e-02, -7.1612e-02,
           8.7065e-02,  3.1592e-02, -8.0684e-02,  3.4176e-02, -7.2115e-02,
          -7.8599e-03, -3.5380e-02, -4.5364e-02,  1.5377e-02, -7.1890e-02,
           1.2490e-01]],


ValueError: Expected input batch_size (7) to match target batch_size (8).

In [None]:
out, hid = model.forward(eng.one_hot_encode('hello'), model.init_hidden(1))

hin.decode_one_hot(out.data)