In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# import other libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
from typing import *
import time
import math
import random
import wandb
wandb.login()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmantra7[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# define Lang
class Lang:
	def __init__(self, wordList):
		self.char2index = {'A': 0, 'Z': 1, '_': 2, '^':3}
		self.char2count = {}
		self.index2char = {0: 'A', 1: 'Z', 2: '_', 3: '^'}
		self.n_chars = 4

		for word in wordList:
			self.addWord(word)

	def addWord(self, word):
		for char in word:
			self.addChar(char)

	def addChar(self, char):
		if char not in self.char2index:
			self.char2index[char] = self.n_chars
			self.char2count[char] = 1
			self.index2char[self.n_chars] = char
			self.n_chars += 1
		elif char != '_' and char != '^' and char != 'A' and char != 'Z':
			self.char2count[char] += 1

	def encode(self, word):
		encoded = [0] * len(word)
		for i in range(len(word)):
			if word[i] in self.char2index:
				encoded[i] = self.char2index[word[i]]
			else:
				encoded[i] = self.char2index['^']
		return encoded
	
	def one_hot_encode(self, word):
		one_hot = torch.zeros(len(word), self.n_chars, device=device)
		for i in range(len(word)):
			if word[i] in self.char2index:
				one_hot[i][self.char2index[word[i]]] = 1
			else:
				one_hot[i][self.char2index['^']] = 1			
		return one_hot
		
	def decode(self, word):
		decoded = ''
		for i in range(len(word)):
			if word[i].argmax().item() in self.index2char:
				decoded += self.index2char[word[i].argmax().item()]
			else:
				decoded += '^'
		return decoded
	
	def decode_words(self, words):
		decoded = ['']*len(words[0])
		for i in range(len(words[0])):
			for j in range(len(words)):
				if words[j][i].argmax().item() in self.index2char:
					decoded[i] += self.index2char[words[j][i].argmax().item()]
				else:
					decoded[i] += '^'
		 
		return decoded
	
	def decode_one_hot(self, word):
		decoded = ''
		for i in range(len(word)):
			if word[i].argmax().item() in self.index2char:
				decoded += self.index2char[word[i].argmax().item()]
			else:
				decoded += '^'
		return decoded

In [3]:
def tensorFromWord(lang : Lang, word : str):
	indexes = lang.encode(word)
	return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorFromWords(lang : Lang, words : List[str]):
	tensors = []
	for word in words:
		tensors.append(tensorFromWord(lang, word))
	return torch.cat(tensors, dim=1)

def tensorsFromPair(pair, inp_lang : Lang, out_lang : Lang):
	input_tensor = tensorFromWord(inp_lang, pair[0])
	target_tensor = tensorFromWord(out_lang, pair[1])
	return (input_tensor.unsqueeze(1), target_tensor)

def tensorsFromPairs(pairs, inp_lang : Lang, out_lang : Lang, batch_size):
	tensors_inp = []
	tensors_out = []
	for pair in pairs:
		tensors_inp.append(tensorFromWord(inp_lang, pair[0]))
		tensors_out.append(tensorFromWord(out_lang, pair[1]))
	return torch.cat(tensors_inp, dim=1).view(-1,1,batch_size), torch.cat(tensors_out, dim=1)

In [4]:
# # create dataset
# class AksharantarDataset(Dataset):
# 	def __init__(self, data, inp_lang, out_lang):
# 		self.data = data
# 		self.inp_lang = inp_lang
# 		self.out_lang = out_lang

# 	def __len__(self):
# 		return len(self.data)

# 	def __getitem__(self, idx):
# 		if torch.is_tensor(idx):
# 			idx = idx.tolist()

# 		inp_seq = self.inp_lang.one_hot_encode(self.data['input_seq'][idx]).unsqueeze(1)
# 		out_seq = self.out_lang.one_hot_encode(self.data['target_seq'][idx]).unsqueeze(1)

# 		sample = {'input_seq': inp_seq, 'target_seq': out_seq}
# 		return sample

In [5]:
def DataLoader(lang : str, pad : bool = False, max_length : int = 40):
	train_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_train.csv')
	test_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_test.csv')
	valid_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_valid.csv')
	
	train_data.columns = ['input_seq', 'target_seq']
	test_data.columns = ['input_seq', 'target_seq']
	valid_data.columns = ['input_seq', 'target_seq']
        
	train_data['input_seq'] = train_data['input_seq'].apply(lambda x: x + 'Z')
	train_data['target_seq'] = train_data['target_seq'].apply(lambda x: x + 'Z')
	test_data['input_seq'] = test_data['input_seq'].apply(lambda x: x + 'Z')
	test_data['target_seq'] = test_data['target_seq'].apply(lambda x: x + 'Z')
	valid_data['input_seq'] = valid_data['input_seq'].apply(lambda x: x + 'Z')
	valid_data['target_seq'] = valid_data['target_seq'].apply(lambda x: x + 'Z')

	if pad:
		train_data['input_seq'] = train_data['input_seq'].apply(lambda x: x + '_'*(max_length -1 - len(x)))
		train_data['target_seq'] = train_data['target_seq'].apply(lambda x: x + '_'*(max_length -1 - len(x)))
		test_data['input_seq'] = test_data['input_seq'].apply(lambda x: x + '_'*(max_length -1 - len(x)))
		test_data['target_seq'] = test_data['target_seq'].apply(lambda x: x + '_'*(max_length -1 - len(x)))
		valid_data['input_seq'] = valid_data['input_seq'].apply(lambda x: x + '_'*(max_length -1 - len(x)))
		valid_data['target_seq'] = valid_data['target_seq'].apply(lambda x: x + '_'*(max_length -1 - len(x)))

	return train_data, test_data, valid_data

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [6]:
def get_cell(str):
	if str == 'lstm':
		return nn.LSTM
	elif str == 'gru':
		return nn.GRU
	elif str == 'rnn':
		return nn.RNN
	else:
		raise ValueError('Invalid cell type')

In [7]:
class EncoderRNN(nn.Module):
	def __init__(self, input_size, embed_size, hidden_size, n_layers=1, type='gru', dropout=0.2, batch_size=1):
		super(EncoderRNN, self).__init__()
		self.hidden_size = hidden_size
		self.n_layers = n_layers
		self.type_t = type
		self.batch_size = batch_size

		self.embedding = nn.Embedding(input_size, embed_size)
		self.cell = get_cell(type)(embed_size, hidden_size, n_layers, dropout=dropout)

	def forward(self, input, hidden):
		embedded = self.embedding(input)
		output = embedded
		output, hidden = self.cell(output, hidden)
		return output, hidden

	def initHidden(self, is_batch=True):
		if is_batch:
			if self.type_t == 'lstm':
				return torch.zeros(self.n_layers, self.batch_size, self.hidden_size, device=device), torch.zeros(self.n_layers, self.batch_size, self.hidden_size, device=device)
			else:
				return torch.zeros(self.n_layers, self.batch_size, self.hidden_size, device=device)
		else:
			if self.type_t == 'lstm':
				return torch.zeros(self.n_layers, 1, self.hidden_size, device=device), torch.zeros(self.n_layers, 1, self.hidden_size, device=device)
			else:
				return torch.zeros(self.n_layers, 1, self.hidden_size, device=device)
	
class DecoderRNN(nn.Module):
	def __init__(self, hidden_size, output_size, n_layers=1, type='gru', dropout=0.2, batch_size=1):
		super(DecoderRNN, self).__init__()
		self.hidden_size = hidden_size
		self.n_layers = n_layers
		self.type_t = type
		self.batch_size = batch_size

		self.embedding = nn.Embedding(output_size, hidden_size)
		self.cell = get_cell(type)(hidden_size, hidden_size, n_layers, dropout=dropout)
		self.out = nn.Linear(hidden_size, output_size)
		self.softmax = nn.LogSoftmax(dim=1)

	def forward(self, input, hidden, is_batch=True):
		if is_batch:
			output = self.embedding(input).view(1, self.batch_size, -1)
		else:
			output = self.embedding(input).view(1, 1, -1)
		output = F.relu(output)
		output, hidden = self.cell(output, hidden)
		output = self.softmax(self.out(output[0]))
		return output, hidden

	def initHidden(self):
		if self.type_t == 'lstm':
			return torch.zeros(self.n_layers, self.batch_size, self.hidden_size, device=device), torch.zeros(self.n_layers, self.batch_size, self.hidden_size, device=device)
		else:
			return torch.zeros(self.n_layers, self.batch_size, self.hidden_size, device=device)


In [14]:
class Seq2Seq(nn.Module):
	def __init__(self, input_size, hidden_size, embed_size, output_size, n_layers=1, type='gru', dropout=0.2, batch_size=1):
		super(Seq2Seq, self).__init__()
		self.input_size = input_size
		self.hidden_size = hidden_size
		self.output_size = output_size
		self.n_layers = n_layers
		self.batch_size = batch_size

		self.encoder = EncoderRNN(input_size, embed_size, hidden_size, n_layers, type, dropout, batch_size).to(device)
		self.decoder = DecoderRNN(hidden_size, output_size, n_layers, type, dropout, batch_size).to(device)

	def forward(self, input_tensor, target_tensor, max_length=50):
		encoder_hidden = self.encoder.initHidden()

		input_length = input_tensor.size(0)
		target_length = target_tensor.size(0)

		encoder_outputs = torch.zeros(max_length, self.encoder.hidden_size, device=device)

		for ei in range(input_length):
			encoder_output, encoder_hidden = self.encoder(
				input_tensor[ei], encoder_hidden)
			encoder_outputs[ei] = encoder_output[0, 0]

		lst = []
		for i in range(self.batch_size):
			lst.append([0])
		decoder_input = torch.tensor(lst, device=device)  # SOS

		decoder_hidden = encoder_hidden
		use_teacher_forcing = True if random.random() < 0.5 else False

		decoder_outputs = []
		if use_teacher_forcing:
			# Teacher forcing: Feed the target as the next input
			for di in range(target_length):
				decoder_output, decoder_hidden = self.decoder(
					decoder_input, decoder_hidden)
				decoder_outputs.append(decoder_output)
				decoder_input = target_tensor[di]  # Teacher forcing
		else:
			# Without teacher forcing: use its own predictions as the next input
			for di in range(target_length):
				decoder_output, decoder_hidden = self.decoder(
					decoder_input, decoder_hidden)
				decoder_outputs.append(decoder_output)
				topv, topi = decoder_output.topk(1)
				decoder_input = topi.squeeze().detach()  # detach from history as input
				if self.batch_size == 1:
					if topi == 1:
						break

		return decoder_outputs

	def predict(self, input_tensor, max_length = 50):
		encoder_hidden = self.encoder.initHidden(False)

		input_length = input_tensor.size(0)

		encoder_outputs = torch.zeros(max_length, self.encoder.hidden_size, device=device)

		for ei in range(input_length):
			encoder_output, encoder_hidden = self.encoder(
				input_tensor[ei], encoder_hidden)
			encoder_outputs[ei] = encoder_output[0, 0]

		decoder_input = torch.tensor([[0]], device=device)  # SOS

		decoder_outputs = []

		decoder_hidden = encoder_hidden
		for di in range(max_length):
			decoder_output, decoder_hidden = self.decoder(
				decoder_input, decoder_hidden, False)
			topv, topi = decoder_output.data.topk(1)
			if topi == 1:
				break
			decoder_outputs.append(decoder_output)
			decoder_input = topi.squeeze().detach()
		
		return decoder_outputs

In [15]:
class Translator:
	def __init__(self, lang, embed_size=10, hidden_size=10, n_layers=1, max_length=50, type='gru', dropout=0.2, batch_size=1):
		if batch_size != 1:
			self.train_data, self.test_data, self.valid_data = DataLoader(lang, pad = True, max_length = max_length)
		else:
			self.train_data, self.test_data, self.valid_data = DataLoader(lang, pad = False, max_length = max_length)

		self.inp_lang = Lang(self.train_data['input_seq'])
		self.out_lang = Lang(self.train_data['target_seq'])
		self.batch_size = batch_size

		self.model = Seq2Seq(self.inp_lang.n_chars, hidden_size, embed_size, self.out_lang.n_chars, n_layers, type, dropout, batch_size)
		self.criterion = nn.NLLLoss()
		self.max_length = max_length
		self.encoder_optim = optim.SGD(self.model.encoder.parameters(), lr=0.001)
		self.decoder_optim = optim.SGD(self.model.decoder.parameters(), lr=0.001)

		ps = [(self.train_data['input_seq'][i], self.train_data['target_seq'][i]) for i in range(len(self.train_data))]
		left = len(ps) % batch_size
		if left != 0:
			ps = ps[:-left]
		self.pairs = [tensorsFromPairs(ps[x:x+batch_size], self.inp_lang, self.out_lang, self.batch_size) for x in range(0, len(ps), batch_size)]

	def trainOne(self, input_tensor, target_tensor):
		self.encoder_optim.zero_grad()
		self.decoder_optim.zero_grad()

		decoder_outputs = self.model.forward(input_tensor, target_tensor, self.max_length)

		loss = 0
		for di in range(len(decoder_outputs)):
			loss += self.criterion(decoder_outputs[di], target_tensor[di])
		loss.backward()

		self.encoder_optim.step()
		self.decoder_optim.step()

		return loss.item() / target_tensor.size(0)

	def train(self,epoch=1, n_iters=10000, print_every=1000, plot_every=100, learning_rate=0.01, rand=False, dumpName='model'):
		self.encoder_optim = optim.SGD(self.model.encoder.parameters(), lr=learning_rate)
		self.decoder_optim = optim.SGD(self.model.decoder.parameters(), lr=learning_rate)

		start = time.time()
		train_loss = []
		train_acc = []
		valid_loss = []
		valid_acc = []

		for i in range(epoch):
			print_loss_total = 0
			tot_loss = 0
			print("Epoch: ", i)
			if rand:
				training_pairs = [random.choice(self.pairs) for i in range(n_iters)]
			else:
				training_pairs = self.pairs

			for iter in tqdm(range(1, len(training_pairs) + 1)):
				training_pair = training_pairs[iter - 1]
				input_tensor = training_pair[0]
				target_tensor = training_pair[1]

				loss = self.trainOne(input_tensor, target_tensor)
				print_loss_total += loss
				tot_loss += loss

				if iter % print_every == 0:
					print_loss_avg = print_loss_total / print_every
					print_loss_total = 0
					print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
												iter, iter / n_iters * 100, print_loss_avg))
			train_loss.append(tot_loss / len(training_pairs))
			train_acc.append(self.accuracy(self.train_data))
			valid_stats = self.calculate_stats(self.valid_data)
			valid_loss.append(valid_stats[0])
			valid_acc.append(valid_stats[1])
			pickle.dump(self, open(dumpName + '_'+str(i) + '.pkl', 'wb'))
		return train_loss, train_acc, valid_loss, valid_acc

	def accuracy(self, data):
		with torch.no_grad():
			acc = 0
			if len(data) % self.batch_size != 0:
				data = data[:-(len(data) % self.batch_size)]
			for i in range(0, len(data), self.batch_size):
				tensors = tensorsFromPairs([(data['input_seq'][j], data['target_seq'][j]) for j in range(i, i + self.batch_size)], self.inp_lang, self.out_lang, self.batch_size)
				outputs = self.model.forward(tensors[0], tensors[1], self.max_length)
				words = self.out_lang.decode_words(outputs)
				acc += np.sum([words[j] == data['target_seq'][i + j] for j in range(self.batch_size)])
			return acc / len(data)

	def calculate_stats(self, data):
		with torch.no_grad():
			loss = 0
			acc = 0
			if len(data) % self.batch_size != 0:
				data = data[:-(len(data) % self.batch_size)]
			for i in range(0, len(data), self.batch_size):
				tensors = tensorsFromPairs([(data['input_seq'][j], data['target_seq'][j]) for j in range(i, i + self.batch_size)], self.inp_lang, self.out_lang, self.batch_size)
				outputs = self.model.forward(tensors[0], tensors[1], self.max_length)
				loss += np.sum([self.criterion(outputs[di], tensors[1][di]) / len(outputs) for di in range(len(outputs))])
				words = self.out_lang.decode_words(outputs)
				acc += np.sum([words[j] == data['target_seq'][i + j] for j in range(self.batch_size)])
			return loss.item() / len(data), acc / len(data)
	
	def translate(self, word):
		with torch.no_grad():
			tensor = tensorFromWord(self.inp_lang, word).unsqueeze(1)
			outs = self.model.predict(tensor, self.max_length)
			return self.out_lang.decode(outs)

In [16]:
sweep_configuration_new = {
	'method': 'random',
	'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
	'parameters': 
	{
		'epochs' : {'values': [10]},
		'lr' : {'values': [0.001, 0.005]},
		'hidden_size' : {'values': [64, 128, 256]},
        'embed_size' : {'values': [32, 64]},
        'n_layers' : {'values': [1]},
        'type' : {'values': ['gru']},
        'dropout' : {'values': [0, 0.2, 0.3]}
	}
}

In [17]:
# f = open('configs', 'w')
# count = 0

# def get_name(config):
# 	global count
# 	count += 1
# 	f.write(str(count) + '_' + str(config) + '\n')
# 	f.flush()
# 	return 'lr'+str(config['lr'])+'_hidden_size'+str(config['hidden_size'])+'_embed_size'+str(config['embed_size'])+'_n_layers'+str(config['n_layers'])+'_type'+str(config['type'])+'_dropout'+str(config['dropout'])

# # 1: Define objective/training function
# def objective(config):
# 	translator = Translator('guj', hidden_size=config.hidden_size, embed_size=config.embed_size, n_layers=config.n_layers, type=config.type, dropout=config.dropout)
# 	dump = 'trans'+'_'+str(config.hidden_size)+'_'+str(config.embed_size)+'_'+str(config.n_layers)+'_'+str(config.type)+'_'+str(config.dropout)
# 	train_loss, train_acc, valid_loss, valid_acc = translator.train(epoch=config.epochs, print_every=10000, learning_rate=config.lr, dumpName=dump)

# 	for i in range(config.epochs):
# 		wandb.log({'train_loss': train_loss[i], 'train_accuracy': train_acc[i], 'val_loss': valid_loss[i], 'val_accuracy': valid_acc[i]})

# def main():
# 	run = wandb.init(project='rnn-first-proj')
# 	objective(wandb.config)
# 	run.name = get_name(wandb.config)
# 	run.finish()

# # 2: Define the search space
# sweep_configuration = sweep_configuration_new

# # 3: Start the sweep
# sweep_id = wandb.sweep(sweep=sweep_configuration, project='rnn-first-proj')
# wandb.agent(sweep_id, function=main, count=10)

# f.close()

In [19]:
trans = Translator('guj', hidden_size=256, embed_size=16, n_layers=2, type='lstm', dropout=0.2, batch_size=64, max_length=40)
trans.train(epoch=10, print_every=25, learning_rate=0.001)

Epoch:  0


  3%|▎         | 25/799 [00:11<06:27,  2.00it/s]

0m 11s (- 78m 26s) (25 0%) 2.6735


  6%|▋         | 50/799 [00:24<05:56,  2.10it/s]

0m 24s (- 80m 46s) (50 0%) 1.2860


  9%|▉         | 75/799 [00:37<06:22,  1.89it/s]

0m 37s (- 82m 55s) (75 0%) 1.2669


 13%|█▎        | 100/799 [00:49<05:39,  2.06it/s]

0m 49s (- 82m 19s) (100 1%) 1.2188


 16%|█▌        | 125/799 [01:02<05:46,  1.95it/s]

1m 2s (- 82m 17s) (125 1%) 1.2059


 19%|█▉        | 150/799 [01:14<05:12,  2.08it/s]

1m 14s (- 81m 49s) (150 1%) 1.1832


 22%|██▏       | 175/799 [01:27<05:04,  2.05it/s]

1m 27s (- 81m 49s) (175 1%) 1.1583


 25%|██▌       | 200/799 [01:40<05:19,  1.88it/s]

1m 40s (- 81m 46s) (200 2%) 1.0938


 28%|██▊       | 225/799 [01:52<04:47,  2.00it/s]

1m 52s (- 81m 32s) (225 2%) 1.0819


 31%|███▏      | 250/799 [02:05<04:48,  1.91it/s]

2m 5s (- 81m 36s) (250 2%) 1.0786


 34%|███▍      | 275/799 [02:18<05:04,  1.72it/s]

2m 18s (- 81m 33s) (275 2%) 1.0531


 38%|███▊      | 300/799 [02:32<05:11,  1.60it/s]

2m 32s (- 82m 12s) (300 3%) 1.0307


 41%|████      | 325/799 [02:45<03:56,  2.00it/s]

2m 45s (- 82m 15s) (325 3%) 1.0204


 44%|████▍     | 350/799 [03:01<05:20,  1.40it/s]

3m 1s (- 83m 15s) (350 3%) 1.0182


 47%|████▋     | 375/799 [03:17<04:18,  1.64it/s]

3m 17s (- 84m 24s) (375 3%) 1.0078


 50%|█████     | 400/799 [03:33<04:12,  1.58it/s]

3m 33s (- 85m 24s) (400 4%) 1.0107


 53%|█████▎    | 425/799 [03:49<03:33,  1.75it/s]

3m 49s (- 86m 5s) (425 4%) 1.0092


 56%|█████▋    | 450/799 [04:04<03:36,  1.61it/s]

4m 4s (- 86m 34s) (450 4%) 0.9948


 59%|█████▉    | 475/799 [04:19<03:18,  1.63it/s]

4m 19s (- 86m 46s) (475 4%) 0.9895


 63%|██████▎   | 500/799 [04:36<03:36,  1.38it/s]

4m 36s (- 87m 24s) (500 5%) 0.9831


 66%|██████▌   | 525/799 [04:52<02:49,  1.61it/s]

4m 52s (- 88m 6s) (525 5%) 0.9776


 66%|██████▌   | 525/799 [04:53<02:33,  1.79it/s]


KeyboardInterrupt: 

In [None]:
trans.accuracy(trans.valid_data)

0.0

In [21]:
trans.translate('a')

'ા'

In [None]:
# # create a seq2seq model using 2 RNNs
# class Seq2Seq(nn.Module):
# 	def __init__(self, input_size, hidden_size, output_size, n_layers=1):
# 		super(Seq2Seq, self).__init__()
# 		self.input_size = input_size
# 		self.hidden_size = hidden_size
# 		self.output_size = output_size
# 		self.n_layers = n_layers

# 		# encoder and decoder
# 		self.encoder = nn.RNN(input_size, hidden_size, n_layers)
# 		self.decoder = nn.RNN(hidden_size, hidden_size, n_layers)

# 		# linear layer to get output
# 		self.linear = nn.Linear(hidden_size, output_size)

# 	def forward(self, input, hidden):
# 		# encoder
# 		output, hidden = self.encoder(input, hidden)
		
# 		# decoder
# 		output, hidden = self.decoder(output, hidden)
		
# 		# get output
# 		output = self.linear(output)
# 		return output, hidden
	
# 	def predict(self, input, inp_lang, out_lang):
# 		out, hidden = self.forward(inp_lang.one_hot_encode(input).unsqueeze(1), self.init_hidden(1))
# 		return out_lang.decode_one_hot(out)
	
# 	def init_hidden(self, batch_size):
# 		return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))

In [None]:
# class Translator:
# 	def __init__(self, lang):
# 		train_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_train.csv')
# 		test_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_test.csv')
# 		valid_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_valid.csv')

# 		train_data.columns = ['input_seq', 'target_seq']
# 		test_data.columns = ['input_seq', 'target_seq']
# 		valid_data.columns = ['input_seq', 'target_seq']

# 		self.inp_lang = Lang(train_data['input_seq'])
# 		self.out_lang = Lang(train_data['target_seq'])

# 		self.model = Seq2Seq(self.inp_lang.n_chars, 10, self.out_lang.n_chars, 1)
# 		self.criterion = nn.CrossEntropyLoss()
# 		self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

# 		train_dataset = AksharantarDataset(train_data, self.inp_lang, self.out_lang)
# 		test_dataset = AksharantarDataset(test_data, self.inp_lang, self.out_lang)
# 		valid_dataset = AksharantarDataset(valid_data, self.inp_lang, self.out_lang)

# 		self.train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
# 		self.test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)
# 		self.valid_dataloader = DataLoader(valid_dataset, batch_size=1, shuffle=True)

# 	def translate(self, word):
# 		return self.model.predict(word, self.inp_lang, self.out_lang)
	
# 	def train_one(self, inp, target):
# 		# zero gradients
# 		self.optimizer.zero_grad()
		
# 		# initialize hidden layer
# 		hidden = self.model.init_hidden(1)
		
# 		# get output
# 		output, hidden = self.model.forward(inp, hidden)
		
# 		mx_len = min(len(output), len(target))

# 		if(len(output) != len(inp)):
# 			print('lol')

# 		# append output and target with 'Z' to make them of mx_len
# 		while(len(output) < mx_len):
# 			output = torch.cat((output, self.out_lang.one_hot_encode_char('Z')), 0)

# 		while(len(target) < mx_len):
# 			target = torch.cat((target, self.out_lang.one_hot_encode_char('Z')), 0)
		
# 		# calculate loss 
# 		loss = self.criterion(torch.flatten(output[:mx_len], 0, 1), torch.flatten(target[:mx_len], 0, 1).max(1)[1])
			
# 		# backpropagate
# 		loss.backward()
		
# 		# update weights
# 		self.optimizer.step()

# 		return loss.data.item() / len(output)
	
# 	def train_epoch(self, data_loader, print_every=100):
# 		loss = 0
# 		for i_batch, sample_batched in tqdm(enumerate(data_loader)):
# 			loss += self.train_one(sample_batched['input_seq'][0], sample_batched['target_seq'][0])
# 			if i_batch % print_every == print_every-1:
# 				print(' Train Loss: ', loss / (i_batch+1))
# 				# print(' Valid Loss: ', self.calc_loss_full(self.valid_dataloader))
# 				# print(' Train Accuracy: ', self.calc_accuracy(self.train_dataloader))
# 				# print(' Valid Accuracy: ', self.calc_accuracy(self.valid_dataloader))
# 		return self.calc_loss_full(data_loader)
	
# 	def train(self, epochs, print_every=100):
# 		losses = []
# 		for epoch in range(epochs):
# 			print('Epoch ', epoch + 1)
# 			loss = self.train_epoch(self.train_dataloader, print_every)
# 			losses.append(loss)
# 		return losses
	
# 	def calc_loss_full(self, data_loader):
# 		loss = 0
# 		for i_batch, sample_batched in tqdm(enumerate(data_loader)):
# 			inp = sample_batched['input_seq'][0]
# 			target = sample_batched['target_seq'][0]
# 			hidden = self.model.init_hidden(1)
# 			output, hidden = self.model.forward(inp, hidden)
# 			mx_len = min(len(output), len(target))
# 			while(len(output) < mx_len):
# 				output = torch.cat((output, self.out_lang.one_hot_encode_char('Z')), 0)
# 			while(len(target) < mx_len):
# 				target = torch.cat((target, self.out_lang.one_hot_encode_char('Z')), 0)
# 			loss += self.criterion(torch.flatten(output[:mx_len], 0, 1), torch.flatten(target[:mx_len], 0, 1).max(1)[1])
# 		return loss / len(data_loader)
	
# 	def calc_accuracy(self, dataset):
# 		return np.sum([(self.translate(sample_batched['input_seq']) ==  sample_batched['target_seq']) for sample_batched in dataset]) / len(dataset)

In [None]:
# guj_trans = Translator('guj')
# guj_trans.train(10, 1000)