In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# import other libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
from typing import *
import time
import math
import random
import wandb
wandb.login()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmantra7[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# define Lang
class Lang:
	def __init__(self, wordList):
		self.char2index = {'A': 0, 'Z': 1, '_': 2, '^':3}
		self.char2count = {}
		self.index2char = {0: 'A', 1: 'Z', 2: '_', 3: '^'}
		self.n_chars = 4

		for word in wordList:
			self.addWord(word)

	def addWord(self, word):
		for char in word:
			self.addChar(char)

	def addChar(self, char):
		if char not in self.char2index:
			self.char2index[char] = self.n_chars
			self.char2count[char] = 1
			self.index2char[self.n_chars] = char
			self.n_chars += 1
		else:
			self.char2count[char] += 1

	def encode(self, word):
		encoded = [0] * len(word)
		for i in range(len(word)):
			if word[i] in self.char2index:
				encoded[i] = self.char2index[word[i]]
			else:
				encoded[i] = self.char2index['^']
		return encoded
	
	def one_hot_encode(self, word):
		one_hot = torch.zeros(len(word), self.n_chars, device=device)
		for i in range(len(word)):
			if word[i] in self.char2index:
				one_hot[i][self.char2index[word[i]]] = 1
			else:
				one_hot[i][self.char2index['^']] = 1			
		return one_hot
		
	def decode(self, word):
		decoded = ''
		for i in range(len(word)):
			if word[i].argmax().item() in self.index2char:
				decoded += self.index2char[word[i].argmax().item()]
			else:
				decoded += '^'
		return decoded
	
	def decode_one_hot(self, word):
		decoded = ''
		for i in range(len(word)):
			if word[i].argmax().item() in self.index2char:
				decoded += self.index2char[word[i].argmax().item()]
			else:
				decoded += '^'
		return decoded

In [3]:
def tensorFromWord(lang : Lang, word : str):
    indexes = lang.encode(word)
    indexes.append(1)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(pair, inp_lang : Lang, out_lang : Lang):
    input_tensor = tensorFromWord(inp_lang, pair[0])
    target_tensor = tensorFromWord(out_lang, pair[1])
    return (input_tensor.unsqueeze(1), target_tensor)

In [4]:
# # create dataset
# class AksharantarDataset(Dataset):
# 	def __init__(self, data, inp_lang, out_lang):
# 		self.data = data
# 		self.inp_lang = inp_lang
# 		self.out_lang = out_lang

# 	def __len__(self):
# 		return len(self.data)

# 	def __getitem__(self, idx):
# 		if torch.is_tensor(idx):
# 			idx = idx.tolist()

# 		inp_seq = self.inp_lang.one_hot_encode(self.data['input_seq'][idx]).unsqueeze(1)
# 		out_seq = self.out_lang.one_hot_encode(self.data['target_seq'][idx]).unsqueeze(1)

# 		sample = {'input_seq': inp_seq, 'target_seq': out_seq}
# 		return sample

In [5]:
def DataLoader(lang : str):
	train_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_train.csv')
	test_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_test.csv')
	valid_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_valid.csv')
	
	train_data.columns = ['input_seq', 'target_seq']
	test_data.columns = ['input_seq', 'target_seq']
	valid_data.columns = ['input_seq', 'target_seq']

	return train_data, test_data, valid_data

def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))

In [6]:
def get_cell(str):
	if str == 'lstm':
		return nn.LSTM
	elif str == 'gru':
		return nn.GRU
	elif str == 'rnn':
		return nn.RNN
	else:
		raise ValueError('Invalid cell type')

In [7]:
class EncoderRNN(nn.Module):
	def __init__(self, input_size, embed_size, hidden_size, n_layers=1, type='gru', dropout=0.2):
		super(EncoderRNN, self).__init__()
		self.hidden_size = hidden_size
		self.n_layers = n_layers
		self.type_t = type

		self.embedding = nn.Embedding(input_size, embed_size)
		self.cell = get_cell(type)(embed_size, hidden_size, n_layers, dropout=dropout)

	def forward(self, input, hidden):
		embedded = self.embedding(input)
		output = embedded
		output, hidden = self.cell(output, hidden)
		return output, hidden

	def initHidden(self):
		if self.type_t == 'lstm':
			return (torch.zeros(self.n_layers, 1, self.hidden_size, device=device), torch.zeros(self.n_layers, 1, self.hidden_size, device=device))
		return torch.zeros(self.n_layers, 1, self.hidden_size, device=device)
	
class DecoderRNN(nn.Module):
	def __init__(self, hidden_size, output_size, n_layers=1, type='gru', dropout=0.2):
		super(DecoderRNN, self).__init__()
		self.hidden_size = hidden_size
		self.n_layers = n_layers
		self.type_t = type

		self.embedding = nn.Embedding(output_size, hidden_size)
		self.cell = get_cell(type)(hidden_size, hidden_size, n_layers, dropout=dropout)
		self.out = nn.Linear(hidden_size, output_size)
		self.softmax = nn.LogSoftmax(dim=1)

	def forward(self, input, hidden):
		output = self.embedding(input).view(1, 1, -1)
		output = F.relu(output)
		if self.type_t == 'lstm':
			output, (hidden, cell) = self.cell(output, (hidden, cell))
		else:
			output, hidden = self.cell(output, hidden)
		output = self.softmax(self.out(output[0]))
		return output, hidden

	def initHidden(self):
		if self.type_t == 'lstm':
			return (torch.zeros(self.n_layers, 1, self.hidden_size, device=device), torch.zeros(self.n_layers, 1, self.hidden_size, device=device))
		return torch.zeros(self.n_layers, 1, self.hidden_size, device=device)


In [8]:
class Seq2Seq(nn.Module):
	def __init__(self, input_size, hidden_size, embed_size, output_size, n_layers=1, type='gru', dropout=0.2):
		super(Seq2Seq, self).__init__()
		self.input_size = input_size
		self.hidden_size = hidden_size
		self.output_size = output_size
		self.n_layers = n_layers

		self.encoder = EncoderRNN(input_size, embed_size, hidden_size, n_layers, type, dropout).to(device)
		self.decoder = DecoderRNN(hidden_size, output_size, n_layers, type, dropout).to(device)

	def forward(self, input_tensor, target_tensor, max_length=50):
		encoder_hidden = self.encoder.initHidden()

		input_length = input_tensor.size(0)
		target_length = target_tensor.size(0)

		encoder_outputs = torch.zeros(max_length, self.encoder.hidden_size, device=device)

		for ei in range(input_length):
			encoder_output, encoder_hidden = self.encoder(
				input_tensor[ei], encoder_hidden)
			encoder_outputs[ei] = encoder_output[0, 0]

		decoder_input = torch.tensor([[0]], device=device)  # SOS

		decoder_hidden = encoder_hidden
		use_teacher_forcing = True if random.random() < 0.5 else False

		decoder_outputs = []
		if use_teacher_forcing:
			# Teacher forcing: Feed the target as the next input
			for di in range(target_length):
				decoder_output, decoder_hidden = self.decoder(
					decoder_input, decoder_hidden)
				decoder_outputs.append(decoder_output)
				decoder_input = target_tensor[di]  # Teacher forcing
		else:
			# Without teacher forcing: use its own predictions as the next input
			for di in range(target_length):
				decoder_output, decoder_hidden = self.decoder(
					decoder_input, decoder_hidden)
				decoder_outputs.append(decoder_output)
				topv, topi = decoder_output.topk(1)
				decoder_input = topi.squeeze().detach()  # detach from history as input

				if decoder_input.item() == 1:
					break

		return decoder_outputs

	def predict(self, input_tensor, max_length = 50):
		encoder_hidden = self.encoder.initHidden()

		input_length = input_tensor.size(0)

		encoder_outputs = torch.zeros(max_length, self.encoder.hidden_size, device=device)

		for ei in range(input_length):
			encoder_output, encoder_hidden = self.encoder(
				input_tensor[ei], encoder_hidden)
			encoder_outputs[ei] = encoder_output[0, 0]

		decoder_input = torch.tensor([[0]], device=device)  # SOS

		decoder_outputs = []

		decoder_hidden = encoder_hidden
		for di in range(max_length):
			decoder_output, decoder_hidden = self.decoder(
				decoder_input, decoder_hidden)
			topv, topi = decoder_output.data.topk(1)
			if topi.item() == 1:
				break
			decoder_outputs.append(decoder_output)

			decoder_input = topi.squeeze().detach()
		
		return decoder_outputs

In [9]:
class Translator:
	def __init__(self, lang, embed_size=10, hidden_size=10, n_layers=1, max_length=50, type='gru', dropout=0.2):
		self.train_data, self.test_data, self.valid_data = DataLoader(lang)

		self.inp_lang = Lang(self.train_data['input_seq'])
		self.out_lang = Lang(self.train_data['target_seq'])

		self.model = Seq2Seq(self.inp_lang.n_chars, hidden_size, embed_size, self.out_lang.n_chars, n_layers, type, dropout)
		self.criterion = nn.NLLLoss()
		self.max_length = max_length

		self.pairs = [tensorsFromPair((self.train_data['input_seq'][i], self.train_data['target_seq'][i]), self.inp_lang, self.out_lang)
								for i in range(len(self.train_data))]

	def trainOne(self, input_tensor, target_tensor):
		self.encoder_optim.zero_grad()
		self.decoder_optim.zero_grad()

		decoder_outputs = self.model.forward(input_tensor, target_tensor, self.max_length)

		loss = 0
		for di in range(len(decoder_outputs)):
			loss += self.criterion(decoder_outputs[di], target_tensor[di])
		loss.backward()

		self.encoder_optim.step()
		self.decoder_optim.step()

		return loss.item() / target_tensor.size(0)

	def train(self,epoch=1, n_iters=10000, print_every=1000, plot_every=100, learning_rate=0.01, rand=False, dumpName='model', log=False, wandb = None):
		self.encoder_optim = optim.Adam(self.model.encoder.parameters(), lr=learning_rate)
		self.decoder_optim = optim.Adam(self.model.decoder.parameters(), lr=learning_rate)

		start = time.time()
		train_loss = []
		train_acc = []
		valid_loss = []
		valid_acc = []

		for i in range(epoch):
			print_loss_total = 0
			tot_loss = 0
			print("Epoch: ", i)
			if rand:
				training_pairs = [random.choice(self.pairs) for i in range(n_iters)]
			else:
				training_pairs = self.pairs

			for iter in tqdm(range(1, len(training_pairs) + 1)):
				training_pair = training_pairs[iter - 1]
				input_tensor = training_pair[0]
				target_tensor = training_pair[1]

				loss = self.trainOne(input_tensor, target_tensor)
				print_loss_total += loss 
				tot_loss += loss

				if iter % print_every == 0:
					print_loss_avg = print_loss_total / print_every
					print_loss_total = 0
					print('%s (%d %d%%) %.4f' % (timeSince(start, iter / n_iters),
												iter, iter / n_iters * 100, print_loss_avg))
			train_loss.append(tot_loss / len(training_pairs))
			train_acc.append(self.accuracy(self.train_data))
			valid_stats = self.calculate_stats(self.valid_data)
			valid_loss.append(valid_stats[0])
			valid_acc.append(valid_stats[1])
			print("Train Loss: ", train_loss[-1], "Valid Loss: ", valid_loss[-1], "Train Acc: ", train_acc[-1], "Valid Acc: ", valid_acc[-1])
			pickle.dump(self, open(dumpName + '_'+str(i) + '.pkl', 'wb'))
			if log:
				wandb.log({'train_loss': train_loss[i], 'train_accuracy': train_acc[i], 'val_loss': valid_loss[i], 'val_accuracy': valid_acc[i]})
		return train_loss, train_acc, valid_loss, valid_acc
					
	def accuracy(self, data):
		with torch.no_grad():
			return np.sum([(self.translate(data['input_seq'][i]) == data['target_seq'][i]) for i in range(len(data))]) / len(data)
				
	def translate(self, word):
		with torch.no_grad():
			tensor = tensorFromWord(self.inp_lang, word).unsqueeze(1)
			outs = self.model.predict(tensor, self.max_length)
			return self.out_lang.decode(outs)
	
	def calculate_stats(self, data):
		with torch.no_grad():
			loss = 0
			acc = 0
			for i in range(len(data)):
				tensor = tensorFromWord(self.inp_lang, data['input_seq'][i]).unsqueeze(1)
				output = self.model.predict(tensor, self.max_length)
				word = self.out_lang.decode(output)
				target = data['target_seq'][i]
				acc += (word == target)
				target = tensorFromWord(self.out_lang, data['target_seq'][i])

				mx_len = min(len(output), len(target))

				while(len(output) < mx_len):
					output = torch.cat((output, self.out_lang.encode('_')), 0, device=device)

				while(len(target) < mx_len):
					target = torch.cat((target, self.out_lang.encode('_')), 0, device=device)

				for di in range(mx_len):
					loss += self.criterion(output[di], target[di]) / mx_len
			return loss.item() / len(data), acc / len(data)

In [10]:
sweep_configuration_new = {
	'method': 'bayes',
	'metric': {'goal': 'maximize', 'name': 'val_accuracy'},
	'parameters': 
	{
		'epochs' : {'values': [10]},
		'lr' : {'values': [0.001, 0.005]},
		'hidden_size' : {'values': [512]},
        'embed_size' : {'values': [16, 32]},
        'n_layers' : {'values': [1]},
        'type' : {'values': ['lstm', 'gru']},
        'dropout' : {'values': [0, 0.2, 0.3]}
	}
}

In [11]:
f = open('configs', 'w')
count = 0

def get_name(config):
	global count
	count += 1
	f.write(str(count) + '_' + str(config) + '\n')
	f.flush()
	return 'lr'+str(config['lr'])+'_hidden_size'+str(config['hidden_size'])+'_embed_size'+str(config['embed_size'])+'_n_layers'+str(config['n_layers'])+'_type'+str(config['type'])+'_dropout'+str(config['dropout'])

# 1: Define objective/training function
def objective(config):
	translator = Translator('guj', hidden_size=config.hidden_size, embed_size=config.embed_size, n_layers=config.n_layers, type=config.type, dropout=config.dropout)
	dump = 'trans'+'_'+str(config.hidden_size)+'_'+str(config.embed_size)+'_'+str(config.n_layers)+'_'+str(config.type)+'_'+str(config.dropout)
	train_loss, train_acc, valid_loss, valid_acc = translator.train(epoch=config.epochs, print_every=10000, learning_rate=config.lr, dumpName=dump, log=True, wandb=wandb)

	# for i in range(config.epochs):
	# 	wandb.log({'train_loss': train_loss[i], 'train_accuracy': train_acc[i], 'val_loss': valid_loss[i], 'val_accuracy': valid_acc[i]})

def main():
	run = wandb.init(project='rnn-first-proj')
	objective(wandb.config)
	run.name = get_name(wandb.config)
	run.finish()

# 2: Define the search space
sweep_configuration = sweep_configuration_new

# 3: Start the sweep
sweep_id = wandb.sweep(sweep=sweep_configuration, project='rnn-first-proj')
wandb.agent(sweep_id, function=main, count=10)

f.close()

Create sweep with ID: ohi6vwf6
Sweep URL: https://wandb.ai/mantra7/rnn-first-proj/sweeps/ohi6vwf6


[34m[1mwandb[0m: Agent Starting Run: 8ggf93de with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	n_layers: 1
[34m[1mwandb[0m: 	type: gru
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
  warn("The `IPython.html` package has been deprecated since IPython 4.0. "


Epoch:  0


 20%|█▉        | 10001/51199 [15:15<57:26, 11.95it/s] 

15m 15s (- 0m 0s) (10000 100%) 1.8664


 39%|███▉      | 20001/51199 [32:26<46:19, 11.22it/s]  

32m 26s (- -17m 46s) (20000 200%) 1.4376


 59%|█████▊    | 30001/51199 [48:25<36:19,  9.73it/s]  

48m 25s (- -33m 42s) (30000 300%) 1.3211


 78%|███████▊  | 40000/51199 [1:02:22<14:35, 12.80it/s]

62m 22s (- -47m 12s) (40000 400%) 1.2439


 98%|█████████▊| 50001/51199 [1:15:43<01:23, 14.40it/s]

75m 43s (- -61m 25s) (50000 500%) 1.2068


100%|██████████| 51199/51199 [1:17:16<00:00, 11.04it/s]


Train Loss:  1.409788341056522 Valid Loss:  1.463964366796398 Train Acc:  0.08394695208890798 Valid Acc:  0.1203907203907204
Epoch:  1


 20%|█▉        | 10002/51199 [13:22<57:14, 12.00it/s] 

96m 37s (- 0m 0s) (10000 100%) 1.1896


 39%|███▉      | 20002/51199 [27:44<44:39, 11.64it/s]  

111m 0s (- -56m 29s) (20000 200%) 1.1445


 59%|█████▊    | 30002/51199 [41:18<31:11, 11.33it/s]

124m 34s (- -84m 57s) (30000 300%) 1.1189


 78%|███████▊  | 40000/51199 [55:04<16:00, 11.66it/s]

138m 20s (- -104m 14s) (40000 400%) 1.0979


 98%|█████████▊| 50002/51199 [1:08:52<01:29, 13.43it/s]

152m 7s (- -122m 18s) (50000 500%) 1.0776


100%|██████████| 51199/51199 [1:10:27<00:00, 12.11it/s]


Train Loss:  1.1241400061020344 Valid Loss:  1.32058591365232 Train Acc:  0.1175608898611301 Valid Acc:  0.1536019536019536
Epoch:  2


 20%|█▉        | 10002/51199 [13:52<1:02:17, 11.02it/s]

173m 50s (- 0m 0s) (10000 100%) 1.0738


 39%|███▉      | 20002/51199 [28:01<43:29, 11.95it/s]  

187m 59s (- -94m 0s) (20000 200%) 1.0526


 59%|█████▊    | 30001/51199 [42:14<30:11, 11.70it/s]

202m 12s (- -135m 11s) (30000 300%) 1.0375


 78%|███████▊  | 40000/51199 [56:20<15:31, 12.02it/s]

216m 18s (- -163m 46s) (40000 400%) 1.0176


 98%|█████████▊| 50002/51199 [1:10:23<01:32, 12.93it/s]

230m 21s (- -185m 42s) (50000 500%) 1.0057


100%|██████████| 51199/51199 [1:12:01<00:00, 11.85it/s]


Train Loss:  1.0360428753119988 Valid Loss:  1.3194549374236875 Train Acc:  0.1358620285552452 Valid Acc:  0.1711843711843712
Epoch:  3


 20%|█▉        | 10002/51199 [14:31<1:03:55, 10.74it/s]

252m 35s (- 0m 0s) (10000 100%) 1.0123


 39%|███▉      | 20001/51199 [28:42<44:15, 11.75it/s]  

266m 45s (- -134m 37s) (20000 200%) 0.9927


 59%|█████▊    | 30001/51199 [43:06<29:42, 11.89it/s]

281m 10s (- -188m 33s) (30000 300%) 0.9847


 78%|███████▊  | 40000/51199 [57:27<16:52, 11.06it/s]

295m 30s (- -222m 21s) (40000 400%) 0.9771


 98%|█████████▊| 50000/51199 [1:11:48<01:32, 13.01it/s]

309m 51s (- -248m 6s) (50000 500%) 0.9772


100%|██████████| 51199/51199 [1:13:28<00:00, 11.61it/s]


Train Loss:  0.9872075373473819 Valid Loss:  1.30166504502442 Train Acc:  0.15894841696126877 Valid Acc:  0.18437118437118438
Epoch:  4


 20%|█▉        | 10001/51199 [14:32<1:01:27, 11.17it/s]

332m 26s (- 0m 0s) (10000 100%) 0.9865


 39%|███▉      | 20002/51199 [28:56<44:17, 11.74it/s]  

346m 50s (- -174m 34s) (20000 200%) 0.9627


 59%|█████▊    | 30002/51199 [43:40<32:19, 10.93it/s]  

361m 34s (- -242m 56s) (30000 300%) 0.9640


 78%|███████▊  | 40000/51199 [58:26<16:01, 11.65it/s]

376m 20s (- -283m 44s) (40000 400%) 0.9444


 98%|█████████▊| 50000/51199 [1:12:54<01:35, 12.58it/s]

390m 48s (- -313m 20s) (50000 500%) 0.9590


100%|██████████| 51199/51199 [1:14:46<00:00, 11.41it/s]


Train Loss:  0.9631800804805722 Valid Loss:  1.2654701093177656 Train Acc:  0.15431942030117776 Valid Acc:  0.1838827838827839
Epoch:  5


 20%|█▉        | 10001/51199 [15:10<1:05:02, 10.56it/s]

414m 20s (- 0m 0s) (10000 100%) 0.9569


 39%|███▉      | 20002/51199 [30:19<50:00, 10.40it/s]  

429m 28s (- -215m 15s) (20000 200%) 0.9327


 59%|█████▊    | 30001/51199 [45:16<34:12, 10.33it/s]  

444m 26s (- -297m 42s) (30000 300%) 0.9281


 78%|███████▊  | 40000/51199 [1:00:21<16:20, 11.43it/s]

459m 31s (- -345m 21s) (40000 400%) 0.9297


 98%|█████████▊| 50001/51199 [1:15:19<01:39, 12.09it/s]

474m 29s (- -380m 24s) (50000 500%) 0.9333


100%|██████████| 51199/51199 [1:17:08<00:00, 11.06it/s]


Train Loss:  0.9358756375364051 Valid Loss:  1.250145947802198 Train Acc:  0.17424168440789858 Valid Acc:  0.1877899877899878
Epoch:  6


 20%|█▉        | 10000/51199 [15:08<1:17:53,  8.82it/s]

497m 54s (- 0m 0s) (10000 100%) 0.9469


 39%|███▉      | 20001/51199 [30:30<47:30, 10.95it/s]  

513m 16s (- -257m 21s) (20000 200%) 0.9281


 59%|█████▊    | 30000/51199 [45:31<51:24,  6.87it/s]  

528m 17s (- -353m 48s) (30000 300%) 0.9282


 78%|███████▊  | 40001/51199 [1:00:41<16:04, 11.61it/s]

543m 27s (- -408m 24s) (40000 400%) 0.9096


 98%|█████████▊| 50000/51199 [1:16:12<01:51, 10.71it/s]

558m 58s (- -448m 48s) (50000 500%) 0.9242


100%|██████████| 51199/51199 [1:18:06<00:00, 10.92it/s]


Train Loss:  0.9270777948276003 Valid Loss:  1.2460800375839438 Train Acc:  0.16855797964804 Valid Acc:  0.19023199023199022
Epoch:  7


 20%|█▉        | 10001/51199 [15:37<1:09:05,  9.94it/s]

582m 48s (- 0m 0s) (10000 100%) 0.9286


 39%|███▉      | 20000/51199 [31:07<49:48, 10.44it/s]  

598m 19s (- -300m 50s) (20000 200%) 0.9235


 59%|█████▊    | 30001/51199 [47:11<33:03, 10.69it/s]  

614m 22s (- -410m 24s) (30000 300%) 0.9114


 78%|███████▊  | 40000/51199 [1:02:57<18:30, 10.08it/s]

630m 8s (- -473m 23s) (40000 400%) 0.9116


 98%|█████████▊| 50001/51199 [1:18:26<01:33, 12.86it/s]

645m 37s (- -517m 29s) (50000 500%) 0.9177


100%|██████████| 51199/51199 [1:20:11<00:00, 10.64it/s]


Train Loss:  0.9182100723390434 Valid Loss:  1.2605803809905372 Train Acc:  0.18191761557842925 Valid Acc:  0.20757020757020758
Epoch:  8


 20%|█▉        | 10000/51199 [15:59<1:06:12, 10.37it/s]

670m 25s (- 0m 0s) (10000 100%) 0.9138


 39%|███▉      | 20002/51199 [32:00<50:36, 10.27it/s]  

686m 26s (- -344m 46s) (20000 200%) 0.9006


 59%|█████▊    | 30001/51199 [47:39<36:49,  9.59it/s]  

702m 5s (- -469m 56s) (30000 300%) 0.9054


 78%|███████▊  | 40001/51199 [1:03:56<16:28, 11.33it/s]

718m 22s (- -539m 13s) (40000 400%) 0.8919


 98%|█████████▊| 50001/51199 [1:19:40<01:45, 11.36it/s]

734m 7s (- -588m 42s) (50000 500%) 0.8932


100%|██████████| 51199/51199 [1:21:38<00:00, 10.45it/s]


Train Loss:  0.9006901539362034 Valid Loss:  1.2752275068681318 Train Acc:  0.18514033477216352 Valid Acc:  0.20976800976800977
Epoch:  9


 20%|█▉        | 10001/51199 [15:24<1:06:10, 10.38it/s]

757m 54s (- 0m 0s) (10000 100%) 0.9126


 39%|███▉      | 20002/51199 [31:14<47:40, 10.90it/s]  

773m 44s (- -387m 7s) (20000 200%) 0.9016


 59%|█████▊    | 30001/51199 [46:27<32:32, 10.86it/s]

788m 58s (- -526m 0s) (30000 300%) 0.8914


 78%|███████▊  | 40000/51199 [1:02:03<16:33, 11.27it/s]

804m 34s (- -604m 34s) (40000 400%) 0.8773


 98%|█████████▊| 50001/51199 [1:17:36<01:46, 11.28it/s]

820m 6s (- -657m 54s) (50000 500%) 0.8905


100%|██████████| 51199/51199 [1:19:24<00:00, 10.75it/s]


Train Loss:  0.8941795092700455 Valid Loss:  1.2959070226648353 Train Acc:  0.19045293853395573 Valid Acc:  0.20122100122100123


0,1
train_accuracy,▁▃▄▆▆▇▇▇██
train_loss,█▄▃▂▂▂▁▁▁▁
val_accuracy,▁▄▅▆▆▆▆██▇
val_loss,█▃▃▃▂▁▁▁▂▃

0,1
train_accuracy,0.19045
train_loss,0.89418
val_accuracy,0.20122
val_loss,1.29591


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: alt54tnf with config:
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	embed_size: 16
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_size: 512
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	n_layers: 1
[34m[1mwandb[0m: 	type: gru
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch:  0


 20%|█▉        | 10001/51199 [12:34<56:34, 12.14it/s] 

12m 34s (- 0m 0s) (10000 100%) 1.9339


 39%|███▉      | 20001/51199 [25:16<42:13, 12.32it/s]  

25m 15s (- -13m 22s) (20000 200%) 1.4452


 59%|█████▊    | 30002/51199 [38:30<28:53, 12.23it/s]

38m 30s (- -26m 19s) (30000 300%) 1.3225


 78%|███████▊  | 40000/51199 [51:59<13:37, 13.70it/s]

51m 59s (- -39m 0s) (40000 400%) 1.2431


 98%|█████████▊| 50002/51199 [1:05:09<01:29, 13.38it/s]

65m 9s (- -53m 52s) (50000 500%) 1.1909


100%|██████████| 51199/51199 [1:06:43<00:00, 12.79it/s]


Train Loss:  1.4208558011193069 Valid Loss:  1.3843910971840658 Train Acc:  0.091329908787281 Valid Acc:  0.14456654456654458
Epoch:  1


 20%|█▉        | 10002/51199 [13:19<1:01:11, 11.22it/s]

85m 38s (- 0m 0s) (10000 100%) 1.1580


 39%|███▉      | 20002/51199 [26:37<42:31, 12.23it/s]  

98m 56s (- -50m 31s) (20000 200%) 1.1180


 59%|█████▊    | 30001/51199 [40:05<27:37, 12.79it/s]

112m 24s (- -75m 3s) (30000 300%) 1.0837


 78%|███████▊  | 40001/51199 [53:55<14:18, 13.04it/s]

126m 15s (- -95m 18s) (40000 400%) 1.0643


 98%|█████████▊| 50002/51199 [1:07:33<01:32, 12.90it/s]

139m 52s (- -112m 6s) (50000 500%) 1.0449


100%|██████████| 51199/51199 [1:09:08<00:00, 12.34it/s]


Train Loss:  1.0921851949318195 Valid Loss:  1.3006725045787546 Train Acc:  0.1323658665208305 Valid Acc:  0.16288156288156289
Epoch:  2


 20%|█▉        | 10002/51199 [13:57<1:03:19, 10.84it/s]

161m 10s (- 0m 0s) (10000 100%) 1.0327


 39%|███▉      | 20001/51199 [27:38<40:55, 12.70it/s]  

174m 51s (- -88m 34s) (20000 200%) 1.0153


 59%|█████▊    | 30001/51199 [41:28<30:39, 11.52it/s]

188m 42s (- -126m 11s) (30000 300%) 0.9921


 78%|███████▊  | 40000/51199 [55:11<14:45, 12.64it/s]

202m 25s (- -152m 11s) (40000 400%) 0.9802


 98%|█████████▊| 50002/51199 [1:09:27<01:40, 11.95it/s]

216m 40s (- -174m 39s) (50000 500%) 0.9739


100%|██████████| 51199/51199 [1:11:04<00:00, 12.01it/s]


Train Loss:  0.9976526574580737 Valid Loss:  1.2790830805479243 Train Acc:  0.1389870895915936 Valid Acc:  0.17094017094017094
Epoch:  3


 20%|█▉        | 10002/51199 [13:57<1:04:34, 10.63it/s]

237m 54s (- 0m 0s) (10000 100%) 0.9848


 39%|███▉      | 20001/51199 [28:32<47:06, 11.04it/s]  

252m 29s (- -127m 45s) (20000 200%) 0.9595


 59%|█████▊    | 30001/51199 [42:36<31:05, 11.36it/s]

266m 33s (- -178m 17s) (30000 300%) 0.9462


 78%|███████▊  | 40001/51199 [57:03<14:57, 12.47it/s]

281m 0s (- -211m 14s) (40000 400%) 0.9404


 98%|█████████▊| 50001/51199 [1:11:06<01:26, 13.85it/s]

295m 3s (- -237m 57s) (50000 500%) 0.9393


100%|██████████| 51199/51199 [1:12:45<00:00, 11.73it/s]


Train Loss:  0.9530271473321295 Valid Loss:  1.2655288938492064 Train Acc:  0.1744955956171019 Valid Acc:  0.1916971916971917
Epoch:  4


 20%|█▉        | 10001/51199 [14:25<1:04:52, 10.58it/s]

316m 52s (- 0m 0s) (10000 100%) 0.9416


 39%|███▉      | 20001/51199 [30:08<43:40, 11.91it/s]  

332m 35s (- -167m 42s) (20000 200%) 0.9204


 59%|█████▊    | 30000/51199 [44:49<34:23, 10.27it/s]

347m 17s (- -232m 28s) (30000 300%) 0.9085


 78%|███████▊  | 40000/51199 [59:24<16:53, 11.05it/s]

361m 51s (- -272m 36s) (40000 400%) 0.8966


 98%|█████████▊| 50002/51199 [1:14:25<01:42, 11.70it/s]

376m 52s (- -302m 29s) (50000 500%) 0.8975


100%|██████████| 51199/51199 [1:16:14<00:00, 11.19it/s]


Train Loss:  0.9119909261078772 Valid Loss:  1.2318395146520147 Train Acc:  0.18769897849567374 Valid Acc:  0.20854700854700856
Epoch:  5


 20%|█▉        | 10001/51199 [15:12<1:03:33, 10.80it/s]

399m 37s (- 0m 0s) (10000 100%) 0.9062


 39%|███▉      | 20001/51199 [30:14<46:46, 11.12it/s]  

414m 39s (- -208m 40s) (20000 200%) 0.8836


 59%|█████▊    | 30001/51199 [45:05<31:20, 11.27it/s]

429m 30s (- -287m 39s) (30000 300%) 0.8814


 78%|███████▊  | 40001/51199 [1:00:24<35:29,  5.26it/s]

444m 49s (- -334m 23s) (40000 400%) 0.8709


 97%|█████████▋| 49566/51199 [1:18:16<02:34, 10.55it/s]

In [None]:
# trans = Translator('guj', hidden_size=128, embed_size=32, n_layers=2, type='gru', dropout=0.2)
# trans.train(n_iters=1, print_every=1000, learning_rate=0.005, dumpName='lol', rand=True)

Epoch:  0


100%|██████████| 1/1 [00:00<00:00, 22.38it/s]


([4.174344635009765], [0.0], [4.133323794261294], [0.0])

In [None]:
trans.translate('a')

'સરાાાાા'

In [None]:
# # create a seq2seq model using 2 RNNs
# class Seq2Seq(nn.Module):
# 	def __init__(self, input_size, hidden_size, output_size, n_layers=1):
# 		super(Seq2Seq, self).__init__()
# 		self.input_size = input_size
# 		self.hidden_size = hidden_size
# 		self.output_size = output_size
# 		self.n_layers = n_layers

# 		# encoder and decoder
# 		self.encoder = nn.RNN(input_size, hidden_size, n_layers)
# 		self.decoder = nn.RNN(hidden_size, hidden_size, n_layers)

# 		# linear layer to get output
# 		self.linear = nn.Linear(hidden_size, output_size)

# 	def forward(self, input, hidden):
# 		# encoder
# 		output, hidden = self.encoder(input, hidden)
		
# 		# decoder
# 		output, hidden = self.decoder(output, hidden)
		
# 		# get output
# 		output = self.linear(output)
# 		return output, hidden
	
# 	def predict(self, input, inp_lang, out_lang):
# 		out, hidden = self.forward(inp_lang.one_hot_encode(input).unsqueeze(1), self.init_hidden(1))
# 		return out_lang.decode_one_hot(out)
	
# 	def init_hidden(self, batch_size):
# 		return Variable(torch.zeros(self.n_layers, batch_size, self.hidden_size))

In [None]:
# class Translator:
# 	def __init__(self, lang):
# 		train_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_train.csv')
# 		test_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_test.csv')
# 		valid_data = pd.read_csv(f'aksharantar_sampled/{lang}/{lang}_valid.csv')

# 		train_data.columns = ['input_seq', 'target_seq']
# 		test_data.columns = ['input_seq', 'target_seq']
# 		valid_data.columns = ['input_seq', 'target_seq']

# 		self.inp_lang = Lang(train_data['input_seq'])
# 		self.out_lang = Lang(train_data['target_seq'])

# 		self.model = Seq2Seq(self.inp_lang.n_chars, 10, self.out_lang.n_chars, 1)
# 		self.criterion = nn.CrossEntropyLoss()
# 		self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

# 		train_dataset = AksharantarDataset(train_data, self.inp_lang, self.out_lang)
# 		test_dataset = AksharantarDataset(test_data, self.inp_lang, self.out_lang)
# 		valid_dataset = AksharantarDataset(valid_data, self.inp_lang, self.out_lang)

# 		self.train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
# 		self.test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)
# 		self.valid_dataloader = DataLoader(valid_dataset, batch_size=1, shuffle=True)

# 	def translate(self, word):
# 		return self.model.predict(word, self.inp_lang, self.out_lang)
	
# 	def train_one(self, inp, target):
# 		# zero gradients
# 		self.optimizer.zero_grad()
		
# 		# initialize hidden layer
# 		hidden = self.model.init_hidden(1)
		
# 		# get output
# 		output, hidden = self.model.forward(inp, hidden)
		
# 		mx_len = min(len(output), len(target))

# 		if(len(output) != len(inp)):
# 			print('lol')

# 		# append output and target with 'Z' to make them of mx_len
# 		while(len(output) < mx_len):
# 			output = torch.cat((output, self.out_lang.one_hot_encode_char('Z')), 0)

# 		while(len(target) < mx_len):
# 			target = torch.cat((target, self.out_lang.one_hot_encode_char('Z')), 0)
		
# 		# calculate loss 
# 		loss = self.criterion(torch.flatten(output[:mx_len], 0, 1), torch.flatten(target[:mx_len], 0, 1).max(1)[1])
			
# 		# backpropagate
# 		loss.backward()
		
# 		# update weights
# 		self.optimizer.step()

# 		return loss.data.item() / len(output)
	
# 	def train_epoch(self, data_loader, print_every=100):
# 		loss = 0
# 		for i_batch, sample_batched in tqdm(enumerate(data_loader)):
# 			loss += self.train_one(sample_batched['input_seq'][0], sample_batched['target_seq'][0])
# 			if i_batch % print_every == print_every-1:
# 				print(' Train Loss: ', loss / (i_batch+1))
# 				# print(' Valid Loss: ', self.calc_loss_full(self.valid_dataloader))
# 				# print(' Train Accuracy: ', self.calc_accuracy(self.train_dataloader))
# 				# print(' Valid Accuracy: ', self.calc_accuracy(self.valid_dataloader))
# 		return self.calc_loss_full(data_loader)
	
# 	def train(self, epochs, print_every=100):
# 		losses = []
# 		for epoch in range(epochs):
# 			print('Epoch ', epoch + 1)
# 			loss = self.train_epoch(self.train_dataloader, print_every)
# 			losses.append(loss)
# 		return losses
	
# 	def calc_loss_full(self, data_loader):
# 		loss = 0
# 		for i_batch, sample_batched in tqdm(enumerate(data_loader)):
# 			inp = sample_batched['input_seq'][0]
# 			target = sample_batched['target_seq'][0]
# 			hidden = self.model.init_hidden(1)
# 			output, hidden = self.model.forward(inp, hidden)
# 			mx_len = min(len(output), len(target))
# 			while(len(output) < mx_len):
# 				output = torch.cat((output, self.out_lang.one_hot_encode_char('Z')), 0)
# 			while(len(target) < mx_len):
# 				target = torch.cat((target, self.out_lang.one_hot_encode_char('Z')), 0)
# 			loss += self.criterion(torch.flatten(output[:mx_len], 0, 1), torch.flatten(target[:mx_len], 0, 1).max(1)[1])
# 		return loss / len(data_loader)
	
# 	def calc_accuracy(self, dataset):
# 		return np.sum([(self.translate(sample_batched['input_seq']) ==  sample_batched['target_seq']) for sample_batched in dataset]) / len(dataset)

In [None]:
# guj_trans = Translator('guj')
# guj_trans.train(10, 1000)