In [1]:
import datetime

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

import pandas as pd
from itertools import product
from IPython.display import clear_output

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
data_dir = 'drive/My Drive/'
train_lang = 'en'

In [5]:
class DatasetSeq(Dataset):
    def __init__(self, data_dir, train_lang='en'):
	#open file
        with open(data_dir + train_lang + '.train', 'r') as f:
            train = f.read().split('\n\n')

        # delete extra tag markup
        train = [x for x in train if not '_ ' in x]
	    #init vocabs of tokens for encoding {<str> token: <int> id}
        self.target_vocab = {} # {p: 1, a: 2, r: 3, pu: 4}
        self.word_vocab = {} # {cat: 1, sat: 2, on: 3, mat: 4, '.': 5}
        self.char_vocab = {} # {c: 1, a: 2, t: 3, ' ': 4, s: 5}
	    
        # Cat sat on mat. -> [1, 2, 3, 4, 5]
        # p    a  r  p pu -> [1, 2, 3, 1, 4]
        # chars  -> [1, 2, 3, 4, 5, 2, 3, 4]

	    #init encoded sequences lists (processed data)
        self.encoded_sequences = []
        self.encoded_targets = []
        self.encoded_char_sequences = []
        # n=1 because first value is padding
        n_word = 1
        n_target = 1
        n_char = 1
        for line in train:
            sequence = []
            target = []
            chars = []
            for item in line.split('\n'):
                if item != '':
                    word, label = item.split(' ')

                    if self.word_vocab.get(word) is None:
                        self.word_vocab[word] = n_word
                        n_word += 1
                    if self.target_vocab.get(label) is None:
                        self.target_vocab[label] = n_target
                        n_target += 1
                    for char in word:
                        if self.char_vocab.get(char) is None:
                            self.char_vocab[char] = n_char
                            n_char += 1
                    sequence.append(self.word_vocab[word])
                    target.append(self.target_vocab[label])
                    chars.append([self.char_vocab[char] for char in word])
            self.encoded_sequences.append(sequence)
            self.encoded_targets.append(target)
            self.encoded_char_sequences.append(chars)

    def __len__(self):
        return len(self.encoded_sequences)

    def __getitem__(self, index):
        return {
            'data': self.encoded_sequences[index], # [1, 2, 3, 4, 6] len=5
            'char': self.encoded_char_sequences[index],# [[1,2,3], [4,5], [1,2], [2,6,5,4], []] len=5
            'target': self.encoded_targets[index], #  (1)
        }

In [6]:
dataset = DatasetSeq(data_dir)

In [7]:
#padding
# seq1 = [1, 2, 3, 4]
# seq2 = [9, 7, 6, 4, 3, 7, 5]
# pad seq1 equal seq2
# seq1 = [1, 2, 3, 4, 0, 0, 0]
# concat(seq1, seq2) [[1, 2, 3, 4, 0, 0, 0],
#                     [9, 7, 6, 4, 3, 7, 5]]

In [8]:
def collate_fn(input_data):
    data = []
    chars = []
    targets = []
    max_len = 0
    for item in input_data:
        if len(item['data']) > max_len:
            max_len = len(item['data'])
        data.append(torch.as_tensor(item['data']))
        chars.append(item['char'])
        targets.append(torch.as_tensor(item['target']))
    chars_seq = [[torch.as_tensor([0]) for _ in range(len(input_data))] for _ in range(max_len)]
    for j in range(len(input_data)):
        for i in range(max_len):
            if len(chars[j]) > i:
                chars_seq[i][j] = torch.as_tensor(chars[j][i])
    for j in range(max_len):
        chars_seq[j] = pad_sequence(chars_seq[j], batch_first=True, padding_value=0)
    data = pad_sequence(data, batch_first=True, padding_value=0)
    targets = pad_sequence(targets, batch_first=True, padding_value=0)
    return {'data': data, 'chars': chars_seq, 'target': targets}

In [9]:
class CharRNN(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim):
        super().__init__()
        self.char_emb = nn.Embedding(vocab_size, emb_dim)
        self.rnn = nn.GRU(emb_dim, hidden_dim, batch_first=True)

    def forward(self, x):
        emb = self.char_emb(x) # B x T x Emb_dim
        _, out = self.rnn(emb)
        # _: B x T x Hidden 
        # out: 1 x B x Hidden

        return out.transpose(0, 1) # B x 1 x Hidden

# #TODO try to use other RNN archicetures, f.e. RNN and LSTM

In [10]:
class RNNPredictor(nn.Module):
    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes,
                 char_vocab, char_emb, char_hidden):
        super().__init__()
        #TODO try to use other RNN archicetures, f.e. RNN and LSTM
        self.word_emb = nn.Embedding(vocab_size, emb_dim)
        # batch_first = False: T x B x Vec
        # batch_first = True: B x T x Vec
        self.rnn = nn.RNN(emb_dim + char_hidden, hidden_dim, batch_first=True) 
        self.clf = nn.Linear(hidden_dim, n_classes)
        self.do = nn.Dropout(0.1)
        self.hidden_dim = hidden_dim
        self.char_rnn = CharRNN(char_vocab, char_emb, char_hidden)

    def forward(self, x, chars):
        emb = self.word_emb(x)
        char_features = [self.char_rnn(c.to(x.device)) for c in chars]
        char_features = torch.cat(char_features, dim=1) # конкатенация по времени B x T x Char_hid
        emb = torch.cat((emb, char_features), dim=-1) # конкатенация векторов
        hidden, _ = self.rnn(emb)

        return self.clf(self.do(hidden))


class GRUPredictor(RNNPredictor):
    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes,
                 char_vocab, char_emb, char_hidden):
        super().__init__(vocab_size, emb_dim, hidden_dim, n_classes,
                 char_vocab, char_emb, char_hidden)
        self.rnn = nn.GRU(emb_dim + char_hidden, hidden_dim, batch_first=True) 

class LSTMPredictor(RNNPredictor):
    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes,
                 char_vocab, char_emb, char_hidden):
        super().__init__(vocab_size, emb_dim, hidden_dim, n_classes,
                 char_vocab, char_emb, char_hidden)
        self.rnn = nn.LSTM(emb_dim + char_hidden, hidden_dim, batch_first=True)         


In [11]:
#hyper params
vocab_size = len(dataset.word_vocab) + 1
n_classes = len(dataset.target_vocab) + 1
n_chars = len(dataset.char_vocab) + 1
#TODO try to use other model parameters
emb_dim = 256
hidden = 256
char_hid = 64
char_emb = 32
n_epochs = 10
batch_size = 64
cuda_device = 0
batch_size = 100
device = f'cuda:{cuda_device}' if cuda_device != -1 else 'cpu'

In [12]:
models = []
models.append(
     {'name':'GRU-model',
      'model':GRUPredictor(vocab_size, emb_dim, hidden, n_classes, n_chars, char_emb, char_hid).to(device),
      'loss_func':nn.CrossEntropyLoss()
            }
)

models.append(
     {'name':'RNN-model',
      'model':RNNPredictor(vocab_size, emb_dim, hidden, n_classes, n_chars, char_emb, char_hid).to(device),
      'loss_func':nn.CrossEntropyLoss()
            }
)

models.append(
     {'name':'LSTM-model',
      'model':LSTMPredictor(vocab_size, emb_dim, hidden, n_classes, n_chars, char_emb, char_hid).to(device),
      'loss_func':nn.CrossEntropyLoss()
            }
)


for mdl in models:
  mdl['optim'] = torch.optim.Adam(mdl['model'].parameters(), lr=0.001)
  mdl['model'].train()
  print (mdl['name'], mdl['model'])

GRU-model GRUPredictor(
  (word_emb): Embedding(29588, 256)
  (rnn): GRU(320, 256, batch_first=True)
  (clf): Linear(in_features=256, out_features=18, bias=True)
  (do): Dropout(p=0.1, inplace=False)
  (char_rnn): CharRNN(
    (char_emb): Embedding(168, 32)
    (rnn): GRU(32, 64, batch_first=True)
  )
)
RNN-model RNNPredictor(
  (word_emb): Embedding(29588, 256)
  (rnn): RNN(320, 256, batch_first=True)
  (clf): Linear(in_features=256, out_features=18, bias=True)
  (do): Dropout(p=0.1, inplace=False)
  (char_rnn): CharRNN(
    (char_emb): Embedding(168, 32)
    (rnn): GRU(32, 64, batch_first=True)
  )
)
LSTM-model LSTMPredictor(
  (word_emb): Embedding(29588, 256)
  (rnn): LSTM(320, 256, batch_first=True)
  (clf): Linear(in_features=256, out_features=18, bias=True)
  (do): Dropout(p=0.1, inplace=False)
  (char_rnn): CharRNN(
    (char_emb): Embedding(168, 32)
    (rnn): GRU(32, 64, batch_first=True)
  )
)


In [13]:
p_chekpoint = 100

n_steps=len(dataset)//batch_size

df_train_step = pd.DataFrame(columns=['epoch', 'step'], data=product(range(n_epochs), range(0, n_steps, p_chekpoint)))
#df_train_step.set_index(['epoch'], inplace=True)
df_train_step.set_index(['epoch', 'step'], inplace=True)
df_train_step

for mdl in models:
  model = mdl['model']
  optim = mdl['optim']
  loss_func = mdl['loss_func']

  mdl_name = mdl['name']
  #f_train_step[mdl_name+'_loss']=[None]*
  df_train_step.insert(df_train_step.shape[1], mdl_name+'_loss', None)
  df_train_step.insert(df_train_step.shape[1], mdl_name+'_time', None)

  start = datetime.datetime.now()
  for epoch in range(n_epochs):
      dataloader = DataLoader(dataset, 
                              batch_size, 
                              shuffle=True, 
                              collate_fn=collate_fn,
                              drop_last = True,
                              )
      for i, batch in enumerate(dataloader):
          optim.zero_grad()

          predict = model(batch['data'].to(device), batch['chars'])
          loss = loss_func(predict.view(-1, n_classes),
                          batch['target'].to(device).view(-1), 
                          )
          loss.backward()
          optim.step()
          if i % p_chekpoint == 0:
              clear_output(wait=True)
              df_train_step.loc[(epoch, i)][mdl_name+'_loss'] = loss.item()
              df_train_step.loc[(epoch, i)][mdl_name+'_time'] = datetime.datetime.now()-start
              display(df_train_step)
    
      torch.save(model.state_dict(), f'./rnn_chkpt__{mdl_name}_{epoch}.pth')

Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,3.193892,0:00:01.202440,3.127689,0:00:00.091872,2.90793,0:00:00.137730
0,100,0.248736,0:00:19.517199,0.158152,0:00:12.805543,0.253652,0:00:14.084355
0,200,0.190125,0:00:32.142616,0.185377,0:00:25.309484,0.22073,0:00:26.744361
1,0,0.122578,0:00:33.754095,0.153994,0:00:26.724815,0.184448,0:00:28.218634
1,100,0.091903,0:00:46.595711,0.128943,0:00:38.975224,0.102279,0:00:41.217333
1,200,0.10671,0:01:00.723328,0.109267,0:00:53.391768,0.070369,0:00:55.012668
2,0,0.088397,0:01:02.347290,0.041755,0:00:55.089980,0.078411,0:00:56.484365
2,100,0.064719,0:01:15.847579,0.084755,0:01:07.517807,0.099119,0:01:09.413309
2,200,0.082728,0:01:28.424689,0.047379,0:01:20.106898,0.074203,0:01:21.961460
3,0,0.062054,0:01:29.945284,0.059451,0:01:21.691853,0.06743,0:01:23.289928


In [18]:
#example

phrase = 'What can you tell me about neural networks ?'
words = phrase.split(' ') 
tokens = [dataset.word_vocab[w] for w in words]
chars = [torch.tensor([dataset.char_vocab[c] for c in w]).unsqueeze(0).to(device) for w in words]

start = datetime.datetime.now()
for mdl in models:
  model = mdl['model']
  print("predict with ", mdl['name'])
  start = datetime.datetime.now()
  with torch.no_grad():
      model.eval()
      predict = model(torch.tensor(tokens).unsqueeze(0).to(device), chars) # 1 x T x N_classes
      labels = torch.argmax(predict, dim=-1).squeeze().cpu().detach().tolist()
      end = datetime.datetime.now() - start

  target_labels = list(dataset.target_vocab.keys())
  print([target_labels[l-1] for l in labels])
  print("Predicted in", datetime.datetime.now()-start, '\n')

predict with  GRU-model
['PRON', 'AUX', 'PRON', 'VERB', 'PRON', 'ADP', 'ADJ', 'NOUN', 'PUNCT']
Predicted in 0:00:00.006137 

predict with  RNN-model
['PRON', 'AUX', 'PRON', 'VERB', 'PRON', 'ADP', 'ADJ', 'NOUN', 'PUNCT']
Predicted in 0:00:00.002750 

predict with  LSTM-model
['PRON', 'AUX', 'PRON', 'VERB', 'PRON', 'ADP', 'ADJ', 'NOUN', 'PUNCT']
Predicted in 0:00:00.003884 



# #TODO try to use other model parameters


In [96]:
def create_models_with_params(p_params):
  emb_dim = p_params['emb_dim']
  hidden = p_params['hidden']
  char_emb = p_params['char_emb']
  char_hid = p_params['char_hid']

  models = []

  models.append(
      {'name':'GRU-model',
        'model':GRUPredictor(vocab_size, emb_dim, hidden, n_classes, n_chars, char_emb, char_hid).to(device),
        'loss_func':nn.CrossEntropyLoss()
              }
  )

  models.append(
      {'name':'RNN-model',
        'model':RNNPredictor(vocab_size, emb_dim, hidden, n_classes, n_chars, char_emb, char_hid).to(device),
        'loss_func':nn.CrossEntropyLoss()
              }
  )

  models.append(
      {'name':'LSTM-model',
        'model':LSTMPredictor(vocab_size, emb_dim, hidden, n_classes, n_chars, char_emb, char_hid).to(device),
        'loss_func':nn.CrossEntropyLoss()
              }
  )


  for mdl in models:
    mdl['optim'] = torch.optim.Adam(mdl['model'].parameters(), lr=0.001)
    mdl['model'].train()

  return (models)


In [127]:
import time

def train_with_params(p_params):

  n_epochs = p_params['n_epochs']
  batch_size = p_params['batch_size']

  p_chekpoint = 100
  n_steps = len(dataset)//batch_size
  p_checked_steps = list(range(0, n_steps, p_chekpoint))
  p_checked_steps.append(n_steps-1)

  models = create_models_with_params(p_params)
  #print (p_checked_steps)
  #time.sleep(10)

  df_train_step = pd.DataFrame(columns=['epoch', 'step'], data=product(range(n_epochs), p_checked_steps))
  #df_train_step.set_index(['epoch'], inplace=True)
  df_train_step.set_index(['epoch', 'step'], inplace=True)

  for mdl in models:
    model = mdl['model']
    optim = mdl['optim']
    loss_func = mdl['loss_func']

    mdl_name = mdl['name']
    df_train_step.insert(df_train_step.shape[1], mdl_name+'_loss', None)
    df_train_step.insert(df_train_step.shape[1], mdl_name+'_time', None)

    start = datetime.datetime.now()
    for epoch in range(n_epochs):
        dataloader = DataLoader(dataset, 
                                batch_size, 
                                shuffle=True, 
                                collate_fn=collate_fn,
                                drop_last = True,
                                )
        for i, batch in enumerate(dataloader):
            optim.zero_grad()

            predict = model(batch['data'].to(device), batch['chars'])
            loss = loss_func(predict.view(-1, n_classes),
                            batch['target'].to(device).view(-1), 
                            )
            loss.backward()
            optim.step()
            if (i % p_chekpoint == 0) or (i==n_steps-1):
                clear_output(wait=True)
                df_train_step.loc[(epoch, i)][mdl_name+'_loss'] = loss.item()
                df_train_step.loc[(epoch, i)][mdl_name+'_time'] = datetime.datetime.now()-start
                display(df_train_step)
        
            torch.save(model.state_dict(), f'./rnn_chkpt__{mdl_name}_{epoch}.pth')
    
  return(models, df_train_step)

In [128]:
params1 = {
    'emb_dim':128,
    'hidden':128,
    'char_hid':32,
    'char_emb':16,
    'n_epochs':5,
    'batch_size':50
         }
models1, df1 = train_with_params(params1)


Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,3.039315,0:00:00.095831,2.72506,0:00:00.066779,3.028796,0:00:00.071287
0,100,0.544996,0:00:11.988813,0.430627,0:00:11.478546,0.483895,0:00:11.582726
0,200,0.350741,0:00:23.743414,0.251834,0:00:22.951847,0.287818,0:00:23.353007
0,300,0.271577,0:00:35.384090,0.203385,0:00:34.222999,0.220455,0:00:35.433829
0,400,0.212486,0:00:47.489843,0.188354,0:00:46.973405,0.180906,0:00:48.482774
0,423,0.171084,0:00:50.042464,0.214683,0:00:49.573828,0.17792,0:00:51.146041
1,0,0.168799,0:00:50.163937,0.154384,0:00:49.721697,0.210726,0:00:51.273873
1,100,0.227883,0:01:03.176063,0.232015,0:01:01.308725,0.083444,0:01:02.833395
1,200,0.159651,0:01:14.682748,0.168696,0:01:12.266275,0.134459,0:01:14.566067
1,300,0.19347,0:01:26.263878,0.16792,0:01:22.958835,0.125301,0:01:25.519030


In [129]:
params2 = {
    'emb_dim':128,
    'hidden':128,
    'char_hid':32,
    'char_emb':16,
    'n_epochs':5,
    'batch_size':100
         }
models2, df2 = train_with_params(params2)

Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,3.03403,0:00:00.139864,3.211715,0:00:00.191399,2.940763,0:00:00.095111
0,100,0.42468,0:00:17.923698,0.427071,0:00:17.258502,0.293812,0:00:15.881462
0,200,0.255159,0:00:33.699505,0.274083,0:00:34.617552,0.273257,0:00:33.812059
0,211,0.239572,0:00:35.232081,0.238593,0:00:36.202610,0.266679,0:00:35.960275
1,0,0.116543,0:00:35.649792,0.216045,0:00:36.376841,0.149972,0:00:36.172681
1,100,0.165558,0:00:51.371329,0.223507,0:00:52.341978,0.191555,0:00:52.486704
1,200,0.119555,0:01:08.258113,0.140223,0:01:08.308561,0.173397,0:01:08.271320
1,211,0.133414,0:01:10.823971,0.152728,0:01:10.791060,0.111108,0:01:09.836582
2,0,0.195927,0:01:10.993641,0.118567,0:01:11.077296,0.123361,0:01:10.002208
2,100,0.115097,0:01:28.072689,0.145745,0:01:28.287371,0.139531,0:01:27.622541


In [130]:
params3 = {
    'emb_dim':16,
    'hidden':16,
    'char_hid':8,
    'char_emb':8,
    'n_epochs':5,
    'batch_size':100
         }
models3, df3 = train_with_params(params3)


Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,2.829763,0:00:00.434135,3.246931,0:00:00.324276,2.931488,0:00:00.102295
0,100,1.18076,0:00:12.805156,1.173164,0:00:12.681141,0.794211,0:00:12.497990
0,200,0.415626,0:00:26.352919,0.895554,0:00:25.208407,0.862722,0:00:24.737916
0,211,0.867672,0:00:27.524575,0.994369,0:00:26.291256,0.566258,0:00:25.892344
1,0,0.91011,0:00:27.623233,0.786917,0:00:26.405146,0.815184,0:00:26.007347
1,100,0.583334,0:00:40.351299,0.923742,0:00:38.603546,0.826715,0:00:38.076363
1,200,0.66404,0:00:52.772741,0.440387,0:00:50.850335,0.500231,0:00:51.604054
1,211,0.388813,0:00:54.095199,0.758145,0:00:52.348477,0.769255,0:00:52.643900
2,0,0.504345,0:00:54.249370,0.787524,0:00:52.446596,0.493501,0:00:52.771275
2,100,0.663982,0:01:06.552324,0.551559,0:01:05.831569,0.350689,0:01:05.066402


In [135]:
params4 = {
    'emb_dim':320,
    'hidden':320,
    'char_hid':128,
    'char_emb':64,
    'n_epochs':5,
    'batch_size':100
         }
models4, df4 = train_with_params(params4)


Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,2.694156,0:00:00.146078,3.212856,0:00:00.329527,2.867287,0:00:00.117051
0,100,0.24067,0:00:28.594018,0.191581,0:00:30.103578,0.182169,0:00:29.408637
0,200,0.124785,0:00:58.009449,0.120272,0:00:57.350324,0.173129,0:00:57.027461
0,211,0.126291,0:01:00.894159,0.152567,0:01:00.411495,0.097462,0:00:59.967492
1,0,0.115327,0:01:01.125444,0.106734,0:01:00.717054,0.118612,0:01:00.219005
1,100,0.059679,0:01:28.669126,0.104084,0:01:29.382278,0.087427,0:01:30.065716
1,200,0.054877,0:01:58.753680,0.090072,0:01:57.486607,0.07083,0:01:58.227267
1,211,0.056294,0:02:01.635955,0.116443,0:02:00.280437,0.079915,0:02:01.149746
2,0,0.065782,0:02:02.054375,0.106052,0:02:00.501620,0.072639,0:02:01.403275
2,100,0.08541,0:02:30.987384,0.065234,0:02:30.200902,0.072509,0:02:31.225238


In [143]:
params5 = {
    'emb_dim':8,
    'hidden':16,
    'char_hid':32,
    'char_emb':32,
    'n_epochs':5,
    'batch_size':100
         }
models5, df5 = train_with_params(params5)

Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,3.083146,0:00:00.075556,3.372885,0:00:00.096985,3.254342,0:00:00.332056
0,100,0.885262,0:00:12.515627,1.134809,0:00:12.739047,0.839311,0:00:12.773745
0,200,0.656762,0:00:28.148276,0.835395,0:00:24.872564,0.881179,0:00:24.203094
0,211,0.817306,0:00:29.400814,0.595941,0:00:26.426501,0.673644,0:00:26.009876
1,0,0.667592,0:00:29.572407,0.562732,0:00:26.584309,0.560402,0:00:26.485621
1,100,0.746458,0:00:42.523818,0.663458,0:00:39.491428,0.760232,0:00:38.474897
1,200,0.670283,0:00:56.717466,0.385001,0:00:51.675866,0.586183,0:00:52.165394
1,211,0.518611,0:00:57.891117,0.720851,0:00:53.198189,0.802959,0:00:53.631835
2,0,0.498963,0:00:58.056453,0.432647,0:00:53.372261,0.837818,0:00:53.797512
2,100,0.690586,0:01:10.701641,0.676492,0:01:06.930420,0.66054,0:01:05.861698


In [144]:
params6 = {
    'emb_dim':512,
    'hidden':512,
    'char_hid':128,
    'char_emb':128,
    'n_epochs':5,
    'batch_size':50
         }
models6, df6 = train_with_params(params6)

Unnamed: 0_level_0,Unnamed: 1_level_0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
epoch,step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,2.94926,0:00:00.119967,2.824761,0:00:00.133562,2.811168,0:00:00.105512
0,100,0.16889,0:00:33.092209,0.195873,0:00:30.972818,0.209588,0:00:32.552054
0,200,0.1016,0:01:04.900940,0.133723,0:01:01.229593,0.158092,0:01:05.631413
0,300,0.118869,0:01:36.915442,0.123901,0:01:31.609142,0.129413,0:01:38.169803
0,400,0.06371,0:02:10.142664,0.113115,0:02:02.106415,0.160888,0:02:10.976412
0,423,0.089546,0:02:16.889286,0.108538,0:02:09.648738,0.095988,0:02:18.084801
1,0,0.087218,0:02:17.219755,0.075693,0:02:09.953770,0.12145,0:02:18.389754
1,100,0.103156,0:02:48.004566,0.063863,0:02:38.605466,0.098871,0:02:51.688956
1,200,0.095278,0:03:21.841042,0.089778,0:03:09.314029,0.104499,0:03:23.767144
1,300,0.061152,0:03:53.013020,0.100895,0:03:38.955711,0.077038,0:03:56.253060


In [145]:
print('params1:', params1)
print('params2:', params2)
print('params3:', params3)
print('params4:', params4)
print('params5:', params5)
print('params6:', params6)

params1: {'emb_dim': 128, 'hidden': 128, 'char_hid': 32, 'char_emb': 16, 'n_epochs': 5, 'batch_size': 50}
params2: {'emb_dim': 128, 'hidden': 128, 'char_hid': 32, 'char_emb': 16, 'n_epochs': 5, 'batch_size': 100}
params3: {'emb_dim': 16, 'hidden': 16, 'char_hid': 8, 'char_emb': 8, 'n_epochs': 5, 'batch_size': 100}
params4: {'emb_dim': 320, 'hidden': 320, 'char_hid': 128, 'char_emb': 64, 'n_epochs': 5, 'batch_size': 100}
params5: {'emb_dim': 8, 'hidden': 16, 'char_hid': 32, 'char_emb': 32, 'n_epochs': 5, 'batch_size': 100}
params6: {'emb_dim': 512, 'hidden': 512, 'char_hid': 128, 'char_emb': 128, 'n_epochs': 5, 'batch_size': 50}


In [146]:
df = pd.DataFrame(columns = df1.columns)
df.loc['params1']=df1.iloc[-1]
df.loc['params2']=df2.iloc[-1]
df.loc['params3']=df3.iloc[-1]
df.loc['params4']=df4.iloc[-1]
df.loc['params5']=df5.iloc[-1]
df.loc['params6']=df6.iloc[-1]
df

Unnamed: 0,GRU-model_loss,GRU-model_time,RNN-model_loss,RNN-model_time,LSTM-model_loss,LSTM-model_time
params1,0.058968,0 days 00:04:10.221874,0.054406,0 days 00:04:05.703591,0.08687,0 days 00:04:11.716039
params2,0.070728,0 days 00:02:55.348102,0.073214,0 days 00:02:55.441591,0.066475,0 days 00:02:57.068695
params3,0.382366,0 days 00:02:13.209600,0.292268,0 days 00:02:14.932619,0.313288,0 days 00:02:15.261289
params4,0.042402,0 days 00:05:06.036539,0.055118,0 days 00:05:00.155144,0.049522,0 days 00:05:03.863897
params5,0.44826,0 days 00:02:20.555539,0.153702,0 days 00:02:14.220461,0.473617,0 days 00:02:14.087845
params6,0.03738,0 days 00:11:20.526839,0.048977,0 days 00:10:40.443477,0.029212,0 days 00:11:33.414501
