# Dual LSTM encoder

Based on [Retrieval-based_Chatbot](https://github.com/Janinanu/Retrieval-based_Chatbot).

Find the necessary data files within this [Google Drive folder](https://drive.google.com/open?id=1RIIbsS-vxR7Dlo2_v6FWHDFE7q1XPPgj).

Another useful read: the [Ubuntu Dialogue Corpus paper](https://arxiv.org/pdf/1506.08909.pdf).

In [1]:
import pandas as pd
import numpy as np
import torch.nn as nn
import torch
import torch.autograd as autograd
from torch.nn import init
import torch.nn.utils.rnn 
import datetime
import operator
from nltk.tokenize import word_tokenize
from typing import List
import pickle
import glob

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [2]:
# Locations of the input files, relative to the working directory.
# Dialogue file used to build the vocabulary and the training examples.
PATH_TO_TRAINING_SET = 'data/train_both_original.txt'
# Dialogue file used to build the validation examples.
PATH_TO_VALIDATION_SET = 'data/valid_both_original.txt'
# Pre-trained word vectors, passed to the code as GloVe weights
# (presumably the 50-dimensional ones, judging by the file name — keep in sync with EMBEDDING_DIM).
PATH_TO_WEIGHTS = 'data/glove.6B.50d.txt'

In [3]:
def normalize(sentence: str) -> List[str]:
    """
    Lower-case a sentence, rewrite it for tokenization and split it into tokens.

    The rewriting steps are plain substring substitutions, applied in this order:
        - English contractions are expanded ("'s" -> " is", "n't" -> " not", ...);
          note that every "'s" becomes " is", possessives included
        - Hyphens become spaces, so composed words are split into separate words
        - '!', '.' and ':' are surrounded by spaces
    The rewritten text is then tokenized with NLTK's `word_tokenize`.
    Inspired from : https://machinelearningmastery.com/clean-text-machine-learning-python/
    and https://en.wikipedia.org/wiki/Wikipedia:List_of_English_contractions
    """
    # Each substitution works on the output of the previous one, so the order is significant.
    substitutions = (
        ("'s", ' is'),
        ("n't", ' not'),
        ("'re", ' are'),
        ("'m", ' am'),
        ("'ve", ' have'),
        ("'ll", ' will'),
        ("'d", ' would'),
        ("-", ' '),
        ("!", ' ! '),
        (".", ' . '),
        (":", ' : '),
    )

    text = sentence.lower()
    for pattern, replacement in substitutions:
        text = text.replace(pattern, replacement)

    return word_tokenize(text)

In [4]:
def extract_dataset_as_text(path: str, is_training_set: bool, nb_dialogues=-1) -> tuple:
    """
    Extract the dataset as text lists. If it is a training dataset, the first answer will be the correct one, others are
    distractors.
    To access an element of a dialogue i in one of the following list (e.g: output_list), do: output_list[i].

    Every line of the file starts with its index inside the dialogue; index 1 opens a new dialogue.
    A line is either a persona sentence ("<idx> your persona: ..." / "<idx> partner's persona: ...") or an
    exchange made of tab-separated fields: the utterance, then (training set only) the correct answer, then
    fields holding '|'-separated candidate answers. Blank lines are ignored.

    Parameters
    ----------
    path : str
        Path to the dialogue file (read as UTF-8).
    is_training_set : bool
        Whether the dataset to extract is a training set (correct answer is known). When True, every token read
        is also added to the returned vocabulary.
    nb_dialogues : int
        Number of dialogues to extract. Set -1 for all.

    Returns
    -------
    out : tuple
        word_to_id : Dict[str, int]
            Vocabulary mapping each word to an unique id. Only returned if it's a training set.
        my_personae : List[List[List[str]]]
            My personae of each dialogue (an empty list for a dialogue without any).
        other_personae : List[List[List[str]]]
            Other personae of each dialogue (an empty list for a dialogue without any).
        line_indices : List[List[int]]
            Indices of lines, except those describing the persona, of each dialogue.
        utterances : List[List[List[str]]]
            Utterances (question-like) of each dialogue.
        answers : List[List[List[List[str]]]]
            Answers of each utterance of each dialogue. The correct one (for a training set) for a dialogue i and an
            utterance j is answers[i][j][0], the others answers[i][j][k] for k>0 are wrong answers.

    Raises
    ------
    ValueError
        If a non-blank line does not start with an integer index.
    IndexError
        If the file does not start with a line of index 1, or if a training exchange has no answer field.
    """
    word_to_id = {}
    my_personae = []
    other_personae = []
    line_indices = []
    utterances = []
    answers = []
    # All five lists get one entry per dialogue, so that index i always designates the same dialogue.
    per_dialogue_lists = (my_personae, other_personae, line_indices, utterances, answers)

    def get_tokens_from_sentence(sentence: str):
        """Normalizes, extracts the tokens from the given sentence and adds them to the vocabulary."""
        tokens = normalize(sentence)
        if is_training_set:
            for element in tokens:
                if element not in word_to_id:
                    word_to_id[element] = len(word_to_id)
        return tokens

    def get_candidates(fields):
        """Tokenizes every '|'-separated candidate found in the given tab-separated fields."""
        return [get_tokens_from_sentence(candidate)
                for field in fields if field != ''
                for candidate in field.split('|')]

    idx_dialogue = 0
    with open(path, 'r', encoding='utf-8') as file:
        for line in file:
            words = line.split()
            if not words:
                continue  # Blank line: nothing to extract

            idx_line = int(words[0])
            if idx_line == 1:
                idx_dialogue += 1
                if idx_dialogue == nb_dialogues + 1:
                    break
                # Open the new dialogue in every list at once to keep them aligned, even when the
                # dialogue turns out to have no persona (or no exchange) of a given kind.
                for dialogue_list in per_dialogue_lists:
                    dialogue_list.append([])

            # Slicing (instead of indexing words[1] and words[2]) tolerates lines with fewer than 3 fields
            line_kind = ' '.join(words[1:3])

            # Get my persona
            if line_kind == 'your persona:':
                my_personae[-1].append(get_tokens_from_sentence(' '.join(words[3:])))

            # Get other persona
            elif line_kind == "partner's persona:":
                other_personae[-1].append(get_tokens_from_sentence(' '.join(words[3:])))

            # Get dialogue
            else:
                line_indices[-1].append(idx_line)
                # Drop the line index and the space following it; strip the end-of-line so that a
                # trailing tab does not produce a bogus empty candidate.
                exchange = line.rstrip('\n')[len(str(idx_line)) + 1:].split('\t')
                utterances[-1].append(get_tokens_from_sentence(exchange[0]))

                if is_training_set:
                    # Training set: the field right after the utterance is the correct answer, the
                    # remaining fields hold the other candidates.
                    # NOTE(review): if the candidate fields of the data files also list the correct
                    # answer, it ends up among the wrong answers as well — verify against the data.
                    answers[-1].append([get_tokens_from_sentence(exchange[1])] + get_candidates(exchange[2:]))
                else:
                    # Testing set: answer is unknown, every field after the utterance holds candidates
                    answers[-1].append(get_candidates(exchange[1:]))

    print('Loaded ' + str(len(line_indices)) + ' dialogues')
    if is_training_set:
        return word_to_id, my_personae, other_personae, line_indices, utterances, answers
    else:
        return my_personae, other_personae, line_indices, utterances, answers

In [5]:
def create_id_to_vec(word_to_id: dict, path_to_glove_weights: str) -> dict:
    """
    Extracts the embedding weights for each word in the vocabulary and maps each word ids to its weight in a dictionary.

    Words of the vocabulary that are absent from the weights file get a small random vector
    (standard normal scaled by 0.01) of the same dimension as the vectors of the file.

    Parameters
    ----------
    word_to_id : Dict[str, int]
        Vocabulary mapping each word to an unique id.
    path_to_glove_weights : str
        Path to the file containing the embedding weights: one word per line, followed by the
        whitespace-separated components of its vector. Blank lines are ignored.

    Returns
    -------
    id_to_vec : Dict[int, torch.Tensor]
        Map of each word id to its embedding form (1-D float32 tensor).

    Raises
    ------
    ValueError
        If some words need a random vector but the weights file holds no vector to take the dimension from.
    """
    id_to_vec = {}
    embedding_dim = None

    # Stream the file instead of loading it entirely: embedding files are large and only the
    # lines of vocabulary words need to be converted to numbers.
    with open(path_to_glove_weights, 'r', encoding='utf-8') as glovefile:
        for line in glovefile:
            fields = line.split()
            if not fields:
                continue

            word, values = fields[0], fields[1:]
            embedding_dim = len(values)

            if word in word_to_id:
                vector = np.array(values, dtype='float32')
                id_to_vec[word_to_id[word]] = torch.from_numpy(vector)

    # Words without pre-trained weights: small random initialization
    for word_id in word_to_id.values():
        if word_id in id_to_vec:
            continue
        if embedding_dim is None:
            raise ValueError(
                'No embedding vector found in ' + repr(path_to_glove_weights)
                + ': cannot infer the embedding dimension.')
        vector = (np.random.randn(embedding_dim) * 0.01).astype('float32')
        id_to_vec[word_id] = torch.from_numpy(vector)

    return id_to_vec

In [6]:
class Encoder(nn.Module):
    """
    LSTM encoder: embeds a sequence of word ids and returns the last hidden state of a single-layer LSTM.

    Parameters
    ----------
    emb_size : int
        Dimension of the word embeddings (must match the vectors of `id_to_vec`).
    hidden_size : int
        Dimension of the LSTM hidden state.
    p_dropout : float
        Dropout probability applied to the returned hidden state.
    id_to_vec : Dict[int, torch.Tensor]
        Initial embedding vector of each word id. The embedding table has one row per entry.
        A negative id designates a row counted from the end of the table, so an entry of id -1
        (the id used for unknown words by this file's `get_word_id`) occupies the last row.
    """

    def __init__(self, emb_size, hidden_size, p_dropout, id_to_vec):
        super(Encoder, self).__init__()

        self.emb_size = emb_size
        self.hidden_size = hidden_size
        self.vocab_size = len(id_to_vec)
        self.p_dropout = p_dropout

        self.embedding = nn.Embedding(self.vocab_size, self.emb_size)
        self.lstm = nn.LSTM(self.emb_size, self.hidden_size)
        self.dropout_layer = nn.Dropout(self.p_dropout)

        self.init_weights(id_to_vec)

    def init_weights(self, id_to_vec):
        """Initializes the LSTM weights and loads the given vectors into the (trainable) embedding table."""
        init.uniform_(self.lstm.weight_ih_l0, a=-0.01, b=0.01)
        init.orthogonal_(self.lstm.weight_hh_l0)
        self.lstm.weight_ih_l0.requires_grad = True
        self.lstm.weight_hh_l0.requires_grad = True

        # Start from zeros rather than uninitialized memory, so that a row without any
        # vector in id_to_vec never contains garbage (possibly NaN) values.
        embedding_weights = torch.zeros(self.vocab_size, self.emb_size)

        for idx, vec in id_to_vec.items():
            embedding_weights[idx] = vec  # A negative idx writes a row counted from the end

        self.embedding.weight = nn.Parameter(embedding_weights, requires_grad=True)

    def forward(self, inputs):
        """
        Encodes sequences of word ids.

        `inputs` is a LongTensor fed to the embedding layer and then to the LSTM, which is created with
        the default sequence-first layout, i.e. (seq_len x batch_size). Returns the dropped-out last
        hidden state of the last LSTM layer, dimensions: (batch_size x hidden_size).
        """
        # nn.Embedding rejects negative indices; translate them to the row they designate when
        # counted from the end, which is where init_weights stored their vector (e.g. -1 -> last row).
        inputs = torch.where(inputs < 0, inputs + self.vocab_size, inputs)

        embeddings = self.embedding(inputs)
        _, (last_hidden, _) = self.lstm(embeddings)  # dimensions: (num_layers * num_directions x batch_size x hidden_size)
        last_hidden = self.dropout_layer(last_hidden[-1])  # access last lstm layer, dimensions: (batch_size x hidden_size)

        return last_hidden

    
class DualEncoder(nn.Module):
    """
    Dual LSTM encoder.

    Encodes a context and a response with the same encoder and scores the pair with the
    bilinear form context . M . response, where M is a learned (hidden_size x hidden_size) matrix.
    """

    def __init__(self, encoder):
        super(DualEncoder, self).__init__()
        self.encoder = encoder
        self.hidden_size = self.encoder.hidden_size

        # Learned matrix of the bilinear form, Xavier-initialized
        bilinear_weights = torch.FloatTensor(self.hidden_size, self.hidden_size)
        init.xavier_normal_(bilinear_weights)
        self.M = nn.Parameter(bilinear_weights, requires_grad=True)

    def forward(self, context_tensor, response_tensor):
        """Returns one raw (unnormalized) score per context/response pair, dimensions: (batch_size x 1)."""
        hidden = self.hidden_size

        encoded_context = self.encoder(context_tensor)    # (batch_size x hidden_size)
        encoded_response = self.encoder(response_tensor)  # (batch_size x hidden_size)

        # Row vectors context . M, one per batch element: (batch_size x 1 x hidden_size)
        projected_context = torch.mm(encoded_context, self.M).view(-1, 1, hidden)
        # Column vectors of the responses: (batch_size x hidden_size x 1)
        response_columns = encoded_response.view(-1, hidden, 1)

        # Batched row-by-column product: (batch_size x 1 x 1), flattened to (batch_size x 1)
        return torch.bmm(projected_context, response_columns).view(-1, 1)

In [7]:
def creating_training_variables(path_to_training_set, path_to_glove_weights, embedding_dim=50, nb_dialogues=-1):
    """
    Loads the training dialogues and builds the embedding vector of every word of their vocabulary.

    An extra vector, stored under id -1, is created for unknown words: random values drawn from a
    standard normal distribution scaled by 0.01, of dimension `embedding_dim`.

    Returns the tuple (id_to_vec, word_to_id, my_personae, other_personae, line_indices, utterances, answers).
    """
    started_at = datetime.datetime.now().replace(microsecond=0)
    print(started_at, "Creating variables for training...")

    extracted = extract_dataset_as_text(path_to_training_set, True, nb_dialogues)
    word_to_id, my_personae, other_personae, line_indices, utterances, answers = extracted
    id_to_vec = create_id_to_vec(word_to_id, path_to_glove_weights)

    # Unknown words
    unknown_vector = np.zeros(embedding_dim, dtype='float32')
    unknown_vector[:] = 0.01 * np.random.randn(*unknown_vector.shape)
    id_to_vec[-1] = torch.FloatTensor(torch.from_numpy(unknown_vector))

    finished_at = datetime.datetime.now().replace(microsecond=0)
    print(finished_at, "Variables created.\n")
    return id_to_vec, word_to_id, my_personae, other_personae, line_indices, utterances, answers

def creating_validation_variables(path_to_validation_set, nb_dialogues=-1):
    """
    Loads the validation dialogues.

    The file is extracted like a training set (the first answer of each utterance is the correct one);
    the vocabulary built along the way is discarded.

    Returns the tuple (my_personae, other_personae, line_indices, utterances, answers).
    """
    started_at = datetime.datetime.now().replace(microsecond=0)
    print(started_at, "Creating variables for validations...")

    # First element (the vocabulary of the validation file) is intentionally dropped
    extracted = extract_dataset_as_text(path_to_validation_set, True, nb_dialogues)
    my_personae, other_personae, line_indices, utterances, answers = extracted[1:]

    finished_at = datetime.datetime.now().replace(microsecond=0)
    print(finished_at, "Variables created.\n")
    return my_personae, other_personae, line_indices, utterances, answers

In [8]:
def creating_model(emb_size, hidden_size, p_dropout, id_to_vec):
    """
    Builds a DualEncoder around a freshly created Encoder, prints its structure and
    returns it after moving it to the module-level `device`.
    """
    started_at = datetime.datetime.now().replace(microsecond=0)
    print(started_at, "Calling model...")

    dual_encoder = DualEncoder(Encoder(emb_size, hidden_size, p_dropout, id_to_vec))

    finished_at = datetime.datetime.now().replace(microsecond=0)
    print(finished_at, "Model created.\n")
    print(dual_encoder)

    return dual_encoder.to(device)

In [9]:
def get_word_id(word_to_id: dict, token: str) -> int:
    """Looks the token up in the vocabulary; unknown tokens map to -1 (ID for unknown words)."""
    return word_to_id.get(token, -1)

In [10]:
def save_training_data_on_disk(word_to_id, my_personae, other_personae, line_indices, utterances, answers, is_training, max_context_len=160, max_size_df=10000):
    """
    Converts the dialogues into (context, response, label) examples of word ids and saves them as CSV files.

    For each utterance of each dialogue, one example is produced per candidate answer:
        - context: ids of both personae followed by the dialogue so far (previous utterances, each
          followed by its correct answer) and the current utterance, cut to its first `max_context_len` ids
        - response: ids of the candidate answer, cut to its first `max_context_len` ids
        - label: 1 for the first answer (the correct one), 0 for the others
        - idx_line: index of the line of the utterance inside its dialogue
    Words missing from the vocabulary get the id -1.

    The examples are written to 'data/training_df<k>.csv' (or 'data/validation_df<k>.csv'), k = 0, 1, ...
    A new file is started before a dialogue as soon as the current one holds at least `max_size_df`
    examples; each example is written to exactly one file. For a training set, the vocabulary is
    also pickled to 'data/word_to_id.pkl'. The 'data' directory must already exist.

    Parameters
    ----------
    word_to_id : Dict[str, int]
        Vocabulary mapping each word to an unique id.
    my_personae, other_personae, line_indices, utterances, answers
        Per-dialogue lists describing the dataset, aligned on the dialogue index.
    is_training : bool
        Whether the data is the training set (selects the file names and triggers the vocabulary dump).
    max_context_len : int
        Maximum number of ids kept in a context and in a response.
    max_size_df : int
        Number of examples from which the current CSV file is closed and a new one is started.
    """
    columns = ['context', 'response', 'label', 'idx_line']

    dataframe_name = 'validation_df'
    if is_training:
        with open('data/word_to_id' + '.pkl', 'wb') as dict_file:
            pickle.dump(word_to_id, dict_file)
        dataframe_name = 'training_df'

    # Examples are buffered in a plain list and converted to a DataFrame only when written:
    # growing a DataFrame row by row is quadratic.
    rows = []
    idx_dataframe = 0
    for idx_dialogue in range(len(line_indices)):
        if len(rows) >= max_size_df:
            pd.DataFrame(rows, columns=columns).to_csv(
                'data/{0}{1}.csv'.format(dataframe_name, idx_dataframe), index=False)
            idx_dataframe += 1
            rows = []  # Start a fresh chunk, otherwise the next file would repeat these examples

        # Add my persona, then other persona, in context
        context_ids = [get_word_id(word_to_id, token)
                       for sentence in my_personae[idx_dialogue] for token in sentence]
        context_ids.extend(get_word_id(word_to_id, token)
                           for sentence in other_personae[idx_dialogue] for token in sentence)

        # Add utterances, create responses and labels
        for idx_utterance, utterance in enumerate(utterances[idx_dialogue]):
            if idx_utterance != 0:
                # Add previous correct answer in context
                context_ids.extend(get_word_id(word_to_id, token)
                                   for token in answers[idx_dialogue][idx_utterance - 1][0])

            # Add utterance in context
            context_ids.extend(get_word_id(word_to_id, token) for token in utterance)

            # NOTE(review): the cut keeps the beginning of the context, so once the limit is reached
            # the most recent utterances are the ones left out — confirm this is intended.
            truncated_context = context_ids[:max_context_len]

            # Get response and label
            for idx_answer, answer in enumerate(answers[idx_dialogue][idx_utterance]):
                response_ids = [get_word_id(word_to_id, token) for token in answer][:max_context_len]
                label = 1 if idx_answer == 0 else 0
                # Store a snapshot of the context: the running list keeps growing afterwards
                rows.append([list(truncated_context), response_ids, label,
                             line_indices[idx_dialogue][idx_utterance]])

    pd.DataFrame(rows, columns=columns).to_csv(
        'data/{0}{1}.csv'.format(dataframe_name, idx_dataframe), index=False)

In [11]:
def train_model(learning_rate, l2_penalty, nb_epochs, dual_encoder, word_to_id,
               tr_my_personae, tr_other_personae, tr_line_indices, tr_utterances, tr_answers,
               val_my_personae, val_other_personae, val_line_indices, val_utterances, val_answers):
    """
    Trains the dual encoder on in-memory dialogues, one (context, response) pair at a time.

    For each utterance, the context is made of the ids of both personae, the dialogue so far (previous
    utterances, each followed by its correct answer) and the utterance itself. Every candidate answer
    is scored against that context; the first candidate is the positive example (label 1), the others
    are negatives (label 0). The loss is a binary cross-entropy on the raw score and an Adam step is
    taken after each pair. The mean training loss is printed after each epoch.

    Tensors are created on the device holding the model parameters, so the function runs on CPU
    as well as on GPU.

    Parameters
    ----------
    learning_rate : float
        Learning rate of the Adam optimizer.
    l2_penalty : float
        Weight decay of the Adam optimizer.
    nb_epochs : int
        Number of passes over the training dialogues.
    dual_encoder : nn.Module
        Model called as dual_encoder(context, response) with two LongTensors of dimensions
        (sequence_length x 1), returning a (1 x 1) score.
    word_to_id : Dict[str, int]
        Vocabulary; must contain every token of the training dialogues.
    tr_my_personae, tr_other_personae, tr_line_indices, tr_utterances, tr_answers
        Per-dialogue lists describing the training set, aligned on the dialogue index.
    val_my_personae, val_other_personae, val_line_indices, val_utterances, val_answers
        Validation set. Currently unused: no validation pass is run (see the TODO in the body).

    Returns
    -------
    dual_encoder : nn.Module
        The same model object, trained in place.
    """
    print(str(datetime.datetime.now()).split('.')[0], "Starting training...\n")

    optimizer = torch.optim.Adam(dual_encoder.parameters(), lr=learning_rate, weight_decay=l2_penalty)
    loss_func = torch.nn.BCEWithLogitsLoss()

    # Follow the model instead of assuming CUDA: a hard-coded .cuda() fails on CPU-only machines
    model_device = next(dual_encoder.parameters()).device
    positive_label = torch.ones(1, 1, device=model_device)
    negative_label = torch.zeros(1, 1, device=model_device)

    def to_column(ids):
        """Turns a list of word ids into a (sequence_length x 1) LongTensor on the model device."""
        return torch.tensor(ids, dtype=torch.long, device=model_device).view(-1, 1)

    for epoch in range(nb_epochs):
        # TODO : shuffle data here
        sum_loss_training = 0
        nb_iter_tr = 0

        dual_encoder.train()

        for idx_dialogue in range(len(tr_line_indices)):
            # Add my persona, then other persona, in context
            context_ids = [word_to_id[token]
                           for sentence in tr_my_personae[idx_dialogue] for token in sentence]
            context_ids.extend(word_to_id[token]
                               for sentence in tr_other_personae[idx_dialogue] for token in sentence)

            # Add utterances, create responses and labels
            for idx_utterance, utterance in enumerate(tr_utterances[idx_dialogue]):
                if idx_utterance != 0:
                    # Add previous correct answer in context
                    context_ids.extend(word_to_id[token]
                                       for token in tr_answers[idx_dialogue][idx_utterance - 1][0])

                # Add utterance in context
                context_ids.extend(word_to_id[token] for token in utterance)

                candidates = tr_answers[idx_dialogue][idx_utterance]
                if not candidates:
                    continue

                # The context is the same for every candidate of the utterance: build its tensor once
                context = to_column(context_ids)

                for idx_answer, answer in enumerate(candidates):
                    response = to_column([word_to_id[token] for token in answer])
                    label = positive_label if idx_answer == 0 else negative_label

                    # Predict
                    score = dual_encoder(context, response)
                    loss = loss_func(score, label)

                    # Train
                    nb_iter_tr += 1
                    sum_loss_training += loss.item()
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

        # TODO : add a validation pass (dual_encoder.eval(), no optimizer step) over the val_* lists,
        # looking words up with an unknown-word fallback since validation tokens may be missing
        # from word_to_id, and print the mean validation loss.

        # No pair at all (empty dataset): report NaN rather than dividing by zero
        mean_loss_training = sum_loss_training / nb_iter_tr if nb_iter_tr else float('nan')
        print('Training loss =', mean_loss_training)

    print(str(datetime.datetime.now()).split('.')[0], "Training and validation epochs finished.")
    return dual_encoder

In [12]:
def _parse_id_list(text):
    """Parses the string form of a list of ints (e.g. '[3, 14, -1]', or '[]') back into a list of ints."""
    return [int(piece) for piece in str(text).strip().strip('[]').split(',') if piece.strip()]


def train_model_df(learning_rate, l2_penalty, nb_epochs, dual_encoder, word_to_id):
    """
    Training with dataframe: trains the dual encoder on the examples stored in 'data/training_df*' CSV files.

    Each file is read with pandas and shuffled; each row provides a 'context' and a 'response' (lists of
    word ids stored as text) and a 'label' (1 for a correct response, 0 otherwise). The loss is a binary
    cross-entropy on the raw score and an Adam step is taken after each row. Rows whose context or
    response is empty are skipped. The mean training loss is printed after each epoch.

    Tensors are created on the device holding the model parameters, so the function runs on CPU
    as well as on GPU.

    Parameters
    ----------
    learning_rate : float
        Learning rate of the Adam optimizer.
    l2_penalty : float
        Weight decay of the Adam optimizer.
    nb_epochs : int
        Number of passes over the CSV files.
    dual_encoder : nn.Module
        Model called as dual_encoder(context, response) with two LongTensors of dimensions
        (sequence_length x 1), returning a (1 x 1) score.
    word_to_id : Dict[str, int]
        Unused: the CSV files already contain word ids.

    Returns
    -------
    dual_encoder : nn.Module
        The same model object, trained in place.
    """
    print(str(datetime.datetime.now()).split('.')[0], "Starting training...\n")

    optimizer = torch.optim.Adam(dual_encoder.parameters(), lr=learning_rate, weight_decay=l2_penalty)
    loss_func = torch.nn.BCEWithLogitsLoss()

    # Follow the model instead of assuming CUDA: a hard-coded .cuda() fails on CPU-only machines
    model_device = next(dual_encoder.parameters()).device

    for epoch in range(nb_epochs):
        sum_loss_training = 0
        nb_iter_tr = 0

        dual_encoder.train()
        # Sorted so that the files are visited in the same order on every platform
        for training_df_name in sorted(glob.glob('data/training_df*')):
            training_df = pd.read_csv(training_df_name).sample(frac=1)  # Shuffle

            for _, row in training_df.iterrows():
                context_ids = _parse_id_list(row['context'])
                response_ids = _parse_id_list(row['response'])
                if not context_ids or not response_ids:
                    continue  # An empty sequence cannot be encoded

                context = torch.tensor(context_ids, dtype=torch.long, device=model_device).view(-1, 1)
                response = torch.tensor(response_ids, dtype=torch.long, device=model_device).view(-1, 1)
                label = torch.tensor([[float(row['label'])]], device=model_device)

                # Predict
                score = dual_encoder(context, response)
                loss = loss_func(score, label)

                # Train
                nb_iter_tr += 1
                sum_loss_training += loss.item()
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

        # TODO : add a validation pass (dual_encoder.eval(), no optimizer step) over the
        # 'data/validation_df*' files and print the mean validation loss.

        # No example at all (no file or empty files): report NaN rather than dividing by zero
        mean_loss_training = sum_loss_training / nb_iter_tr if nb_iter_tr else float('nan')
        print('Training loss =', mean_loss_training)

    print(str(datetime.datetime.now()).split('.')[0], "Training and validation epochs finished.")
    return dual_encoder

In [13]:
# Dimension of the word embeddings; passed both to the data preparation (size of the
# unknown-word vector) and to the model (embedding size), so it has to match the weights file.
EMBEDDING_DIM = 50
# Number of dialogues read from each dataset file.
NB_DIALOGUES = 2  # Set -1 for all

In [14]:
# Load the training dialogues, their vocabulary and the embedding vector of each word id.
(id_to_vec, word_to_id, tr_my_personae, tr_other_personae,
 tr_line_indices, tr_utterances, tr_answers) = creating_training_variables(
    PATH_TO_TRAINING_SET, PATH_TO_WEIGHTS, EMBEDDING_DIM, NB_DIALOGUES)

2019-04-15 02:32:25 Creating variables for training...
Loaded 2 dialogues
2019-04-15 02:32:34 Variables created.



In [15]:
# Load the validation dialogues.
(val_my_personae, val_other_personae,
 val_line_indices, val_utterances, val_answers) = creating_validation_variables(
    PATH_TO_VALIDATION_SET, NB_DIALOGUES)

2019-04-15 02:32:34 Creating variables for validations...
Loaded 2 dialogues
2019-04-15 02:32:34 Variables created.



In [16]:
# Export the training examples (together with the pickled vocabulary), then the validation examples.
save_training_data_on_disk(
    word_to_id, tr_my_personae, tr_other_personae, tr_line_indices, tr_utterances, tr_answers,
    is_training=True)
save_training_data_on_disk(
    word_to_id, val_my_personae, val_other_personae, val_line_indices, val_utterances, val_answers,
    is_training=False)

In [17]:
# Build the model: hidden state of size 50, dropout probability of 0.1.
dual_encoder = creating_model(
    emb_size=EMBEDDING_DIM,
    hidden_size=50,
    p_dropout=0.1,
    id_to_vec=id_to_vec,
)

2019-04-15 02:32:35 Calling model...
2019-04-15 02:32:35 Model created.

DualEncoder(
  (encoder): Encoder(
    (embedding): Embedding(854, 50)
    (lstm): LSTM(50, 50)
    (dropout_layer): Dropout(p=0.1)
  )
)


In [19]:
# Training on the in-memory dialogues: 10 epochs of Adam with a learning rate and a weight decay of 1e-4.
# The validation lists are passed along, but train_model does not use them (its validation pass is disabled).
# NOTE(review): the cell counter (19) and the timestamps of the outputs suggest that this cell was run
# after the dataframe-based training cell (18), i.e. on an already trained dual_encoder — confirm.
learning_rate = 1e-4
l2_penalty = 1e-4
nb_epochs = 10
dual_encoder = train_model(learning_rate, l2_penalty, nb_epochs, dual_encoder, word_to_id,
                           tr_my_personae, tr_other_personae, tr_line_indices, tr_utterances, tr_answers,
                           val_my_personae, val_other_personae, val_line_indices, val_utterances, val_answers)

2019-04-15 02:33:43 Starting training...

Training loss = 0.18769825651266037
Training loss = 0.18823424732268212
Training loss = 0.18532153655660852
Training loss = 0.1777685421097669
Training loss = 0.17361444199685844
Training loss = 0.17935311002973595
Training loss = 0.17824914578988285
Training loss = 0.17020619477835947
Training loss = 0.16038986673851047
Training loss = 0.15296309048683718
2019-04-15 02:34:31 Training and validation epochs finished.


In [18]:
# Training with dataframe: 10 epochs of Adam (learning rate and weight decay of 1e-4) over the
# 'data/training_df*' CSV files written by save_training_data_on_disk.
# The model returned is the same object as the one passed in, so any later training cell
# continues from the weights obtained here.
learning_rate = 1e-4
l2_penalty = 1e-4
nb_epochs = 10
dual_encoder = train_model_df(learning_rate, l2_penalty, nb_epochs, dual_encoder, word_to_id)

2019-04-15 02:32:41 Starting training...

Training loss = 0.31649383706861134
Training loss = 0.20400283946164688
Training loss = 0.2031813178220954
Training loss = 0.1997427689529271
Training loss = 0.19331151172423697
Training loss = 0.1914491235424264
Training loss = 0.19986246750677372
Training loss = 0.19108849731242272
Training loss = 0.19430851138222227
Training loss = 0.1893954094907991
2019-04-15 02:33:31 Training and validation epochs finished.


__TODO:__
- Shuffle dataset before each epoch
- Plot learning curves
- Save model after training
- Write tests for the functions
- Optimize parameters

If training is too slow or the kernel dies, try running the code as `.py` scripts (or remove the validation part from the training).