In [1]:
root_dir = '/home/alfirsafauzulh@student.ub.ac.id/Firsa/Research/Chatbot'

data_dir = './Datasets'
dailydialogs_root_dir = data_dir + '/dailydialog'
cornell_root_dir = data_dir + '/cornell_movie'

# Import Library

In [2]:
# !pip install datasets
# !pip install gensim

In [3]:
import seaborn as sns
sns.set()

In [4]:
import os
import re
import time
import numpy as np
import pandas as pd
from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim
# from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

import random
from random import seed, randrange

import matplotlib.pyplot as plt

from nltk.translate.bleu_score import sentence_bleu

from sklearn.model_selection import train_test_split

import pickle
import json
from tqdm import tqdm
# import gensim

# Mount GDrive

In [5]:
import os
from google.colab import drive

# COLAB
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Utils


## Tokenizer Class

In [5]:
class Tokenizer():
  def __init__(self, data, min_freq=2, vocabs_npa=None, embs_npa=None):
    self.vocabs_npa = vocabs_npa
    self.embs_npa = embs_npa
    self.data = data
    self.min_freq = min_freq
    self.word2index = {}
    self.index2word = {}
    self.wordfreq = {}
    self.vocab = set()

    self.build()

  def build(self):
    for phrase in self.data:
      for word in phrase.split(' '):
        if word not in self.wordfreq.keys():
          self.wordfreq[word] = 1
        else:
          self.wordfreq[word]+=1

    for phrase in self.data:
      phrase_word = phrase.split(' ')
      phrase_word_update = []
      
      for data in phrase_word:
        if self.wordfreq[data] >= self.min_freq:
          phrase_word_update.append(data)

      self.vocab.update(phrase_word_update)

    self.vocab = sorted(self.vocab)

    self.word2index['<PAD>'] = 0
    self.word2index['<UNK>'] = 1
    self.word2index['<sos>'] = 2
    self.word2index['<eos>'] = 3
    
    for i, word in enumerate(self.vocab):
      self.word2index[word] = i+4

    for word, i in self.word2index.items():
      self.index2word[i] = word

  def text_to_sequence(self, text):
    sequences = []

    for word in text:
      try:
        sequences.append(self.word2index[word])
      except:
        sequences.append(self.word2index['<UNK>'])

    return sequences

  def sequence_to_text(self, sequence):
    texts = []

    for token in sequence:
      try:
        texts.append(self.index2word[token])
      except:
        texts.append(self.index2word[1])

    return texts

In [6]:
def pad_sequences(x, max_len):
  padded = np.zeros((max_len), dtype=np.int64)
  
  if len(x) > max_len:
    padded[:] = x[:max_len]

  else:
    padded[:len(x)] = x
    
  return padded

## Data Adjustment Class

In [7]:
class MyData(Dataset):
    def __init__(self, X, y):
        self.data = X
        self.target = y
        # TODO: convert this into torch code is possible
        self.length = [ np.sum(1 - np.equal(x, 0)) for x in X]
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        x_len = self.length[index]
        return x,y,x_len
    
    def __len__(self):
        return len(self.data)

## Response Method

In [8]:
def respond_only(model, sentence, question, answer, device, max_length=50):
    # print(sentence)

    # sys.exit()

    # Load question tokenizer
    # spacy_en = spacy.load("en")

    # Create tokens using spacy and everything in lower case (which is what our vocab is)
    if type(sentence) == str:
        sentence = normalize(sentence)
        sentence = remove_non_letter(sentence)
        sentence = remove_whitespace(sentence)

        tokens = [token.lower() for token in sentence.split(' ')]
    else:
        tokens = [token.lower() for token in sentence]

    # print(tokens)

    # sys.exit()
    # Add <SOS> and <EOS> in beginning and end respectively
    tokens.insert(0, '<sos>')
    tokens.append('<eos>')

    # Go through each question token and convert to an index
    text_to_indices = []
    for token in tokens:
      if token in question.word2index.keys():
        text_to_indices.append(question.word2index[token])
      else:
        text_to_indices.append(question.word2index['<UNK>'])
    # text_to_indices = [question.word2index[token] for token in tokens]
    sentence_length = len(text_to_indices)
    text_to_indices = pad_sequences(text_to_indices, th+2)

    # Convert to Tensor
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)
    sentence_length = torch.tensor([sentence_length])
    # Build encoder hidden, cell state

    with torch.no_grad():
        encoder_states, hidden, cell = model.encoder(sentence_tensor, sentence_length)

    outputs = [answer.word2index["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hidden, cell, _ = model.decoder(previous_word, encoder_states, hidden, cell)
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if output.argmax(1).item() == answer.word2index["<eos>"]:
            break

    answer_token = [answer.index2word[idx] for idx in outputs]

    # print('Question\t:', sentence)
    # print('Answer\t\t:', ' '.join(translated_sentence[1:-1]))
    
    return ' '.join(answer_token[1:-1])

In [9]:
def respond(sentence):
  answer = respond_only(model, sentence, question_tokenizer, answer_tokenizer, device, max_length=17)
  print('Me\t:', sentence)
  print('Bot\t:', answer)
  print()

# Preprocess

In [10]:
np.random.seed(42)

th = 50
df = pd.read_csv(dailydialogs_root_dir + f'/df_dailydialogs_max_{th}.csv')

In [11]:
df.head()

Unnamed: 0,questions,answers
0,the kitchen stinks,i will throw out the garbage
1,so dick how about getting some coffee for tonight,coffee i don t honestly like that kind of stuff
2,coffee i don t honestly like that kind of stuff,come on you can at least try a little besides ...
3,come on you can at least try a little besides ...,what s wrong with that cigarette is the thing ...
4,what s wrong with that cigarette is the thing ...,not for me dick


In [12]:
print(df.isnull().sum())
print()
df = df.dropna()
print()
print(df.isnull().sum())

questions    69
answers      76
dtype: int64


questions    0
answers      0
dtype: int64


In [13]:
tokenizer = Tokenizer(pd.concat([df['questions'], df['answers']], axis=0).values, min_freq=1)
question_tokenizer = tokenizer
answer_tokenizer = tokenizer
    
# question_tokenizer = Tokenizer(df['questions'].values, min_freq=1)
# answer_tokenizer = Tokenizer(df['answers'].values, min_freq=1)

print(len(question_tokenizer.vocab))
print(len(answer_tokenizer.vocab))

17173
17173


In [14]:
def normalize(txt):
  txt = txt.lower()
  txt = re.sub(r"i'm", "i am", txt)
  txt = re.sub(r"he's", "he is", txt)
  txt = re.sub(r"she's", "she is", txt)
  txt = re.sub(r"that's", "that is", txt)
  txt = re.sub(r"what's", "what is", txt)
  txt = re.sub(r"where's", "where is", txt)
  txt = re.sub(r"\'ll", " will", txt)
  txt = re.sub(r"\'ve", " have", txt)
  txt = re.sub(r"\'re", " are", txt)
  txt = re.sub(r"\'d", " would", txt)
  txt = re.sub(r"won't", "will not", txt)
  txt = re.sub(r"can't", "can not", txt)
  txt = re.sub(r"a'ight", "alright", txt)
  txt = re.sub(r"n't", ' not', txt)
  return txt

def remove_non_letter(data):
  return re.sub(r'[^a-zA-Z]',' ', data)

def remove_whitespace(data):
  data = [x for x in data.split(' ') if x]
  return ' '.join(data)

def tokenize(text):
  text = str(text)
  return [token for token in text.split(' ')]

def add_sos_eos(text):
  return '<sos> ' + text + ' <eos>'

In [15]:
max_len = th+2

df['questions_preprocessed'] = df['questions'].map(lambda x: add_sos_eos(x))
df['answers_preprocessed'] = df['answers'].map(lambda x: add_sos_eos(x))

df['questions_preprocessed'] = df['questions_preprocessed'].map(lambda x: tokenize(x))
df['answers_preprocessed'] = df['answers_preprocessed'].map(lambda x: tokenize(x))

df['questions_preprocessed'] = df['questions_preprocessed'].map(lambda x: question_tokenizer.text_to_sequence(x))
df['answers_preprocessed'] = df['answers_preprocessed'].map(lambda x: answer_tokenizer.text_to_sequence(x))

df['questions_preprocessed'] = df['questions_preprocessed'].map(lambda x: pad_sequences(x, max_len))
df['answers_preprocessed'] = df['answers_preprocessed'].map(lambda x: pad_sequences(x, max_len))

In [16]:
# df = df.sample(frac=.66, random_state=77)
df_train, df_test = train_test_split(df, test_size=.1, random_state=42)

In [17]:
print(f"Jml Data Latih\t: {len(df_train)}\nJml Data Uji\t: {len(df_test)}")

Jml Data Latih	: 79684
Jml Data Uji	: 8854


# Model

In [29]:
class Encoder(nn.Module):
  def __init__(self, input_size, embedding_size, hidden_size, num_layers, p, pretrained_word_embedding=False, embedding_matrix=None, freeze=False):
    super(Encoder, self).__init__()
    self.dropout = nn.Dropout(p)
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.device = device

    if pretrained_word_embedding:
      self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(embedding_matrix_q).float(), freeze=freeze)
    
    self.embedding = nn.Embedding(input_size, embedding_size)

    self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)

  def forward(self, x, lens):
    # x shape: (seq_length, N), N ==> batch size

    embedding = self.dropout(self.embedding(x))
    # embedding shape: (ssq_length, N, embedding_size)
    embedding = pack_padded_sequence(embedding, lens)
    
    encoder_states, (hidden, cell) = self.rnn(embedding)
    encoder_states, _ = pad_packed_sequence(encoder_states)

    return encoder_states, hidden, cell

class Decoder(nn.Module):
  def __init__(self, input_size, embedding_size, hidden_size, output_size, 
               num_layers, p, pretrained_word_embedding=False, embedding_matrix=None, freeze=False):
    
    super(Decoder, self).__init__()
    self.dropout = nn.Dropout(p)
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    if pretrained_word_embedding:
      self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(embedding_matrix_a).float(), freeze=freeze)
    
    self.embedding = nn.Embedding(input_size, embedding_size)

    self.rnn = nn.LSTM(embedding_size + hidden_size, hidden_size, num_layers, dropout=p)
    self.fc = nn.Linear(hidden_size, output_size)

    self.W1 = nn.Linear(self.hidden_size, self.hidden_size)
    self.W2 = nn.Linear(self.hidden_size, self.hidden_size)
    self.V = nn.Linear(self.hidden_size, 1)

  def forward(self, x, encoder_states, hidden, cell):
    # x shape = (N) tapi kita butuh (1, N) karena decoder hanya predict 1 kata tiap predict
    x = x.unsqueeze(0)

    encoder_states = encoder_states.permute(1, 0, 2)
    hidden_with_time_axis = hidden.permute(1, 0, 2)

    attention_score = self.W1(encoder_states) + self.W2(hidden_with_time_axis)
    attention_score = torch.tanh(attention_score)

    attention_weights = self.V(attention_score)
    attention_weights = torch.softmax(attention_weights, dim=1)

    context_vector = attention_weights * encoder_states
    context_vector = torch.sum(context_vector, dim=1)

    embedding = self.dropout(self.embedding(x))
    # embedding shape = (1, N, embedding_size)

    embedding = torch.cat((context_vector.unsqueeze(1).permute(1,0,2), embedding), dim=-1)

    outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))
    # outputs shape = (1, N, hidden_size)

    predictions = self.fc(outputs)
    # predictions shape = (1, N, length_vocab)

    predictions = predictions.squeeze(0)
    # predictions shape = (N, length_vocab) untuk dipassing ke loss function

    return predictions, hidden, cell, attention_weights

class Seq2Seq(nn.Module):
  def __init__(self, encoder, decoder):
    super(Seq2Seq, self).__init__()
    self.encoder = encoder
    self.decoder = decoder

  def forward(self, source, target, input_len, teacher_force_ratio=.5):
    # source and target shape = (target_len, N)
    batch_size = source.shape[1]
    target_len = target.shape[0]
    target_vocabulary_size = len(answer_tokenizer.vocab)+4
    # target_vocabulary_size = len(answer_tokenizer.vocab)

    outputs = torch.zeros(target_len, batch_size, target_vocabulary_size).to(device)

    encoder_states, hidden, cell = self.encoder(source, input_len)
    # ambil start token
    x = target[0]

    for t in range(1, target_len):
      output, hidden, cell, _ = self.decoder(x, encoder_states, hidden, cell)

      outputs[t] = output
      # output shape = (N, answer_vocab_size)

      best_guess = output.argmax(1)

      x = target[t] if random.random() < teacher_force_ratio else best_guess

    return outputs

# Pretrained Word Embedding


> List Of Pretrained WE Gensim


1. glove-wiki-gigaword-50 (65 MB)
2. glove-wiki-gigaword-100 (128 MB)
3. glove-wiki-gigaword-200 (252 MB)
4. glove-wiki-gigaword-300 (376 MB)
5. word2vec-google-news-300 (1662 MB)
6. word2vec-ruscorpora-300 (198 MB)







In [None]:
import gensim.downloader as api

embeddings_ap = api.load('word2vec-google-news-300')
pretrained_word_embedding_dimensions = 300



In [None]:
vocab_q = question_tokenizer.index2word
vocab_a = answer_tokenizer.index2word

vocab_q_size = len(vocab_q)
vocab_a_size = len(vocab_a)

embedding_matrix_q = np.zeros((vocab_q_size, pretrained_word_embedding_dimensions))
embedding_matrix_a = np.zeros((vocab_a_size, pretrained_word_embedding_dimensions))

for i, word in question_tokenizer.index2word.items():
  try:
    embedding_q_vector = embeddings_ap[word]
    if embedding_q_vector is not None:
      embedding_matrix_q[i] = embedding_q_vector
  except:
    pass

for i, word in answer_tokenizer.index2word.items():
  try:
    embedding_a_vector = embeddings_ap[word]
    if embedding_a_vector is not None:
      embedding_matrix_a[i] = embedding_a_vector
  except:
    pass

In [None]:
zero_embedding_count_q = 0
zero_embedding_count_a = 0

for eq, ea in zip(embedding_matrix_q, embedding_matrix_a):
  if all(item == 0 for item in eq):
    zero_embedding_count_q+=1

  if all(item == 0 for item in ea):
    zero_embedding_count_a+=1

print(f"{zero_embedding_count_q}\n{zero_embedding_count_a}")

513
466


# Train

In [19]:
from models.LSTMBahdanau import Encoder, Decoder, Seq2Seq

In [20]:
input_tensor_train = df_train['questions_preprocessed'].values.tolist()
target_tensor_train = df_train['answers_preprocessed'].values.tolist()

input_tensor_test = df_test['questions_preprocessed'].values.tolist()
target_tensor_test = df_test['answers_preprocessed'].values.tolist()

train_data = MyData(input_tensor_train, target_tensor_train)
test_data = MyData(input_tensor_test, target_tensor_test)

In [21]:
import gc
gc.collect()
torch.cuda.empty_cache()

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Training hyperparams
num_epochs = 30
learning_rate = 0.001
batch_size = 256

# Model hyperparams
load_model = False
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size_encoder = len(question_tokenizer.vocab)+4
input_size_decoder = len(answer_tokenizer.vocab)+4
output_size = len(answer_tokenizer.vocab)+4

encoder_embedding_size = 256
decoder_embedding_size = 256
hidden_size = 768
num_layers = 1
enc_dropout = 0
dec_dropout = 0

train_dataset = DataLoader(train_data, batch_size = batch_size, drop_last=True, shuffle=True)
test_dataset = DataLoader(test_data, batch_size = batch_size, drop_last=True, shuffle=True)

# encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size, 
#                       num_layers, enc_dropout, pretrained_word_embedding=True, embedding_matrix=embedding_matrix_q, freeze=False).to(device)

# decoder_net = Decoder(input_size_decoder, decoder_embedding_size, hidden_size, 
#                       output_size, num_layers, dec_dropout, pretrained_word_embedding=True, embedding_matrix=embedding_matrix_a, freeze=False).to(device)

encoder_net = Encoder(input_size_encoder, encoder_embedding_size, hidden_size, 
                      num_layers, enc_dropout, pretrained_word_embedding=False, embedding_matrix=None, freeze=False).to(device)

decoder_net = Decoder(input_size_decoder, decoder_embedding_size, hidden_size, 
                      output_size, num_layers, dec_dropout, pretrained_word_embedding=False, embedding_matrix=None, freeze=False).to(device)

model = Seq2Seq(encoder_net, decoder_net).to(device)

TypeError: __init__() missing 1 required positional argument: 'vocab_len'

In [None]:
pad_idx = answer_tokenizer.word2index["<PAD>"]
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

def loss_function(real, pred):
    """ Only consider non-zero inputs in the loss; mask needed """
    #mask = 1 - np.equal(real, 0) # assign 0 to all above 0 and 1 to all 0s
    #print(mask)
    mask = real.ge(1).type(torch.cuda.FloatTensor)
    
    loss_ = criterion(pred, real) * mask 
    return torch.mean(loss_)

### sort batch function to be able to use with pad_packed_sequence
def sort_within_batch(X, y, lengths):
    lengths, indx = lengths.sort(dim=0, descending=True)
    X = X[indx]
    y = y[indx]
    return X, y, lengths # transpose (batch x seq) to (seq x batch)

In [None]:
train_losses = []
val_losses = []

for epoch in range(num_epochs):
  start = time.time()

  print(f"Epoch [{epoch+1}/{num_epochs}]")

  num_batch = 0
  val_num_batch = 0
  batch_loss = 0
  val_batch_loss = 0

  training_time = 0

#   for (batch_idx, (X_train, y_train, input_len)) in enumerate(train_dataset):
  for (batch_idx, (X_train, y_train, input_len)) in enumerate(bar := tqdm(train_dataset)):
    X, y, input_lengths = sort_within_batch(X_train, y_train, input_len)
    
    X = X.permute(1,0)
    y = y.permute(1,0)

    inp_data = X.to(device)
    target = y.to(device)
    # target shape = (target_length, batch_size))

    # print(inp_data.shape, target.shape)
    output = model(inp_data, target, input_lengths)
    # # output shape = (target_length, batch_size, output_dim)
    
    # print(output.shape, target.shape)

    output = output[1:].reshape(-1, output.shape[2])
    target = target[1:].reshape(-1)
    # membuat output shape menjadi (target_length*batch_size, output dim) dan target shape menjadi (target_length*batch_size) untuk dipassing ke loss function

    # print(output.shape, target.shape)

    optimizer.zero_grad()
    loss = loss_function(target, output)
    batch_loss += loss.detach()

    loss.backward()

    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
    optimizer.step()

    bar.set_description(f'Train Seq2Seq Model '
                        f'[train_loss={loss.detach():.4f}')
    
    num_batch+=1
  
  
  train_loss_ = batch_loss/num_batch
  train_losses.append(train_loss_)

  train_time = time.time() - start
  training_time += train_time

  print(f"Epoch {epoch+1} loss: {train_loss_}")
  print('Time taken for 1 epoch {} sec\n'.format(train_time))

Epoch [1/30]
	Epoch 1 Batch 32 Loss 1.6210


# Test

In [148]:
with torch.no_grad():
  model.eval()
  test_batch_loss = 0
  test_num_batch = 0
  for (test_batch_idx, (X_test, y_test, input_len)) in enumerate(test_dataset):
    X, y, input_lengths = sort_within_batch(X_test, y_test, input_len)

    X = X.permute(1,0)
    y = y.permute(1,0)

    test_inp_data = X.to(device)
    test_target = y.to(device)

    output = model(test_inp_data, test_target, input_lengths)
      
    output = output[1:].reshape(-1, output.shape[2])
    test_target = test_target[1:].reshape(-1)

    test_loss = loss_function(test_target, output)
    test_batch_loss += test_loss
    test_num_batch+=1

  test_loss_ = test_batch_loss/test_num_batch

  print(f"test_loss: {test_loss_}")

test_loss: 2.9881205558776855


In [151]:
respond('hi')
respond('good morning')
respond('how are you ?')
respond('Nice to meet you')
respond('Where do you live now ?')
respond("Do you like football ?")
respond("What is your favourite food ?")
respond("Do you know john ?")
respond("What is the matter ?")
respond("See you again")
respond("Who created you ?")
respond("It’s a pleasure to meet you")
respond("How’s it going ?")
respond("Can you help me ?")
respond("Hey")
respond("I need your help")
respond("Hello")
respond("Are you a bot ?")
respond("Hey Bot, how are you doing ?")
respond("Bot, can you come and help me ?")
respond("Would you please help me with my luggage ?")
respond("Do you need anything else ?")
respond("Ok, thank you")
respond("I am worried")
respond("Can we walk there ?")
respond("Ok, see you then, bye")

Me	: hi
Bot	: what are you planning on do it

Me	: good morning
Bot	: good morning

Me	: how are you ?
Bot	: i am fine thanks how about you

Me	: Nice to meet you
Bot	: it s very nice to meet you too

Me	: Where do you live now ?
Bot	: i live at zhongguancun road apt beijing

Me	: Do you like football ?
Bot	: yes i am very hungry

Me	: What is your favourite food ?
Bot	: it s hard to say food

Me	: Do you know john ?
Bot	: no i

Me	: What is the matter ?
Bot	: i subway should i take to get to the east side

Me	: See you again
Bot	: my pleasure

Me	: Who created you ?
Bot	: i m

Me	: It’s a pleasure to meet you
Bot	: a pleasure meeting to our company for you you for our company

Me	: How’s it going ?
Bot	: i am in a good mood today actually how about you

Me	: Can you help me ?
Bot	: sure what can i do for you

Me	: Hey
Bot	: what s the matter

Me	: I need your help
Bot	: oh what is up

Me	: Hello
Bot	: hi lucy are you calling to

Me	: Are you a bot ?
Bot	: yes i am here

Me	: Hey Bot, 

In [152]:
test_questions = df_test['questions'].values
test_answers = df_test['answers'].values

preds = []
for x in test_questions:
  preds.append(respond_only(model, str(x), question_tokenizer, answer_tokenizer, device, max_length=17))

for (q, real, a) in zip(test_questions[:500], test_answers[:500], preds[:500]):
  print('Question\t:', q)
  print('Answer\t\t:', real)
  print('Pred\t\t:', a)
  print()

Question	: the pass is free
Answer		: i do not have to pay for anything
Pred		: where would you like

Question	: i did i especially liked the food what do you think about the food here
Answer		: it s very different from chinese food but i am getting used to it
Pred		: i think love the food really like it really

Question	: i spend all my time looking after my baby i am so tired
Answer		: why do not you ask your husband to help you with it
Pred		: are you ready to go to work

Question	: that is right
Answer		: so i wonder why you want to work for a newspaper
Pred		: just a minute please

Question	: we can go to that new chinese restaurant
Answer		: i suppose i could
Pred		: i do that a new refrigerator

Question	: can you help me out chuck
Answer		: sure what is it yvonne
Pred		: i can not believe this is my hair

Question	: here is the film can i get my pictures tomorrow
Answer		: sure
Pred		: of course it s a reservation is your name

Question	: i believe you are cut out for this posi

In [153]:
bleu_score_1 = 0
bleu_score_2 = 0
bleu_score_3 = 0
bleu_score_4 = 0
bleu_score_all = 0

num_of_rows_calculated = 0

for i, (question, real_answer) in enumerate(zip(test_questions, test_answers)):
  try:
    refs = [real_answer.split(' ')]
    hyp = preds[i].split(' ')

    bleu_score_1 += sentence_bleu(refs, hyp, weights=(1,0,0,0))
    bleu_score_2 += sentence_bleu(refs, hyp, weights=(0,1,0,0))
    bleu_score_3 += sentence_bleu(refs, hyp, weights=(0,0,1,0))
    bleu_score_4 += sentence_bleu(refs, hyp, weights=(0,0,0,1))
    bleu_score_all += sentence_bleu(refs, hyp, weights=(.25,.25,.25,.25))

    num_of_rows_calculated+=1
  except:
    print(f"EXCEPTION {(real_answer, preds[i])}")
    
print(f"Bleu Score 1-gram : {(bleu_score_1/num_of_rows_calculated)}")
print(f"Bleu Score 2-gram : {(bleu_score_2/num_of_rows_calculated)}")
print(f"Bleu Score 3-gram : {(bleu_score_3/num_of_rows_calculated)}")
print(f"Bleu Score 4-gram : {(bleu_score_4/num_of_rows_calculated)}")
print(f"Bleu Score all-gram : {(bleu_score_all/num_of_rows_calculated)}")

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Bleu Score 1-gram : 0.2870749956140811
Bleu Score 2-gram : 0.20974003354699586
Bleu Score 3-gram : 0.18030828101567067
Bleu Score 4-gram : 0.159479184197635
Bleu Score all-gram : 0.16580707793193464


In [None]:
# train_questions = df_train['questions'].values
# train_answers = df_train['answers'].values

# preds = []
# for x in train_questions:
#   preds.append(respond_only(model, str(x), question_tokenizer, answer_tokenizer, device, max_length=17))


# bleu_score_1 = 0
# bleu_score_2 = 0
# bleu_score_3 = 0
# bleu_score_4 = 0
# bleu_score_all = 0

# num_of_rows_calculated = 0

# for i, (question, real_answer) in enumerate(zip(train_questions, train_answers)):
#   try:
#     refs = [real_answer.split(' ')]
#     hyp = preds[i].split(' ')

#     bleu_score_1 += sentence_bleu(refs, hyp, weights=(1,0,0,0))
#     bleu_score_2 += sentence_bleu(refs, hyp, weights=(0,1,0,0))
#     bleu_score_3 += sentence_bleu(refs, hyp, weights=(0,0,1,0))
#     bleu_score_4 += sentence_bleu(refs, hyp, weights=(0,0,0,1))
#     bleu_score_all += sentence_bleu(refs, hyp, weights=(.25,.25,.25,.25))

#     num_of_rows_calculated+=1
#   except:
#     print(f"EXCEPTION {(real_answer, preds[i])}")
    
# print(f"Bleu Score 1-gram : {(bleu_score_1/num_of_rows_calculated)}")
# print(f"Bleu Score 2-gram : {(bleu_score_2/num_of_rows_calculated)}")
# print(f"Bleu Score 3-gram : {(bleu_score_3/num_of_rows_calculated)}")
# print(f"Bleu Score 4-gram : {(bleu_score_4/num_of_rows_calculated)}")
# print(f"Bleu Score all-gram : {(bleu_score_all/num_of_rows_calculated)}")

Bleu Score 1-gram : 0.8657220873515558
Bleu Score 2-gram : 0.8054433932646443
Bleu Score 3-gram : 0.7445173619402672
Bleu Score 4-gram : 0.6733497442123495
Bleu Score all-gram : 0.6907689111667196


In [154]:
respond("Hi")

Me	: Hi
Bot	: what are you planning on do it



In [155]:
respond("I'm going to the beach today, what about you ?")

Me	: I'm going to the beach today, what about you ?
Bot	: i am going to this party



In [156]:
respond("Let's go together then")

Me	: Let's go together then
Bot	: can you want a



In [157]:
respond("No")

Me	: No
Bot	: have you maintained right now it s a the train



In [158]:
respond("Thank you, good bye")

Me	: Thank you, good bye
Bot	: good bye

