<a href="https://colab.research.google.com/github/Arsuh/Seq2Seq-Chatbot/blob/master/Seq2SeqV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive under /content/drive; later cells build their checkpoint
# paths (drive_main_path) on top of this mount point.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
!git clone https://github.com/Arsuh/Seq2Seq-Chatbot

fatal: destination path 'Seq2Seq-Chatbot' already exists and is not an empty directory.


In [0]:
from __future__ import absolute_import, division, print_function
%tensorflow_version 2.x
import tensorflow as tf

from google.oauth2 import service_account
from google.cloud import bigquery

import matplotlib.pyplot as plt
import numpy as np
import json
import re
import os
import time
import random
from shutil import copyfile

In [0]:
%cd Seq2Seq-Chatbot/
from Vocabulary import Vocabulary
from MainModel import Encoder, Decoder, loss_fnc

drive_main_path = '/content/drive/My Drive/Colab Files/Chatbot/'
main_path = '/content/Seq2Seq-Chatbot/'
hparams_path = main_path + 'hyper_parameters_std.json'
#hparams_path = main_path + 'hyper_parameters_test.json'
ckpt_path = drive_main_path + 'ckeckpoints/'
ckpt_prefix = os.path.join(ckpt_path, 'ckpt')

In [0]:
def initialize_model(hparams, from_indexed=True, create_ds=True, de_tokenize=False, verbose=False):
    """Create the vocabulary, optionally the dataset, and the model pieces.

    Args:
        hparams: dict of hyper-parameters. Keys read here: CREDENTIALS_PATH,
            MAX_LEN, VOCAB_DB, NUM_EXAMPLES, VOCAB, BATCH_SIZE, EMBEDDING,
            RNN1, RNN2, RNN_TYPE, BIDIRECTIONAL, MERGE_MODE, DROPOUT_ENC,
            DROPOUT_DEC and LR.
        from_indexed: when true the vocabulary is built with
            Vocabulary.create_inputs_from_indexed, otherwise with
            Vocabulary.create_inputs.
        create_ds: when true a dataset is built with create_dataset and
            included in the returned tuple.
        de_tokenize: when true v.de_tokenize_data() is called once the
            vocabulary exists.
        verbose: when true 'Vocabulary created!' is printed. The vocabulary
            factories themselves are always called with verbose=True, and the
            timing line is always printed.

    Returns:
        (v, dataset, enc, dec, opt) when create_ds is true,
        (v, enc, dec, opt) otherwise.
    """
    t0 = time.time()

    # Both factories take the same arguments, so pick one and call it once.
    if from_indexed:
        build_vocab = Vocabulary.create_inputs_from_indexed
    else:
        build_vocab = Vocabulary.create_inputs
    credentials = service_account.Credentials.from_service_account_file(hparams['CREDENTIALS_PATH'])
    v = build_vocab(credentials,
                    max_len=hparams['MAX_LEN'],
                    vocab=hparams['VOCAB_DB'],
                    limit_main=hparams['NUM_EXAMPLES'],
                    limit_vocab=hparams['VOCAB'],
                    verbose=True)  # <--- False

    if de_tokenize:
        v.de_tokenize_data()
    if verbose:
        print('Vocabulary created!')

    dataset = None
    if create_ds:
        dataset = create_dataset(v, hparams['BATCH_SIZE'], hparams['NUM_EXAMPLES'])

    # Leading constructor arguments shared by the encoder and the decoder.
    shared_args = (hparams['VOCAB'], hparams['BATCH_SIZE'], hparams['EMBEDDING'],
                   hparams['RNN1'], hparams['RNN2'], hparams['RNN_TYPE'])
    enc = Encoder(*shared_args,
                  hparams['BIDIRECTIONAL'], hparams['MERGE_MODE'], hparams['DROPOUT_ENC'])
    dec = Decoder(*shared_args, hparams['DROPOUT_DEC'])
    opt = tf.keras.optimizers.Adam(learning_rate=hparams['LR'])
    #opt = tf.keras.optimizers.SGD(learning_rate=hparams['LR'], momentum=0.5)

    elapsed_min = (time.time() - t0) / 60
    elapsed_hrs = (time.time() - t0) / 3600
    print('Time to initialize model {:.2f} min | {:.2f} hrs\n'.format(elapsed_min, elapsed_hrs))

    return (v, dataset, enc, dec, opt) if create_ds else (v, enc, dec, opt)

def load_hyper_params(path):
    """Load a JSON hyper-parameter file.

    Any top-level value equal to the string 'None' is replaced by Python's
    None; every other value is returned as parsed.

    Args:
        path: location of the JSON file.

    Returns:
        The parsed dict with the 'None' placeholders converted.
    """
    with open(path, 'r') as handle:
        params = json.load(handle)

    # Collect the keys first, then overwrite them in one go (fromkeys -> None).
    placeholder_keys = [key for key, value in params.items() if value == 'None']
    params.update(dict.fromkeys(placeholder_keys))
    return params

def create_dataset(v, batch_size, buffer_size):
    """Tokenize the vocabulary's data and wrap it in a shuffled, batched dataset.

    Args:
        v: vocabulary object; v.tokenize_data() is called first and v.inp /
            v.tar are then converted to int32 arrays (presumably equal-length
            padded id sequences -- confirm against Vocabulary).
        batch_size: number of examples per batch; incomplete trailing batches
            are dropped.
        buffer_size: shuffle buffer size. May be None (the hyper-parameter
            NUM_EXAMPLES is passed here and can be None), in which case the
            number of input examples is used so the whole set is shuffled.

    Returns:
        A tf.data.Dataset yielding (input_batch, target_batch) pairs.
    """
    v.tokenize_data()
    inputs = np.array(v.inp, dtype=np.int32)
    targets = np.array(v.tar, dtype=np.int32)

    if buffer_size is None:
        # Dataset.shuffle needs a concrete positive size; None is not accepted.
        buffer_size = max(len(inputs), 1)

    dataset = tf.data.Dataset.from_tensor_slices((inputs, targets))
    dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset


def save_plot(path, plt_loss):
    """Append the most recent loss value to 'plot.txt' inside `path`.

    Args:
        path: directory for the log file, with or without a trailing
            separator. It is created (including missing parents) if needed.
        plt_loss: sequence of loss values exposing .numpy(); only the last
            element is written. An empty sequence is a no-op.
    """
    if len(plt_loss) == 0:
        # Nothing recorded yet, so there is nothing to append.
        return

    # makedirs handles nested paths and an already-existing directory.
    os.makedirs(path, exist_ok=True)
    # os.path.join keeps the file inside `path` even without a trailing slash.
    with open(os.path.join(path, 'plot.txt'), 'a', encoding='utf-8') as f:
        f.write(str(plt_loss[-1].numpy()) + '\n')

In [0]:
def evaluate(text, v, enc, dec, max_len):
    """Greedily decode a reply to `text` with the encoder/decoder pair.

    Args:
        text: raw input sentence.
        v: vocabulary object; v.preproc(text) supplies the encoder input,
            v.max_len bounds the number of decoding steps and v.idx2word maps
            predicted ids back to words.
        enc: encoder; must provide initialize_hidden(batch=1).
        dec: decoder returning (prediction, h1, h2, attention_weights).
        max_len: side length of the square attention matrix that is returned.
            Assumes each step's flattened attention weights have exactly
            max_len entries and that v.max_len <= max_len -- TODO confirm.

    Returns:
        (result, text, attention_plot): the decoded words joined by single
        spaces and prefixed with '<SOS>' (never with trailing whitespace), the
        unchanged input text, and a (max_len, max_len) array whose row i holds
        the attention weights of decoding step i.
    """
    # Token ids as used by the original inline comments: 1 = SOS, 2 = EOS.
    sos_id, eos_id = 1, 2

    inp = np.array(v.preproc(text), dtype=np.float32)
    inp = tf.convert_to_tensor(inp)
    inp = tf.expand_dims(inp, axis=0)  # add the batch dimension

    attention_plot = np.zeros((max_len, max_len))

    words = ['<SOS>']
    h1, h2 = enc.initialize_hidden(batch=1)

    # Invoke the layers themselves (as train_step does) rather than .call().
    enc_out, h1, h2 = enc(inp, h1, h2, training=False)
    dec_inp = tf.expand_dims([sos_id], axis=0)
    for i in range(v.max_len):
        pred, h1, h2, attention_weights = dec(dec_inp, enc_out, h1, h2, training=False)

        pred = tf.nn.softmax(pred, axis=1)
        pred_id = tf.argmax(pred[0]).numpy()
        #pred_id = tf.random.categorical(pred, num_samples=1)[-1, 0].numpy()

        attention_weights = tf.reshape(attention_weights, (-1, ))
        attention_plot[i] = attention_weights.numpy()

        words.append(v.idx2word[pred_id])
        if pred_id == eos_id:
            break

        # Feed the prediction back in as the next decoder input.
        dec_inp = tf.expand_dims([pred_id], axis=0)

    # Joining once gives both exit paths the same formatting.
    return ' '.join(words), text, attention_plot

In [0]:
def train_step(hparams, inp, tar, enc_h1, enc_h2):
    """Run one teacher-forced optimisation step on a single batch.

    Uses the module-level enc, dec and opt. The decoder starts from the
    encoder's returned states and an input column of id 1 (the SOS id used by
    evaluate), then receives the true target token of each step as its next
    input.

    Args:
        hparams: hyper-parameter dict; only BATCH_SIZE is read.
        inp: input batch passed to the encoder.
        tar: target batch, indexed as tar[:, t]; position 0 is skipped
            (presumably the SOS token -- confirm against Vocabulary).
        enc_h1, enc_h2: initial encoder states.

    Returns:
        The summed per-step loss divided by the target sequence length.
    """
    global enc, dec, opt
    summed_loss = 0
    with tf.GradientTape() as tape:
        # NOTE(review): no `training` flag is passed to enc/dec here, whereas
        # evaluate passes training=False -- confirm MainModel's default gives
        # the intended dropout behaviour during training.
        enc_out, enc_h1, enc_h2 = enc(inp, enc_h1, enc_h2)
        state1, state2 = enc_h1, enc_h2
        start_ids = [1] * hparams['BATCH_SIZE']
        step_inp = tf.expand_dims(start_ids, 1)

        for step in range(1, tar.shape[1]):
            step_target = tar[:, step]
            step_pred, state1, state2, _ = dec(step_inp, enc_out, state1, state2)

            summed_loss += loss_fnc(step_target, step_pred)
            # Teacher forcing: the ground truth becomes the next input.
            step_inp = tf.expand_dims(step_target, 1)

    # Gradients come from the summed loss; the mean is only reported.
    trainable = enc.trainable_variables + dec.trainable_variables
    grads = tape.gradient(summed_loss, trainable)
    opt.apply_gradients(zip(grads, trainable))

    return summed_loss / int(tar.shape[1])


# Fixed prompts; train() decodes two of them, drawn at random, after each epoch.
test_sentences = [
    'Hello!',
    'How are you?',
    'Tomorow is my birthday, but I keep feeling sad...',
    'What is your name sir?',
    'Artificial intelligence will take over the world some day!',
    'Can you please bring me some water?',
    'Come on! This is the easiest thing you are supposed to do!',
    'My name is Thomas!',
]


def train(hparams, saving=True, saving_step=1, plot_saving=True, verbose=True):
    """Train the global encoder/decoder for hparams['EPOCHS'] epochs.

    Reads the module-level v, dataset, enc, dec and opt, as well as ckpt_path,
    ckpt_prefix, hparams_path and test_sentences.

    Args:
        hparams: hyper-parameter dict; keys read here: NUM_EXAMPLES (falling
            back to MAX_EXAMPLES when it is None), BATCH_SIZE, EPOCHS, MAX_LEN.
        saving: when true the hyper-parameter file is copied into ckpt_path
            and a checkpoint is written every `saving_step` epochs.
        saving_step: checkpoint interval in epochs.
        plot_saving: when true the epoch's mean loss is appended to the plot
            log via save_plot after every epoch.
        verbose: accepted for interface compatibility; currently unused.

    Returns:
        List with the mean batch loss of every epoch.
    """
    global v, dataset, enc, dec, opt
    num_examples = hparams['NUM_EXAMPLES']
    if num_examples is None:
        num_examples = hparams['MAX_EXAMPLES']
    N_BATCH = num_examples // hparams['BATCH_SIZE']

    if saving:
        checkpoint = tf.train.Checkpoint(optimizer=opt, encoder=enc, decoder=dec)
        # makedirs also creates missing parents and tolerates an existing dir.
        os.makedirs(ckpt_path, exist_ok=True)
        copyfile(hparams_path, os.path.join(ckpt_path, 'hparams.json'))

    plt_loss = []
    for epoch in range(1, hparams['EPOCHS']+1):
        h1, h2 = enc.initialize_hidden()

        total_loss = 0
        for (batch, (inp, tar)) in enumerate(dataset.take(N_BATCH)):
            batch_time = time.time()
            batch_loss = train_step(hparams, inp, tar, h1, h2)
            total_loss += batch_loss

            if batch % 300 == 0:
                print('  >>> Epoch: {} | Batch: {}\\{} | Loss: {:.4f} | Time: {:.2f} sec'
                      .format(epoch, batch+1, N_BATCH, batch_loss, time.time() - batch_time))

        epoch_loss = total_loss/N_BATCH
        # `epoch` is already 1-based, so it is reported as-is (matching the
        # per-batch progress line above).
        print('Epoch: {} | Loss: {:.4f}'.format(epoch, epoch_loss))
        plt_loss.append(epoch_loss)

        # Decode two randomly drawn prompts (with replacement) as a sanity check.
        samples = [evaluate(sentence, v, enc, dec, hparams['MAX_LEN'])
                   for sentence in random.choices(test_sentences, k=2)]
        print(50*'+')
        for result, text, _ in samples:
            print(text)
            print(result)
        print(50*'+')

        if saving and epoch%saving_step == 0:
            print('Saving model...')
            checkpoint.save(file_prefix=ckpt_prefix)
        if plot_saving:
            save_plot(ckpt_path, plt_loss)
    return plt_loss

In [0]:
# Read the hyper-parameter JSON selected by hparams_path; string 'None' values
# are converted to None by load_hyper_params.
hparams = load_hyper_params(hparams_path)

In [0]:
# Build the vocabulary, dataset, encoder, decoder and optimizer as module-level
# names; train() and train_step() access them through `global`.
v, dataset, enc, dec, opt = initialize_model(hparams, from_indexed=True, create_ds=True, de_tokenize=False)

In [0]:
#'''
# Restore optimizer/encoder/decoder state from a fixed checkpoint on Drive and
# decode one sentence as a smoke test. The `#'''` lines that open and close
# this cell are commented-out triple quotes -- presumably a toggle for
# disabling the whole cell; confirm before relying on it.
checkpoint = tf.train.Checkpoint(optimizer=opt, encoder=enc, decoder=dec)
#checkpoint.restore(tf.train.latest_checkpoint(ckpt_path))
# NOTE(review): this reads from 'checkpoints-final-1/', not from ckpt_path, the
# directory train() saves into.
checkpoint.restore(drive_main_path + 'checkpoints-final-1/' + 'ckpt-2')


result, _, _ = evaluate(u'The world is changing once again!', v, enc, dec, hparams['MAX_LEN'])
print(result)
#'''

In [0]:
# Train (checkpointing every 5 epochs), then drop the dataset reference.
# After `del dataset`, train() cannot be called again until the dataset is
# rebuilt, because it reads the global `dataset`.
plt_loss = train(hparams, saving=True, saving_step=5, plot_saving=True)
del dataset

# Mean loss per epoch.
fig, ax = plt.subplots()
ax.plot(plt_loss)
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
plt.show()