In [3]:
import os
import sys
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import gpt_2_simple as gpt2
import random
import pandas as pd
import numpy as np
import math
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import logging
logging.getLogger('tensorflow').setLevel(logging.FATAL)
import contextlib
import re

sys.path.append("../lib/InferSent")
from models import InferSent
import nltk
nltk.download('punkt')
import spacy
import torch

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



[nltk_data] Downloading package punkt to /home/ryan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048, 'pool_type': 'max', 'dpout_model': 0.0, 'version': 2}
infersent = InferSent(params_model)
infersent.load_state_dict(torch.load('../models/encoder/infersent2.pkl'))
infersent.set_w2v_path("../models/fastText/crawl-300d-2M.vec")

nlp = spacy.load("en")
squad_df = pd.read_csv("../corpora/squad-dev-v2.0.csv", index_col=0)

sentences = []

contexts = list(squad_df["contexts"].drop_duplicates())
for context in contexts:
    doc = nlp(context)
    sentences += [sentence.string.strip() for sentence in doc.sents]
    
infersent.build_vocab(sentences, tokenize=True)

Found 18481(/19809) words with w2v vectors
Vocab size : 18481


In [5]:
def cosine_similarity(text1, text2):
    return np.dot(text1, text2)/(np.linalg.norm(text1) * np.linalg.norm(text2))

In [6]:
def split_sentences(paragraph):
    doc = nlp(paragraph)
    return [sentence.string.strip() for sentence in doc.sents]

In [7]:
def cosine_predict(statements, question):
    context_sentences = split_sentences(statements)
    most_similar, highest_sim = "", 0
    for sentence in context_sentences:
        similarity = cosine_similarity(infersent.encode([question])[0], infersent.encode([sentence])[0])
        if similarity > highest_sim:
            most_similar = sentence
            highest_sim = similarity
    return most_similar, highest_sim

In [8]:
profiles = {}
profile_dir = "../corpora/profiles"
for profile in os.listdir(profile_dir):
    df = pd.read_csv(f"{profile_dir}/{profile}")
    character = df.columns.tolist()[0]
    profiles[character] = {}
    for emotion in df.columns.tolist()[1:]:
        profiles[character][emotion] = df[emotion].tolist()[0]

In [9]:
def get_emotional_composite(emotional_profile, response_length):
    composite_amount = random.randint(1,3)
    emotions, probabilities = zip(*emotional_profile.items())
    response_breakdown = [(str(emotion), math.floor(response_length/composite_amount) + int(i < response_length % composite_amount)) for i, emotion in enumerate(np.random.choice(emotions, composite_amount, p=probabilities))]
    return response_breakdown

In [10]:
def generate_composite_response(sess, emotional_profile, conversation, character, response_length=30):
    response = "\n".join([f"{sentence[0]}: {sentence[1]}" for sentence in conversation]) + f"\n{character}:"
    start_offset = len(response)
    response_breakdown = get_emotional_composite(emotional_profile, response_length)
    for emotion, length in response_breakdown:
        gpt2.reset_session(sess)
        sess = gpt2.start_tf_sess()
        gpt2.load_gpt2(sess, run_name=f"{emotion}_run1")
        response = gpt2.generate(
            sess,
            length=length,
            temperature=0.7,
            prefix=response,
            nsamples=1,
            batch_size=1,
            run_name=f"{emotion}_run1",
            return_as_list=True
        )[0]
    return re.split(r"[a-z A-Z0-9]+:", response[start_offset:])[0].strip().split("\n")[0]

In [11]:
def generate_character_response(sess, profile, conversation, character, response_length=30):
    seed = "\n".join([f"{sentence[0]}: {sentence[1]}" for sentence in conversation]) + f"\n{character}:"
    gpt2.reset_session(sess)
    sess = gpt2.start_tf_sess()
    gpt2.load_gpt2(sess, run_name=f"{character}_run1")
    response = gpt2.generate(
        sess,
        length=response_length,
        temperature=0.7,
        prefix=seed,
        nsamples=1,
        batch_size=1,
        run_name=f"{character}_run1",
        return_as_list=True
    )[0][len(seed):]
    return re.split(r"[a-z A-Z0-9]+:", response)[0].strip().split("\n")[0]

In [12]:
def generate_holistic_model_response(sess, conversation, character, response_length=25):
    seed = "\n".join([f"{sentence[0]}: {sentence[1]}" for sentence in conversation]) + f"\n{character}:"
    gpt2.reset_session(sess)
    sess = gpt2.start_tf_sess()
    gpt2.load_gpt2(sess, run_name=f"full_model_run1")
    response = gpt2.generate(
        sess,
        length=response_length,
        temperature=0.7,
        prefix=seed,
        nsamples=1,
        batch_size=1,
        run_name=f"{character}_run1",
        return_as_list=True
    )[0][len(seed):]
    return re.split(r"[a-z A-Z0-9]+:", response)[0].strip()

In [16]:
def start_conversation(
    conversation=[], 
    scene=["harry", "user", "environment"], 
    characters=["harry", "ron", "hermione", "snape", "albus dumbledore", "tom riddle", "hagrid", "user", "environment"],
    character_addition_prob=0.2,
    character_removal_prob=0.25,
    env_model=generate_character_response, 
    char_model=generate_composite_response, 
    length=10,
    print_scene=False
):
    [print(f"{character}: {statement}") for character, statement in conversation]
    sess = gpt2.start_tf_sess()
    for i in range(length):
        if i and random.random() < character_removal_prob:
            del scene[scene.index(random.choice(scene))]
        if i and random.random() < character_addition_prob:
            scene.append(random.choice([character for character in characters if character not in scene]))
        if not scene:
            break
        with open(os.devnull, "w") as f, contextlib.redirect_stdout(f):
            character = random.choice([character for character in scene if not conversation or character != conversation[-1][0]])
            if character == "user":
                print("user: ", end="")
                response = input("user: ")
            elif character == "environment":
                response = env_model(sess, profiles[character], conversation, "environment")
            else:
                response = char_model(sess, profiles[character], conversation, character)
        if character != "user":
            print(f"{character.capitalize()}: {response}{' - ' + str(scene) if print_scene else ''}")
        conversation.append((character, response))
        if len(conversation) > 5:
            predicted_response, _ = cosine_predict("\n".join([statement for _, statement in conversation[:-1]]), response)
            conversation.append((character, predicted_response))

In [14]:
scene_setup = [
    ("environment", "Diagon Alley was completely empty because of the coronavirus."),
    ("hagrid", "Where is everybody?"),
    ("albus dumbledore", "Hopefully at home."),
]

In [15]:
start_conversation(
    conversation=scene_setup, 
    scene=["harry", "ron", "hermione"], 
    characters=["harry", "ron", "hermione", "snape", "albus dumbledore", "tom riddle", "hagrid", "user", "environment"],
    character_addition_prob=0.2,
    character_removal_prob=0.25,
    env_model=generate_character_response, 
    char_model=generate_character_response, 
    length=10,
    print_scene=True
)

environment: Diagon Alley was completely empty because of the coronavirus.
hagrid: Where is everybody?
albus dumbledore: Hopefully at home.
Hermione: We still don't know for sure what it is doing, but it's causing widespread illness and deaths. We'll have to be extra careful. - ['harry', 'ron', 'hermione']
Ron: You don't think St. Nicholas will turn out to be the monster, do you? - ['harry', 'ron']
Harry: I think if he did, he'd be dead. - ['harry', 'ron']
Tom riddle: I knew it wouldn't be safe to open the Chamber again while I was still at school. So I decided to leave behind a diary, preserving my - ['harry', 'tom riddle']
Harry: You don't think St. Nicholas will turn out to be the monster, do you? - ['harry']


IndexError: Cannot choose from an empty sequence