In [57]:
import os
import pathlib
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import json
import pickle

import sys
sys.path.append("..")

import tqdm

import numpy as np
import pandas as pd

import tensorflow as tf
from pytorch_transformers import RobertaTokenizer

from src.eqt.datasets import create_test_dataset_for_prediction
from src.eqt.preprocess_data import preprocess_data
from src.eqt.model_qbert import QBERT
from src.eqt.model_utils import create_masks


def get_label_source(row, class_tag):
    """Return the provenance of a row's label for ``class_tag``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) that is indexed by
            ``class_tag`` and, for labelled rows, by ``'label_source'``.
        class_tag: Name of the label field to inspect.

    Returns:
        ``'QBERT'`` when the ``class_tag`` field is the empty string,
        otherwise the row's own ``'label_source'`` value.
    """
    is_unlabelled = row[class_tag] == ''
    return 'QBERT' if is_unlabelled else row['label_source']


# Label name -> class index, one table per classification task.
# The position of a label in its list IS its class index, so the order of
# these lists must not be changed.
mappings = {
    'type': {
        label: index
        for index, label in enumerate([
            'Ask about antecedent',
            'Ask about consequence',
            'Ask for confirmation',
            'Irony',
            'Negative rhetoric',
            'Positive rhetoric',
            'Request information',
            'Suggest a reason',
            'Suggest a solution',
        ])
    },
    'intent': {
        label: index
        for index, label in enumerate([
            'Amplify excitement',
            'Amplify joy',
            'Amplify pride',
            'De-escalate',
            'Express concern',
            'Express interest',
            'Moralize speaker',
            'Motivate',
            'Offer relief',
            'Pass judgement',
            'Support',
            'Sympathize',
        ])
    },
}

# Run configuration.
# `class_type` selects the label set to predict and must be a key of `mappings`.
class_type = "type"

data_path = "../data/eqt/quest_df_all_labelled_intents.pickle"
checkpoints_path = f"../models/{class_type}"

peak_lr = 2e-5
# Checkpoint to restore: 3 for the "intent" task, 4 for anything else.
restore_epoch = {"intent": 3}.get(class_type, 4)

In [58]:
# Load model checkpoint
# Rebuild the QBERT classifier with the hyper-parameters below, then restore
# its weights from the checkpoint selected by `restore_epoch`.

# Encoder hyper-parameters, passed positionally to QBERT.
# NOTE(review): these presumably have to match the values used at training
# time for the checkpoint to load correctly - confirm.
num_layers = 12
d_model = 768
num_heads = 12
dff = 4 * d_model
hidden_act = "gelu"
dropout_rate = 0.1
layer_norm_eps = 1e-5
max_position_embed = 514

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
vocab_size = tokenizer.vocab_size

# label -> index for the selected task, and index -> label for decoding
# the model's predictions.
lab_mapping = mappings[class_type]
pred_mapping = {index: label for label, index in lab_mapping.items()}
num_classes = len(pred_mapping)

# Optimizer settings; the optimizer is only built so that it can be part of
# the tf.train.Checkpoint object below.
adam_beta_1 = 0.9
adam_beta_2 = 0.98
adam_epsilon = 1e-6

qbert = QBERT(
    num_layers,
    d_model,
    num_heads,
    dff,
    hidden_act,
    dropout_rate,
    layer_norm_eps,
    max_position_embed,
    vocab_size,
    num_classes,
)
optimizer = tf.keras.optimizers.legacy.Adam(
    peak_lr,
    beta_1=adam_beta_1,
    beta_2=adam_beta_2,
    epsilon=adam_epsilon,
)
ckpt = tf.train.Checkpoint(model=qbert, optimizer=optimizer)
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoints_path, max_to_keep=None)

# The manager's checkpoint list is 0-based, hence `restore_epoch - 1`.
# NOTE(review): assumes one checkpoint was saved per epoch - confirm.
restored_ckpt = ckpt_manager.checkpoints[restore_epoch - 1]
ckpt.restore(restored_ckpt).expect_partial()
print('Checkpoint {} restored!!!'.format(restored_ckpt))

Checkpoint ../models/type/ckpt-4 restored!!!


In [3]:
# Load dataset
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(data_path, 'rb') as pkl_file:
    quest_df = pickle.load(pkl_file)

# Record where each row's label comes from (see get_label_source). The column
# is created empty first and then filled row by row.
source_col = '{}_source'.format(class_type)
quest_df[source_col] = ''
quest_df[source_col] = quest_df.apply(get_label_source, axis=1, class_tag=class_type)

data = preprocess_data(data_path, class_type, drop_test=False)
data.head()

Reading data from pickle file...


Unnamed: 0,id,utterance_truncated,type,intent
0,hit:0_conv:1-2-0,I remember going to see the fireworks with my ...,,Express interest
2,hit:0_conv:1-4-0,I remember going to see the fireworks with my ...,,Express interest
6,hit:0_conv:1-6-0,I remember going to see the fireworks with my ...,Suggest a reason,Express concern
12,hit:1_conv:2-2-0,it feels like hitting to blank wall when i se...,,Express interest
14,hit:2_conv:4-4-0,I have never cheated on my wife. \n And thats ...,,Express interest


In [4]:
# Build the dataset that `predict` iterates over from the preprocessed frame.
# NOTE(review): 32 and 256 are presumably the batch size and the maximum
# sequence length - confirm against create_test_dataset_for_prediction.
test_dataset = create_test_dataset_for_prediction(tokenizer, data, 32, 256, lab_mapping)

Vocabulary size is 50265.


100%|██████████| 20197/20197 [00:05<00:00, 3395.11it/s]

Created dataset with 20197 examples.





In [5]:
def predict(test_dataset):
    """Classify every example in ``test_dataset`` with the restored model.

    Uses the notebook-level ``qbert`` model and the ``pred_mapping``
    (class index -> label name) lookup.

    Args:
        test_dataset: Iterable of ``(inp, weights, ids)`` batches. ``ids``
            must expose ``.numpy()`` yielding UTF-8 encoded byte strings.

    Returns:
        A tuple ``(pred_labels, pred_ids)``: a NumPy array of predicted label
        names and a list of the decoded example ids, in iteration order.
        Both are empty when ``test_dataset`` yields no batches.
    """
    y_pred = []
    pred_ids = []
    for inp, weights, ids in tqdm.tqdm(test_dataset):
        enc_padding_mask = create_masks(inp)
        # NOTE(review): the positional `False` is presumably the training
        # flag (inference mode) - confirm against QBERT.call.
        class_scores = qbert(inp, weights, False, enc_padding_mask)
        y_pred.extend(np.argmax(class_scores.numpy(), axis=1).tolist())
        pred_ids.extend(idx.decode('utf-8') for idx in ids.numpy())

    # Map indices to label names once, after the loop. Doing it per batch
    # re-processed every earlier prediction on each iteration and left
    # `pred_labels` unbound when the dataset was empty.
    pred_labels = np.array([pred_mapping[pred] for pred in y_pred], dtype=str)

    return pred_labels, pred_ids

In [8]:
# Predict a label for every example, then join the predictions back onto the
# preprocessed rows through the shared `id` column.
predictions, ids = predict(test_dataset)

df_pred = (
    pd.DataFrame({'id': ids, f'{class_type}_prediction': predictions})
    .merge(data, on='id', how='left')
)
df_pred.head()

Unnamed: 0,id,intent_prediction,utterance_truncated,type,intent
0,hit:0_conv:1-2-0,Express interest,I remember going to see the fireworks with my ...,,Express interest
1,hit:0_conv:1-4-0,Express interest,I remember going to see the fireworks with my ...,,Express interest
2,hit:0_conv:1-6-0,Express concern,I remember going to see the fireworks with my ...,Suggest a reason,Express concern
3,hit:1_conv:2-2-0,Express interest,it feels like hitting to blank wall when i se...,,Express interest
4,hit:2_conv:4-4-0,Express interest,I have never cheated on my wife. \n And thats ...,,Express interest


In [11]:
# Fraction of rows whose predicted label equals the value already stored in
# the `class_type` column.
is_match = df_pred[f'{class_type}_prediction'] == df_pred[f"{class_type}"]
is_match.mean()

0.9111254146655444

In [14]:
# Print the complete input text of the first three rows (the table view
# above elides long strings).
for row_pos in range(3):
    print(df_pred['utterance_truncated'].iloc[row_pos])

I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world. 
 Was this a friend you were in love with, or just a best friend?
I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world. 
 Was this a friend you were in love with, or just a best friend? 
 This was a best friend. I miss her. 
 Where has she gone?
I remember going to see the fireworks with my best friend. It was the first time we ever spent time alone together. Although there was a lot of people, we felt like the only people in the world. 
 Was this a friend you were in love with, or just a best friend? 
 This was a best friend. I miss her. 
 Where has she gone? 
 We no longer talk. 
 Oh was this something that happened because of an argument?


## __Eval Self-chat Dialogues__

### __Dialogue Preprocessing__

We need to truncate dialogues in terms of both __turns__ and __questions__. Turns are separated by a newline character, which is how the EQT `create_test_dataset_for_prediction()` function identifies dialogue turns. Furthermore, since the bot may ask more than one question in a single turn, we truncate each dialogue multiple times so that every input dialogue ends with one of the bot's questions.

In [79]:
# Self-chat dialogues to classify. Each dialogue is a list of turns in
# order; each turn is a dict with a "role" ("patient" or "therapist") and the
# utterance "text".
dialogues = [
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm feeling really overwhelmed and confused right now. I used to be so passionate about soccer and my team, but lately I've been feeling really down and disconnected from everything. I'm not sure what's going on or how to get back to feeling like myself again."
        },
        {
            "role": "therapist",
            "text": "It sounds like you're feeling overwhelmed and disconnected from something that used to bring you a lot of joy. Can you tell me more about what's been going on for you lately? What has changed that has made it difficult for you to stay connected to soccer and your team?"
        },
        {
            "role": "patient",
            "text": "Well, I've been dealing with a lot of emotional changes lately. I've been feeling really sad and hopeless, and I don't have the same enthusiasm for activities that I used to. I'm also having trouble connecting with my teammates and I feel like I'm withdrawing from my social circles. I'm just not sure how to make sense of all these emotions and get back to feeling like myself again."
        },
        {
            "role": "therapist",
            "text": "It sounds like you're going through a difficult time and it's understandable that you're feeling overwhelmed. It can be hard to make sense of all these emotions, but it's important to remember that you have the resources within yourself to get through this. What do you think might help you reconnect with your passion for soccer and your team? Is there anything that could help you feel more connected?"
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm feeling really discouraged and anxious about my job prospects. I recently graduated from a top program, but I'm having trouble getting employers to even look at my resume. I'm starting to doubt my abilities and feel like I'm not good enough. What can I do to overcome this?"
        },
        {
            "role": "therapist",
            "text": "It sounds like you're feeling overwhelmed and discouraged. It's understandable to feel this way when you're facing a difficult situation. However, it's important to remember that you have the skills and abilities to succeed. What do you think are some of your strengths that could help you in this situation?"
        },
        {
            "role": "patient",
            "text": "I think my biggest strength is my ability to think critically and solve problems. I'm also a great communicator and I'm very organized. I'm confident that these skills can help me stand out to potential employers."
        },
        {
            "role": "therapist",
            "text": "That's great! It sounds like you have a lot of strengths that could be beneficial in this situation. Have you tried reaching out to potential employers directly? This could be a great way to showcase your communication and problem-solving skills."
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm feeling lost and lonely now that I'm retired. I've been so focused on my career for so long that I don't know how to find meaning and fulfillment in my post-retirement life. I don't have any children or close friends, and my entire identity was wrapped up in my career. What can I do to find purpose and joy in my life now?"
        },
        {
            "role": "therapist",
            "text": "That sounds like a difficult transition. It's understandable that you feel lost and lonely without your career to define you. What activities have you enjoyed in the past that could bring you joy now? Are there any hobbies or interests that you could explore? What do you think would make your life more meaningful?"
        },
        {
            "role": "patient",
            "text": "I used to enjoy playing golf and tennis, and I used to love traveling. I think exploring new places and cultures would be a great way to find joy and meaning in my life. I also think volunteering or helping others in some way would be a great way to find purpose and fulfillment."
        },
        {
            "role": "therapist",
            "text": "That sounds like a great plan! Traveling and exploring new places can be a great way to find joy and meaning in life. Volunteering or helping others is also a great way to find purpose and fulfillment. Have you considered joining any local volunteer organizations or groups? What other activities do you think could bring you joy?"
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm a Vietnam War veteran and I've been struggling with PTSD since I returned home. I've been trying to cope with the memories and emotions associated with the war, but it's been difficult. I'm hoping you can help me find a way to move forward and find peace."
        },
        {
            "role": "therapist",
            "text": "I'm sorry to hear that you're struggling with PTSD. It sounds like you've been through a lot and it's understandable that it's been difficult for you. Can you tell me more about the memories and emotions associated with the war? How have they been affecting your life?"
        },
        {
            "role": "patient",
            "text": "The memories and emotions associated with the war have been overwhelming. I can't seem to shake off the images of the battles and the loss of my best friend. I feel like I'm constantly reliving the trauma and it's been hard to move on with my life. I'm struggling to find peace and I'm hoping you can help me find a way to cope with these memories and emotions."
        },
        {
            "role": "therapist",
            "text": "I understand how difficult it must be to cope with these memories and emotions. It sounds like you're feeling stuck in the past and unable to move forward. Have you tried any strategies to help you cope with the trauma? If not, I can suggest some techniques that may help you find peace."
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm feeling really overwhelmed and isolated since moving to the US. I'm struggling to learn a new language and adapt to a different culture, and I'm also facing racial discrimination. It's really affecting my self-esteem and sense of belonging. I'm not sure how to cope with all of this."
        },
        {
            "role": "therapist",
            "text": "I'm sorry to hear that you're feeling overwhelmed and isolated. It sounds like you're facing a lot of challenges right now. Can you tell me more about how you're feeling? What kind of strategies have you been using to cope with the stress of the situation?"
        },
        {
            "role": "patient",
            "text": "I've been trying to stay positive and focus on the things I can control, like learning the language and adapting to the culture. But it's been hard to stay positive when I'm facing so much discrimination. I'm also trying to reach out to people in my community for support, but it's been difficult to make connections."
        },
        {
            "role": "therapist",
            "text": "It sounds like you're doing your best to stay positive and reach out for support. That's great! It can be really hard to stay positive when facing discrimination, so it's important to find ways to take care of yourself. What kind of activities do you find helpful for managing stress and boosting your self-esteem?"
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm feeling really overwhelmed and confused right now. I used to be so passionate about soccer and my team, but lately I've been feeling really down and disconnected from them. I'm not sure what's going on or how to get back to feeling like myself again."
        },
        {
            "role": "therapist",
            "text": "That sounds really tough. It sounds like you're feeling disconnected from something that used to bring you a lot of joy. Can you tell me a bit more about what's been going on? What do you think might be causing this disconnection?"
        },
        {
            "role": "patient",
            "text": "I think it might have something to do with my romantic life. I've been in a relationship for the past three months and it's been really intense. I think it's been taking up a lot of my emotional energy and I'm not sure how to balance it with my other commitments, like soccer and school."
        },
        {
            "role": "therapist",
            "text": "I can understand how that could be difficult. It sounds like you're trying to juggle a lot of different things and it can be hard to find the right balance. What do you think would help you to reconnect with soccer and your team? Is there anything that you can do to make it easier to manage your commitments?"
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm glad to be here. I've been dealing with depression since I was a teenager, and it's been a struggle to manage my relationships, especially in romantic contexts. I've been married and divorced twice, and I have joint custody of two teenage daughters. I've also struggled with alcohol use disorder in the past, but I haven't had any suicidal thoughts. I'm hoping to find ways to manage my depression and build healthier relationships."
        },
        {
            "role": "therapist",
            "text": "Hi there! It sounds like you have been through a lot and have a lot of challenges to manage. It's great that you are looking for ways to manage your depression and build healthier relationships. Can you tell me more about what you have tried in the past to manage your depression and build healthier relationships? What has worked for you and what hasn't?"
        },
        {
            "role": "patient",
            "text": "In the past, I've tried a few different things to manage my depression. I've been to therapy, taken medication, and tried to exercise regularly. I've also tried to be more mindful and practice self-care. As far as relationships, I've tried to be more open and honest with my partners, and I've tried to be more patient and understanding. Unfortunately, I haven't had much success with any of these strategies, and I'm hoping to find something that works better."
        },
        {
            "role": "therapist",
            "text": "I understand that you have tried a few different strategies to manage your depression and build healthier relationships, but haven't had much success. That can be really frustrating. Can you tell me more about what you think might help you manage your depression and build healthier relationships? What do you think would be the most effective strategies for you?"
        }
    ],
    [
        {
            "role": "patient",
            "text": "Hi Hal, I'm feeling really overwhelmed right now. I'm in college studying computer science, but it's not something I'm passionate about. I'm more interested in oil painting, but my parents want me to pursue this degree. I recently had an argument with them over a failed exam in data structure and it's been really hard for me to cope with. I'm not sure what to do."
        },
        {
            "role": "therapist",
            "text": "I'm sorry to hear that you're feeling overwhelmed. It sounds like you're facing a difficult situation. Can you tell me more about why you're feeling overwhelmed? What is it about the situation that is making it so difficult for you to cope?"
        },
        {
            "role": "patient",
            "text": "I feel like I'm stuck between two paths. I want to pursue oil painting, but I also want to make my parents happy. I'm afraid of disappointing them if I don't pursue the degree, but I'm also afraid of wasting my time if I do. I'm not sure which path to take and it's been really hard for me to make a decision."
        },
        {
            "role": "therapist",
            "text": "It sounds like you're facing a difficult decision. It's understandable that you would feel overwhelmed in this situation. Can you tell me more about why you're feeling so stuck between these two paths? What are the pros and cons of each option?"
        }
    ],
]

In [84]:
from typing import Union, List, Dict, Tuple, Callable


def format_dialogue(
        dialogue: List[Dict[str, str]],
        sep_char: str = "\n",
        key_content: str = "text"
    ) -> str:
    """Flatten a dialogue into one string, one utterance per turn.

    Args:
        dialogue: Turns in order; each turn is a dict holding its utterance
            under ``key_content``.
        sep_char: String placed between consecutive utterances. Defaults to
            the newline character to conform to the EQT-provided API.
        key_content: Dict key under which each turn stores its utterance.

    Returns:
        The utterances joined by ``sep_char``; an empty string for an empty
        dialogue.
    """
    utterances = [turn[key_content] for turn in dialogue]
    return sep_char.join(utterances)


def generate_samples_from_dialogue(
        dialogue: List[Dict[str, str]],
        q_predictor: Callable[[str], bool] = None,
        key_content: str = "text",
        key_role: str = "role",
        role_bot: str = "therapist",
    ) -> List[str]:
    """Build one truncated-dialogue string per question asked by the bot.

    Each bot turn is split into sentences. For every sentence accepted by
    ``q_predictor``, a sample is produced that contains all preceding turns
    plus the bot turn cut off right after that sentence.

    Args:
        dialogue: Turns in order; each turn is a dict with a role and an
            utterance.
        q_predictor: Predicate deciding whether a sentence is a question.
            When falsy (the default), a sentence counts as a question if it
            ends with ``"?"``.
        key_content: Dict key holding a turn's utterance.
        key_role: Dict key holding a turn's role.
        role_bot: Role value that identifies the bot's turns.

    Returns:
        The formatted samples (see ``format_dialogue``), ordered by turn and
        then by question position within the turn.
    """
    from nltk.tokenize import sent_tokenize

    if not q_predictor:
        q_predictor = lambda x: x.endswith("?")

    samples = []
    for turn_pos, turn in enumerate(dialogue):
        if turn[key_role] != role_bot:
            continue

        # Everything said before this bot turn is kept untouched.
        history = dialogue[:turn_pos]
        bot_sentences = sent_tokenize(turn[key_content])

        for n_kept, sentence in enumerate(bot_sentences, start=1):
            if not q_predictor(sentence):
                continue
            # Copy the turn so the caller's dialogue is not modified.
            partial_turn = turn.copy()
            partial_turn[key_content] = " ".join(bot_sentences[:n_kept])
            samples.append(
                format_dialogue(history + [partial_turn], key_content=key_content)
            )
    return samples

In [85]:
# One flat list of truncated-dialogue strings across all dialogues.
samples = [
    sample
    for dialogue in dialogues
    for sample in generate_samples_from_dialogue(dialogue)
]

# Use the row position as a string `id`; `predict` returns ids as decoded
# strings, and they are joined back on this column afterwards.
df_test = (
    pd.DataFrame({'utterance_truncated': samples})
    .reset_index()
    .rename(columns={'index': 'id'})
    .astype({'id': str})
)
df_test

Unnamed: 0,id,utterance_truncated
0,0,"Hi Hal, I'm feeling really overwhelmed and con..."
1,1,"Hi Hal, I'm feeling really overwhelmed and con..."
2,2,"Hi Hal, I'm feeling really overwhelmed and con..."
3,3,"Hi Hal, I'm feeling really overwhelmed and con..."
4,4,"Hi Hal, I'm feeling really discouraged and anx..."
5,5,"Hi Hal, I'm feeling really discouraged and anx..."
6,6,"Hi Hal, I'm feeling lost and lonely now that I..."
7,7,"Hi Hal, I'm feeling lost and lonely now that I..."
8,8,"Hi Hal, I'm feeling lost and lonely now that I..."
9,9,"Hi Hal, I'm feeling lost and lonely now that I..."


In [86]:
# Build the prediction dataset for the self-chat samples and classify them.
# NOTE(review): 32 and 256 are presumably the batch size and the maximum
# sequence length - confirm against create_test_dataset_for_prediction.
# This rebinds `test_dataset`, replacing the dataset built earlier.
test_dataset = create_test_dataset_for_prediction(tokenizer, df_test, 32, 256, lab_mapping)
predictions, ids = predict(test_dataset)

Vocabulary size is 50265.


100%|██████████| 29/29 [00:00<00:00, 1156.48it/s]


Created dataset with 29 examples.


100%|██████████| 1/1 [00:00<00:00,  1.37it/s]


In [87]:
# Attach each predicted label to its self-chat sample through the `id` column.
df_pred = (
    pd.DataFrame({'id': ids, f'{class_type}_prediction': predictions})
    .merge(df_test, on='id', how='left')
)
df_pred

Unnamed: 0,id,type_prediction,utterance_truncated
0,0,Request information,"Hi Hal, I'm feeling really overwhelmed and con..."
1,1,Ask about antecedent,"Hi Hal, I'm feeling really overwhelmed and con..."
2,2,Request information,"Hi Hal, I'm feeling really overwhelmed and con..."
3,3,Request information,"Hi Hal, I'm feeling really overwhelmed and con..."
4,4,Request information,"Hi Hal, I'm feeling really discouraged and anx..."
5,5,Suggest a solution,"Hi Hal, I'm feeling really discouraged and anx..."
6,6,Request information,"Hi Hal, I'm feeling lost and lonely now that I..."
7,7,Request information,"Hi Hal, I'm feeling lost and lonely now that I..."
8,8,Request information,"Hi Hal, I'm feeling lost and lonely now that I..."
9,9,Request information,"Hi Hal, I'm feeling lost and lonely now that I..."
