# DEMO: Empathy classification using a pattern classifier

This notebook uses a previously trained contrast-pattern classifier (PBC4cip) to predict the empathy level of a conversation exchange between two people. 

In [3]:
import pickle
import pandas as pd
import torch
import os
import sys
import random 
import re
#import classifier
from PBC4cip import PBC4cip
from PBC4cip.core.Evaluation import obtainAUCMulticlass
from PBC4cip.core.Helpers import get_col_dist, get_idx_val

#utilities for database management
import numpy as np
import pandas as pd
from tqdm import tqdm, trange
import os
import argparse

import train_classifier as trainer
import test_classifier as tester
import database_processing_package as data_processer

#relevant classifiers for annotating exchange feature
from classifiers.empathetic_intent import intent_prediction as ip
from classifiers.sentiment import sentiment_prediction as sp
from classifiers.epitome_mechanisms import epitome_predictor as epitome
from classifiers.nrc_vad_lexicon import lexicon_analysis as lexicon
from classifiers.course_grained_emotion import pretrained_32emotions as em32
from classifiers.course_grained_emotion import emotion_reductor as em_red
import database_processing_package as data_processer

from spellchecker import SpellChecker


## Loading utilities and models

First, we select the trained model file and load the utilities (auxiliary classifiers and lexicon) needed to extract the features of a conversation exchange. 

In [45]:
#Relevant directories
# Paths below are built relative to the working directory, which is assumed to be
# the root of the repository.
current_dir = os.getcwd()

#Select an appropriate classification model in the Experiments folder
experiment_number = 70
model_directory = current_dir + '/Experiments/outputs/Experiment ' + str(experiment_number) + '/' + 'trained_pbc4cip.sav'

# Position of every switch inside the feature (control) vector.
feature2number = {'database_to_classify':0,'intent' : 1, 'sentiment' : 2, 'epitome':3, 'VAD_vectors':4, 'utterance_length':5,
                  '32_emotion_labels':6,'20_emotion_labels':7, 
                  '8_emotion_labels':8, 'emotion_mimicry':9, 'Reduce_empathy_labels':10, 
                  'exchange_number' : 11}

# Switches keyed by name (1 = on, 0 = off). Keeping the description next to each
# value prevents the annotation from drifting away from the vector actually used.
feature_flags = {
    'database_to_classify': 1,   # 0 = EmpatheticConversations (old), 1 = EmpatheticExchanges (new)
    'intent': 1,                 # empathetic intent
    'sentiment': 1,              # sentiment
    'epitome': 1,                # epitome
    'VAD_vectors': 1,            # VAD lexicon
    'utterance_length': 1,       # utterance length
    '32_emotion_labels': 0,      # emotion, 32 labels
    '20_emotion_labels': 0,      # emotion, 20 labels
    '8_emotion_labels': 0,       # emotion, 8 labels
    'emotion_mimicry': 1,        # emotion mimicry
    'Reduce_empathy_labels': 1,  # reduce empathy labels
    'exchange_number': 1,        # exchange number
}

# Flatten the named switches into the positional list the rest of the notebook indexes
# through feature2number.
feature_vector = [0] * len(feature2number)
for feature_name, position in feature2number.items():
    feature_vector[position] = feature_flags[feature_name]

# Database path, relative to current_dir (the leading '/' is required because the
# path is later appended to current_dir by plain string concatenation).
if feature_vector[feature2number['database_to_classify']] == 1: 
    database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges_test.csv'
else: 
    database_dir = '/processed_databases/EmpatheticConversationsExchangeFormat/EmpatheticConversations_ex.csv'

In [70]:

# Directory with the empathetic-intent classifier files.
empIntSubDir = './classifiers/empathetic_intent/'
# NRC VAD lexicon file; shared by the VAD features and by emotion mimicry.
vad_lexicon_path = 'classifiers/nrc_vad_lexicon/BipolarScale/NRC-VAD-Lexicon.txt'

#load intent model
if feature_vector[feature2number['intent']] == 1: 
    model_intent,tokenizer_intent,device = ip.loadModelTokenizerAndDevice(empIntSubDir) #get model and parameters
#load sentiment model
if feature_vector[feature2number['sentiment']] == 1: 
    sent_model, sent_tokenzr = sp.loadSentimentModel() #get model and tokenizer
#epitome model is loaded during inference due to the code of its classifier
#load lexicon once: it is needed for the VAD vectors and also for obtaining emotion mimicry
lexicon_needed = (feature_vector[feature2number['VAD_vectors']] == 1) or (feature_vector[feature2number['emotion_mimicry']] == 1)
if lexicon_needed:
    lexicon_df, wnl, stp_wrds = lexicon.setup_lexicon(vad_lexicon_path)
#load emotion classifier with 32 labels for any of the emotion labels options
emotion_label_options = ('32_emotion_labels', '20_emotion_labels', '8_emotion_labels')
if any(feature_vector[feature2number[option]] == 1 for option in emotion_label_options):
    emo32_model, emo32_tokenzr = em32.load32EmotionsModel() #get model and tokenizer


def _add_vad_columns(sample_df):
    """Add valence/arousal/dominance columns for the speaker and listener utterances.

    Uses ``lexicon.get_avg_vad`` together with the module-level ``lexicon_df``,
    ``wnl`` and ``stp_wrds`` objects returned by ``lexicon.setup_lexicon``.
    The intermediate list-valued columns are dropped before returning.
    """
    sample_df['vad_speaker'] = sample_df['speaker_utterance'].apply(lexicon.get_avg_vad, args = (lexicon_df,wnl,stp_wrds))
    sample_df['vad_listener'] = sample_df['listener_utterance'].apply(lexicon.get_avg_vad, args = (lexicon_df,wnl,stp_wrds))
    sample_df[['valence_speaker','arousal_speaker','dominance_speaker']] = pd.DataFrame(sample_df.vad_speaker.tolist(),index = sample_df.index)
    sample_df[['valence_listener','arousal_listener','dominance_listener']] = pd.DataFrame(sample_df.vad_listener.tolist(),index = sample_df.index)
    return sample_df.drop(columns = ['vad_speaker','vad_listener'])


def _add_mimicry_column(sample_df, threshold):
    """Add a 0/1 ``mimicry`` column: 1 when the emotional similarity exceeds ``threshold``.

    The similarity comes from ``data_processer.get_cosine_similarity`` applied row-wise;
    it presumably reads the VAD columns, so those must already be present (TODO confirm).
    """
    sample_df['emotional_similarity'] = sample_df.apply(data_processer.get_cosine_similarity,axis = 1)
    sample_df['mimicry'] = sample_df.apply(lambda row: 1 if row['emotional_similarity'] > threshold else 0, axis = 1)
    return sample_df.drop(columns = ['emotional_similarity'])


def process_answer(sample_df, control_vector, mimicry_threshold=0.7):
    """Annotate a conversation exchange with the features selected in ``control_vector``.

    Parameters
    ----------
    sample_df : pandas.DataFrame
        Must contain the ``speaker_utterance`` and ``listener_utterance`` columns.
        The frame is copied, so the caller's object is left untouched.
    control_vector : list
        Switches (1 = enabled) indexed through the module-level ``feature2number``.
    mimicry_threshold : float, optional
        Similarity above which ``mimicry`` is set to 1 (default 0.7, the value
        previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        Copy of ``sample_df`` with the feature columns appended.

    Notes
    -----
    Relies on the models and lexicon objects loaded at module level
    (``model_intent``, ``sent_model``, ``lexicon_df``, ``emo32_model``, ...).
    """
    print('processing data....')
    # Work on a copy so that temporary columns never leak into the caller's frame.
    sample_df = sample_df.copy()

    def enabled(feature_name):
        return control_vector[feature2number[feature_name]] == 1

    if enabled('intent'):
        # Classify the listener's reply stored in the frame itself rather than a
        # global variable, so the function works for any frame it is given.
        sample_df['utterance'] = sample_df['listener_utterance'].astype(str)
        sample_df['is_response'] = 1
        sample_df['empathetic_intent'] = sample_df.apply(data_processer.get_emp_intent_probabilities, axis=1, args = (model_intent,tokenizer_intent,device,'utterance'))
        sample_df[data_processer.intent_labels] = pd.DataFrame(sample_df.empathetic_intent.tolist(),index = sample_df.index)
        sample_df = sample_df.drop(columns=['empathetic_intent','utterance','is_response'])

    if enabled('sentiment'):
        # Sentiment probabilities for the speaker, then for the listener.
        sample_df['speaker_sentiment'] = sample_df.apply(data_processer.get_sentiment_probabilities,axis = 1, args = (sent_model,sent_tokenzr,'speaker_utterance'))
        sample_df[['s_negative','s_neutral', 's_positive']] = pd.DataFrame(sample_df.speaker_sentiment.tolist(),index = sample_df.index)
        sample_df['listener_sentiment'] = sample_df.apply(data_processer.get_sentiment_probabilities,axis = 1, args = (sent_model,sent_tokenzr,'listener_utterance'))
        sample_df[['l_negative','l_neutral', 'l_positive']] = pd.DataFrame(sample_df.listener_sentiment.tolist(),index = sample_df.index)
        sample_df = sample_df.drop(columns=['speaker_sentiment','listener_sentiment'])

    if enabled('epitome'):
        sample_df = epitome.predict_epitome_values('classifiers/epitome_mechanisms/trained_models',sample_df)

    if enabled('VAD_vectors'):
        sample_df = _add_vad_columns(sample_df)

    if enabled('utterance_length'):
        sample_df['s_word_len'] = sample_df['speaker_utterance'].apply(data_processer.get_word_len)
        sample_df['l_word_len'] = sample_df['listener_utterance'].apply(data_processer.get_word_len)

    if enabled('32_emotion_labels') or enabled('20_emotion_labels') or enabled('8_emotion_labels'):
        # The 32-label classifier is always run; the reduced label sets are derived from it.
        sample_df['speaker_emotion'] = sample_df.apply(data_processer.get_emotion_label,axis = 1, args = (emo32_model,emo32_tokenzr,'speaker_utterance'))
        sample_df['listener_emotion'] = sample_df.apply(data_processer.get_emotion_label,axis = 1, args = (emo32_model,emo32_tokenzr,'listener_utterance'))
        if enabled('20_emotion_labels'):
            sample_df = em_red.reduce_emotion_labels('speaker_emotion',sample_df)
            sample_df = em_red.reduce_emotion_labels('listener_emotion',sample_df)
        if enabled('8_emotion_labels'):
            sample_df = em_red.reduce_emotion_labels_to_8('speaker_emotion',sample_df)
            sample_df = em_red.reduce_emotion_labels_to_8('listener_emotion',sample_df)

    if enabled('emotion_mimicry'):
        if enabled('VAD_vectors'):
            # VAD columns are already present and stay in the output as features.
            print('No labels detected, obtaining mimicry through emotional distance using VAD....')
            sample_df = _add_mimicry_column(sample_df, mimicry_threshold)
        else:
            # VAD was not requested as a feature: compute it only to derive mimicry,
            # then remove it again.
            sample_df = _add_vad_columns(sample_df)
            sample_df = _add_mimicry_column(sample_df, mimicry_threshold)
            sample_df = sample_df.drop(columns =  ['valence_speaker','arousal_speaker','dominance_speaker','valence_listener','arousal_listener','dominance_listener'])
        # Kept as in the original pipeline: mimicry ends up with the pandas 'string' dtype.
        sample_df['mimicry'] = sample_df['mimicry'].astype('category')
        sample_df['mimicry'] = sample_df['mimicry'].astype('string')
    print('done')
    return sample_df


## Loading database

Next, we load the database and keep only the conversation starters (first exchanges), from which a random sample will be drawn.

In [30]:
# Read the exchange database selected in the configuration cell.
database = pd.read_csv(current_dir + database_dir)

# Keep only the rows whose exchange_number is 1 and renumber them from 0 so they
# can be addressed by position.
is_first_exchange = database['exchange_number'] == 1
starting_exchange_db = database.loc[is_first_exchange].reset_index(drop = True)
starting_exchange_db

Unnamed: 0,conv_id,context,prompt,speaker_utterance,listener_utterance,exchange_number,s_negative,s_neutral,s_positive,l_negative,...,acknowledging,encouraging,consoling,sympathizing,suggesting,questioning,wishing,neutral,mimicry,empathy
0,hit:10687_conv:21375,joyful,I was so happy when my mom came to visit me!,I was so happy when my mom came to visit me,Nice! How long has it been since you last saw ...,1,0.001461,0.007063,0.991476,0.004031,...,0.000768,0.000021,0.000023,0.000214,0.000168,0.998566,0.000094,0.000084,1,3
1,hit:1876_conv:3752,confident,I felt confident when I finished my job interv...,I felt confident when I finished my job interv...,did y0u get it ?,1,0.001707,0.047845,0.950448,0.162308,...,0.000078,0.000039,0.000015,0.000080,0.000176,0.999199,0.000088,0.000279,1,2
2,hit:88_conv:176,afraid,I think my job will lay us off.,I think our job might be laying us off.,O No! Are they going to offer you any kind of ...,1,0.668925,0.319458,0.011617,0.507649,...,0.000041,0.000029,0.000023,0.000203,0.000817,0.998379,0.000254,0.000190,1,3
3,hit:8687_conv:17374,anxious,I'm not really sure if I am going to be able t...,I'm not really sure if I am going to be able t...,Why not?,1,0.511960,0.435424,0.052616,0.252442,...,0.000035,0.000023,0.000047,0.000309,0.000683,0.998611,0.000145,0.000091,0,2
4,hit:4346_conv:8693,confident,I am playing an official gig with a band I'm i...,I am playing an official gig with a band I'm i...,Wow congrats! What type of music do you play?,1,0.001250,0.006920,0.991831,0.000848,...,0.000102,0.000031,0.000023,0.001210,0.001222,0.996931,0.000144,0.000224,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458,hit:10410_conv:20820,terrified,I heard some noises outside the house. I thoug...,I heard some noises outside the house. I thoug...,what,1,0.462025,0.509884,0.028091,0.351025,...,0.000229,0.000044,0.000024,0.000432,0.024983,0.972462,0.000456,0.001138,0,3
459,hit:10276_conv:20553,anticipating,I am looking forward to my wedding date!,I am looking forward to my wedding date! Of co...,Oh that is definitely something to be excited ...,1,0.000774,0.007288,0.991938,0.001177,...,0.004093,0.001286,0.000084,0.000022,0.000238,0.000267,0.000287,0.000419,1,3
460,hit:4303_conv:8607,guilty,I yelled at my two-year-old nephew the other d...,I yelled at my two-year-old nephew the other d...,I think it's an adjustment you have to work th...,1,0.956855,0.038004,0.005142,0.489524,...,0.268683,0.000206,0.001094,0.005618,0.002770,0.000403,0.000271,0.720282,1,3
461,hit:11415_conv:22830,apprehensive,When we were deciding to move across the count...,I was really nervous to move across country.,why were you so nervous,1,0.750556,0.228983,0.020461,0.555310,...,0.000892,0.000019,0.000045,0.000227,0.000109,0.994134,0.000171,0.004301,0,3


### loading model

In [49]:
# NOTE(security): unpickling can execute arbitrary code, so only load model files
# that were produced by this project's own training runs.
# The context manager closes the file handle once the model is loaded.
with open(model_directory, 'rb') as model_file:
    pbc = pickle.load(model_file)

### picking a random conversation starter

In [32]:
len_of_db = len(starting_exchange_db)
# randrange excludes the upper bound, so the result is always a valid row label
# (0 .. len_of_db - 1). random.randint(0, len_of_db) could return len_of_db itself,
# which is one past the last row and makes the .loc lookup below fail.
index_of_sample = random.randrange(len_of_db)

sample_text = starting_exchange_db.loc[index_of_sample,'speaker_utterance']

# The database stores commas as the literal token "_comma_"; restore them for display.
sample_text = sample_text.replace("_comma_", ',')

print(f'Prompt: "{sample_text}"') 

Prompt: "Yesterday I failed my physics exam."


### type a response

In [74]:
# Keep asking until the user types something other than blanks; a whitespace-only
# reply carries no text to classify, so it is treated like an empty one.
# The accepted answer is stored exactly as typed.
while True:
    answer = input("Provide your response: ")
    if answer.strip():
        break
    print('No answer received, please provide a response')

Provide your response:  I love you I love you I love you you are my world


### process data

In [75]:

# Columns kept as input for process_answer. All other columns of the database row
# are dropped before processing, so that the features are computed for the new answer.
columns_to_keep = ['speaker_utterance', 'listener_utterance', 'empathy', 'exchange_number']

# Take the sampled conversation starter as a one-row frame and substitute the
# user's answer for the original listener reply.
df = starting_exchange_db.iloc[[index_of_sample]].reset_index(drop=True)
df.loc[0, 'listener_utterance'] = str(answer)

print(df.iloc[0])

df = df.drop(columns = [column for column in df.columns if column not in columns_to_keep])
df = process_answer(df,feature_vector)

# The raw text is not a model input; only the derived features remain.
df = df.drop(columns = ['speaker_utterance', 'listener_utterance'])
df.iloc[0]

conv_id                                              hit:1498_conv:2997
context                                                    disappointed
prompt                I I failed my physics exam yesterday. I was so...
speaker_utterance                   Yesterday I failed my physics exam.
listener_utterance    I love you I love you I love you you are my world
exchange_number                                                       1
s_negative                                                      0.93711
s_neutral                                                      0.058596
s_positive                                                     0.004294
l_negative                                                     0.608533
l_neutral                                                      0.365874
l_positive                                                     0.025593
valence_speaker                                                  -0.021
arousal_speaker                                                 

exchange_number              1
empathy                      3
agreeing              0.001059
acknowledging         0.000569
encouraging           0.000108
consoling             0.000007
sympathizing          0.000215
suggesting            0.000095
questioning           0.000487
wishing               0.001096
neutral               0.996365
s_negative             0.93711
s_neutral             0.058596
s_positive            0.004294
l_negative            0.002831
l_neutral             0.007368
l_positive            0.989802
predictions_ER               1
predictions_IP               0
predictions_EX               0
valence_speaker         -0.021
arousal_speaker         -0.051
dominance_speaker        -0.17
valence_listener         0.811
arousal_listener       -0.0245
dominance_listener      0.3845
s_word_len                   6
l_word_len                  13
mimicry                      0
Name: 0, dtype: object

### pass it to the model and predict value

In [77]:
# Split the processed row into model inputs (every column but the label) and the
# 'empathy' label column, then ask the trained classifier for its prediction.
x_test = df.drop(columns=['empathy'])
y_test = df[[column for column in df.columns if column not in x_test.columns]]
y_pred = pbc.predict(x_test)
y_pred

                                                                                

[0]