# DEMO for empathy classification

In this demo, we classify the empathy in text exchanges. 

We provide the most supported pattern for the classification of the exchange. 


# Set up

In [1]:
import pickle
import pandas as pd
import numpy as np
from PBC4cip import PBC4cip
import os
import sys
import random 
import re

from sklearn.metrics import accuracy_score


import database_processing_package as data_processer
import exchange_processing as exchange_processer
from classifiers.course_grained_emotion import emotion_reductor as em_red


### Load main classification model

In [22]:
#Relevant directories
current_dir = os.getcwd() #get directory of the repository

#Database
database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges.csv'
test_database_dir = '/processed_databases/EmpatheticExchanges/test.csv'
database = pd.read_csv(current_dir + database_dir)
test_db = pd.read_csv(current_dir + test_database_dir)

#Select an appropriate classification model in the Experiments folder
#number_of_model =90 #The number of the experiment for the model of interest

number_of_model = 70 #Best performing model without exchange number (broader pattern recognition)
model_directory = current_dir + '/Experiments/outputs/Experiment '+ str(number_of_model) + '/' + 'trained_pbc4cip.sav'
pbc = pickle.load(open(model_directory, 'rb'))

#Select features relevant for the model
att_lst = [attribute[0] for attribute in pbc.dataset.Attributes]
print('Features for the model: ')
for attribute in att_lst:
    print(attribute, end = ' ')


Features for the model: 
exchange_number s_negative s_neutral s_positive l_negative l_neutral l_positive predictions_ER predictions_IP predictions_EX valence_speaker arousal_speaker dominance_speaker valence_listener arousal_listener dominance_listener s_word_len l_word_len agreeing acknowledging encouraging consoling sympathizing suggesting questioning wishing neutral mimicry 

### Load supplementary classification models

In [3]:
flag_array, model_components = exchange_processer.load_supplementary_classifiers(att_lst)

## Inference 

In [4]:
len(model_components)

14

In [5]:

processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'I hate when my wife and son are away from me', "I get that you're feeling bad but do not let it get to you. I'm sure you'll be extra happy once they are here",model_components)
recommendation = exchange_processer.get_recommentation(pbc, processed_exchange,'listener')
judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,'I hate when my wife and son are away from me', "I get that you're feeling bad but do not let it get to you. I'm sure you'll be extra happy once they are here",model_components,"listener")


                                                                                

Empathy prediction for listener: 3/3




In [6]:
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,'I hate when my wife and son are away from me', "Aww that is sweet You are a good dad",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,'My little cousin  was nice and gave me a present!', "Aww cool! was the ocasion special?",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Does it bother you when your friends have all dates and you're single? It makes me feel inadecuate", "Yeah, it really sucks loneliness is no easy thing to go though",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Does it bother you when your friends have all dates and you're single? It makes me feel inadecuate", "Yeah, you are ",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Does it bother you when your friends have all dates and you're single? It makes me feel inadecuate", "Oh yeah, it bothers me a lot too! ",model_components,"listener")



#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"I was so mad earlier someone hit my car and just drove off!", "Fucking really? That sucks so much I would be pissed! ",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"I was so mad earlier someone hit my car and just drove off!", "That's what you get lmao! ",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"I was so mad earlier someone hit my car and just drove off!", "Aww man, that's not ideal Did you get the plates?",model_components,"listener")
#judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"I was so mad earlier someone hit my car and just drove off!", "Was it a bad accident?",model_components,"listener")


judge_exchange = exchange_processer.judge_exchange(pbc,flag_array,att_lst,"I was so mad earlier someone hit my car and just drove off!", "Was it a bad accident?",model_components,"listener")

                                                                                

Empathy prediction for listener: 2/3




print("First exchange")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"i loved taking care of my sisters pet", "Huh, is that so",model_components, 'listener')
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"i loved taking care of my sisters pet", "It's cool that you loved that",model_components, 'listener')
print("Second exchange")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Yeah! I have loved animals since then especially dogs", "Dogs are very cute. Cats too",model_components, "listener")
print("Third exchange")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Oh my gosh yes. Cats are so fluffy and cuddly", "They are! I love petting them", model_components,"listener")
print("Fourth exchange")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"You know i really enjoy having pets they bring a new life into an empty feeling house", "Yes I only have one cat. How about you?",model_components, "listener")
print("Fifth exchange")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"we have a cat, a dog, a bunny, and a betta fish!", "Those are many pets", model_components, "listener")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"we have a cat, a dog, a bunny, and a betta fish!", "Those are many pets, how do you manage?",model_components, "listener")
print("Sixth exchange")
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"It is a lot of work. But their little faces are so worth it", "Yes they are I bet you feel proud",model_components, "listener")


In [22]:
exchange_processer.judge_exchange(pbc,flag_array,att_lst,"I was really nervous to move across country.", "why were you?",model_components, 'listener')

exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Knew noone where we were moving and also far away from my mother_comma_ who is getting old.", "no one knew that you were moving?",model_components, 'listener')

exchange_processer.judge_exchange(pbc,flag_array,att_lst,"oh sorry_comma_ we knew no one where we were moving to", "oh_comma_ that has to be scary",model_components, 'listener')

exchange_processer.judge_exchange(pbc,flag_array,att_lst,"Amazingly lol. But here I am.", "here you are, killing it",model_components, 'listener')


                                                                          

Empathy prediction for listener: 1/3
Low empathy detected!
interjection by Haru: Suggestion: Respond with sympathy


                                                                          

Empathy prediction for listener: 1/3
Low empathy detected!
interjection by Haru: Suggestion: My sensors say that we should encourage them about their feelings listener


                                                                          

Empathy prediction for listener: 3/3


                                                                          

Empathy prediction for listener: 1/3
Low empathy detected!
interjection by Haru: Suggestion: My sensors say that we should encourage them about their feelings listener


False

In [None]:
processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'I hate when my wife and son are away from me', "I get that you're feeling bad but do not let it get to you. I'm sure you'll be extra happy once they are here",model_components)
recommendation = exchange_processer.get_recommentation(pbc, processed_exchange,'listener')

### testing new VA vectors

In [28]:
processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'I hate when my wife and son are away from me', "I get that you're feeling bad but do not let it get to you. I'm sure you'll be extra happy once they are here",model_components)



conversations = ['body', 'comic', 'faith', 'joy', 'lottery', 'manager', 'racoon', 'sister', 'morning', 'furiosa']

#conversations = ['body', 'comic', 'joy', 'lottery', 'racoon', 'sister', 'morning', 'furiosa']


for weights in range(1,11,1):
    print(weights/10)

    #conversations =  ['faith']
    empathy_truth = []
    predictions_text = []
    predictions_video = []


    for i in range(len(conversations)):
        convo_idx = i
        convos_to_test = pd.read_csv(current_dir + '/useful_database_subsets/video_exchanges/convos_to_test.csv')

        #print(convos_to_test.columns)

        df_convo = convos_to_test[convos_to_test['conversation'] == conversations[convo_idx]].reset_index()

        #print(convos_to_test[convos_to_test['conversation'] == conversations[convo_idx]])


        string_arr = [
                    [df_convo.loc[0,'speaker_utterance'],df_convo.loc[0,'listener_utterance']],
                    [df_convo.loc[1,'speaker_utterance'],df_convo.loc[1,'listener_utterance']]
                    ]



        exchanges_df = pd.DataFrame(columns=processed_exchange.columns)


        for ex in string_arr:
            #print(ex)
            single_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst, ex[0], ex[1])
            single_exchange['pred_text'] = y_pred
            exchanges_df = pd.concat([exchanges_df, single_exchange])

        exchanges_df = exchanges_df.reset_index(drop= True)

        conversation = []
        video_av_values = pd.read_csv(current_dir + '/useful_database_subsets/video_exchanges/exchanges/' + 'exchanges_'+str(conversations[convo_idx])+'.csv')
        ratio_v_t = weights/10
        exchanges_df['valence_speaker'] = (1-ratio_v_t)*exchanges_df['valence_speaker'] + (ratio_v_t)*video_av_values['valence_right']
        exchanges_df['arousal_speaker'] = (1-ratio_v_t)*exchanges_df['arousal_speaker'] + (ratio_v_t)*video_av_values['arousal_right'] 
        exchanges_df['valence_listener'] = (1-ratio_v_t)*exchanges_df['valence_listener'] + (ratio_v_t)*video_av_values['valence_left']
        exchanges_df['arousal_listener'] = (1-ratio_v_t)*exchanges_df['arousal_listener'] + (ratio_v_t)*video_av_values['arousal_left'] 


        #print(exchanges_df[['valence_speaker', 'valence_listener', 'arousal_listener', 'arousal_listener', 'pred_text']])

        #mimicry
        exchanges_df['emotional_similarity'] = exchanges_df.apply(data_processer.get_cosine_similarity,axis = 1) 
        exchanges_df['mimicry'] = exchanges_df.apply(lambda x: 1 if x['emotional_similarity'] > 0.7 else 0, axis = 1)
        exchanges_df = exchanges_df.drop(columns = ['emotional_similarity'])


        exchanges_df['pred_video'] = pbc.predict(exchanges_df)
        exchanges_df['pred_video'] = exchanges_df['pred_video'] + 1
        exchanges_df['new_empathy_reduced'] = df_convo['new_empathy_reduced']
        exchanges_df['empathy'] = df_convo['empathy']
        exchanges_df.to_csv(current_dir + '/useful_database_subsets/video_exchanges/'+'exchange_predictions_'+str(conversations[convo_idx])+'.csv')
        exchanges_df

        empathy_truth.append(exchanges_df.loc[0,'new_empathy_reduced'])
        empathy_truth.append(exchanges_df.loc[1,'new_empathy_reduced'])
        predictions_text.append(exchanges_df.loc[0,'pred_text'])
        predictions_text.append(exchanges_df.loc[1,'pred_text'])
        predictions_video.append(exchanges_df.loc[0,'pred_video'])
        predictions_video.append(exchanges_df.loc[1,'pred_video'])

    for i in range(len(predictions_text)):
        predictions_text[i] = predictions_text[i].astype(np.int64)

    print(empathy_truth)
    print(predictions_text)
    print(predictions_video)

    acc_text = accuracy_score(empathy_truth, predictions_text)
    acc_video = accuracy_score(empathy_truth, predictions_video)

    print(acc_text)
    print(acc_video)

    for i in range(len(empathy_truth)):
        if empathy_truth[i] != predictions_video[i] and empathy_truth[i] == predictions_text[i]:
            print(i+1)    

    print()


                                                                          

0.1


                                                                          

[3, 3, 2, 2, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 3, 2, 3, 3, 2, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.45
0.4
5
7
12
0.2


                                                                          

[3, 3, 2, 2, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 3, 2, 3, 3, 2, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.45
0.4
5
7
12
0.3


                                                                          

[3, 3, 2, 2, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 3, 2, 3, 3, 2, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.45
0.4
5
7
12
0.4


                                                                          

[3, 3, 2, 2, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 3, 2, 3, 3, 2, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.45
0.4
5
7
12
0.5


                                                                          

[3, 3, 2, 2, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 3, 2, 3, 3, 2, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.45
0.4
5
7
12
0.6


                                                                          

[3, 3, 2, 2, 2, 2, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 3, 2, 3, 3, 2, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.45
0.4
5
7
12
0.7


                                                                          

KeyboardInterrupt: 

In [22]:
from sklearn.metrics import accuracy_score

acc_text = accuracy_score(empathy_truth, predictions_text)
acc_video = accuracy_score(empathy_truth, predictions_video)

print(acc_text)
print(acc_video)

for i in range(len(empathy_truth)):
    if empathy_truth[i] != predictions_video[i] and empathy_truth[i] == predictions_text[i]:
        print(i+1)

conversations = ['body', 'comic', 'faith', 'joy', 'lottery', 'manager', 'racoon', 'sister', 'morning', 'furiosa']


0.45
0.35
5
7
12


In [159]:
processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"Knew no one where we were moving_comma_ and also far away from my mother_comma_ who is getting old.", "no one knew that you were moving?",model_components)
print(y_pred)
print(processed_exchange)
processed_exchange.loc[0,'valence_speaker'] = -0.0615155920987168
processed_exchange.loc[0,'arousal_speaker'] = 0.0384891806771198
processed_exchange.loc[0,'valence_listener'] = -0.0883875858710437
processed_exchange.loc[0,'arousal_listener'] = -0.0113459913457026
processed_exchange['emotional_similarity'] = processed_exchange.apply(data_processer.get_cosine_similarity,axis = 1) 
processed_exchange['mimicry'] = processed_exchange.apply(lambda x: 1 if x['emotional_similarity'] > 0.7 else 0, axis = 1)
processed_exchange = processed_exchange.drop(columns = ['emotional_similarity'])
print(processed_exchange)
#print(processed_exchange)


                                                                          

1
   s_negative  s_neutral  s_positive  l_negative  l_neutral  l_positive  \
0    0.574606   0.387071    0.038323    0.375899   0.594197    0.029904   

   predictions_ER  valence_speaker  arousal_speaker  valence_listener  ...  \
0               0         0.301333        -0.084667             0.021  ...   

   agreeing  acknowledging  encouraging  consoling  sympathizing  suggesting  \
0  0.000062       0.000078     0.000021   0.000021      0.000109    0.000099   

   questioning   wishing   neutral  mimicry  
0     0.999338  0.000085  0.000187        1  

[1 rows x 23 columns]
   s_negative  s_neutral  s_positive  l_negative  l_neutral  l_positive  \
0    0.574606   0.387071    0.038323    0.375899   0.594197    0.029904   

   predictions_ER  valence_speaker  arousal_speaker  valence_listener  ...  \
0               0        -0.061516         0.038489         -0.088388  ...   

   agreeing  acknowledging  encouraging  consoling  sympathizing  suggesting  \
0  0.000062       0.000078



In [45]:
processed_exchange, y_pred  = predict_exchange_empathy(pbc, flag_array, 1, att_lst, 'How are you?', "Doing pretty good, how about yourself?")
print(y_pred)
print(processed_exchange)

NameError: name 'predict_exchange_empathy' is not defined

In [None]:
arguments = [pbc, flag_array, 1, att_lst, 'How are you?', "Doing pretty good, how about yourself?"]
arguments.extend(model_components)
processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy_source(*arguments)
print(y_pred)
print(processed_exchange)

In [None]:
def get_input():
    bad_input_flag = True
    while bad_input_flag:
        utterance = input("Provide input: ")
        if utterance.lower() == '':
            print('Please provide valid input')
        else:
            bad_input_flag = False    
    return utterance 


In [None]:
judge_exchange(pbc,flag_array,att_lst,'I hate when my wife and son are away from me', "Oh ok. ","listener")
judge_exchange(pbc,flag_array,att_lst,'I hate when my wife and son are away from me', "I've never been in that situation but that is understandable. ","listener")


# Inference

## Examples of inference and recommendations

In [None]:

judge_exchange(pbc,flag_array,att_lst,'finally got my house, I do not have to deal with apartment living anymore', "apartments are ok","listener")

judge_exchange(pbc,flag_array,att_lst,'finally got my house, I do not have to deal with apartment living anymore', "apartments are ok, you shouldn't knock them","listener")

judge_exchange(pbc,flag_array,att_lst,'finally got my house, I do not have to deal with apartment living anymore', "That's great I love living in my apartment but I'm happy for you","listener")



## Automatic conversation loop

In [None]:
spkr = ["I cannot wait for the newest Pokemon game, it looks amazing to me!", "I see your point, but I still think it is fun",'abortsequence']
lstnr = ['pokemon is just ok',"Oh well it's not bad. I had fun with pokemon when I was 10", "I relate to that, I listen to old songs from my childhood",'abortsequence']

speaker_utterances = []
listener_utterances = []

conversation_end = False
#For a short conversation 
for i in range(2):    
    if i>0:
        j = i
        while not good_speaker:
            prompt = spkr[j]
            print(f"Speaker turn: {prompt}")
            print('Speaker_empathy')
            good_speaker = judge_exchange(pbc, flag_array, att_lst, listener_utterances[i-1],prompt,'speaker')
            j = j+1
            #judgement_on_speaker = False
            #print(f'judgement on speaker {judgement_on_speaker}')
    else:      
        prompt = spkr[i]
        print(f"Speaker turn: {prompt}")
        if 'abortsequence' in prompt:
            break
        good_speaker = False    
        
    
    
    speaker_utterances.append(prompt) #We append the successful utterance to the record
    good_listener = False
    #Keep listener hostage while they do not provide empathetic responses
    j = i
    while not good_listener:
        answer = lstnr[j]
        print(f"Listener turn: {answer}")
        if 'abortsequence' in answer:
            break
        good_listener = judge_exchange(pbc, flag_array, att_lst, speaker_utterances[i], answer,'listener')
        j = j+1
        #print(f"Speaker: {speaker_utterances[i]}")
        #print(f"Listener: {listener_utterances[i]}")     
    listener_utterances.append(answer)


print(speaker_utterances)
print(listener_utterances)


In [None]:
spkr = ["I cannot wait for the newest Pokemon game, it looks amazing to me!", "I see your point, but I still think it is fun",'abortsequence']
lstnr = ['pokemon is just ok',"Oh well it's not bad. I had fun with pokemon when I was 10", "Yeah it was to me, maybe I should try the new one",'abortsequence']

speaker_utterances = []
listener_utterances = []

conversation_end = False
#For a short conversation

j = 0
i = 0

while not conversation_end:    
    if j>0:
        good_speaker = False    
        while not good_speaker:
            prompt = spkr[j]
            print(f"Speaker turn: {prompt}")
            print('Speaker_empathy')
            good_speaker = judge_exchange(pbc, flag_array, att_lst, lstnr[i-1],prompt,'speaker')
            j = j+1
            if j + 1 >= len(spkr):
                conversation_end = True
            #judgement_on_speaker = False
            #print(f'judgement on speaker {judgement_on_speaker}')
    else:      
        prompt = spkr[i]
        print(f"Speaker turn: {prompt}")
        if 'abortsequence' in prompt:
            break
        good_speaker = False    
        if j + 1 >= len(spkr):
            conversation_end = True
        else:
            j += 1        
    speaker_utterances.append(prompt) #We append the successful utterance to the record
    good_listener = False
    #Keep listener hostage while they do not provide empathetic responses
    while not good_listener:
        answer = lstnr[i]
        print(f"Listener turn: {answer}")
        if 'abortsequence' in answer:
            conversation_end = True
            break
        good_listener = judge_exchange(pbc, flag_array, att_lst, spkr[j], answer,'listener')
        i = i+1
        #print(f"Speaker: {speaker_utterances[i]}")
        #print(f"Listener: {listener_utterances[i]}")     
    listener_utterances.append(answer)
    if i + 1 >= len(lstnr):
        conversation_end = True
    else:
        i += 1
    print(f'{i} {j}')
        


print(speaker_utterances)
print(listener_utterances)

## Conversation Loop

In [None]:
candidates = []
for i in database['prompt']:
    if(len(database[database['prompt'] == i]) >= 4 ):
        #print(database[database['prompt'] == i])
        candidates.append(i)
set(candidates)

In [None]:

print("First exchange")
judge_exchange(pbc,flag_array,att_lst,"i loved taking care of my sisters pet", "Huh, is that so", 'listener')
judge_exchange(pbc,flag_array,att_lst,"i loved taking care of my sisters pet", "It's cool that you loved that", 'listener')
print("Second exchange")
judge_exchange(pbc,flag_array,att_lst,"Yeah! I have loved animals since then especially dogs", "Dogs are very cute. Cats too", "listener")
print("Third exchange")
judge_exchange(pbc,flag_array,att_lst,"Oh my gosh yes. Cats are so fluffy and cuddly", "They are! I love petting them", "listener")
print("Fourth exchange")
judge_exchange(pbc,flag_array,att_lst,"You know i really enjoy having pets they bring a new life into an empty feeling house", "Yes I only have one cat. How about you?", "listener")
print("Fifth exchange")
judge_exchange(pbc,flag_array,att_lst,"we have a cat, a dog, a bunny, and a betta fish!", "Those are many pets", "listener")
judge_exchange(pbc,flag_array,att_lst,"we have a cat, a dog, a bunny, and a betta fish!", "Those are many pets, how do you manage?", "listener")
print("Six exchange")
judge_exchange(pbc,flag_array,att_lst,"It is a lot of work. But their little faces are so worth it", "Yes they are I bet you feel proud", "listener")


# Miscellaneous zone

Where we do all sorts of experimentation

### Checking pattern list that covers an exchange (values)

In [3]:
emerging_patterns = pbc.EmergingPatterns
emerging_patterns[0].Items[0].Value
dummy_dic = {}
att_lst = pbc.dataset.Attributes
values = [0.9398489, 0.057078857, 0.0030721908, 0.33885983, 0.64117646, 
    0.019963712, 0, 0, 2, -0.708, 0.656, -0.63, -0.98, 0.55, -0.51, 
    7, 5, 6.73560498398729E-05, 5.48729512956925E-05, 3.37222991220187E-05, 
    1.91891886061057E-05, 0.000120048694953, 0.0005292572896, 0.998958706855774, 
    0.000112920177344, 0.000103849401057, 1]

for i in range(len(att_lst)):
    new_data = {str(att_lst[i][0]): [values[i]]}
    dummy_dic.update(new_data)


print(dummy_dic)

exchange_df = pd.DataFrame.from_dict(dummy_dic)

pbc.predict(exchange_df)


pattern_list = [] #patterns that cover the exchange

for instance in exchange_df.to_numpy(): 
    for pattern in emerging_patterns:
        if pattern.IsMatch(instance):
            pattern_list.append(pattern)   
len(pattern_list)

{'s_negative': [0.9398489], 's_neutral': [0.057078857], 's_positive': [0.0030721908], 'l_negative': [0.33885983], 'l_neutral': [0.64117646], 'l_positive': [0.019963712], 'predictions_ER': [0], 'valence_speaker': [0], 'arousal_speaker': [2], 'dominance_speaker': [-0.708], 'valence_listener': [0.656], 'arousal_listener': [-0.63], 'dominance_listener': [-0.98], 's_word_len': [0.55], 'l_word_len': [-0.51], 'agreeing': [7], 'acknowledging': [5], 'encouraging': [6.73560498398729e-05], 'consoling': [5.48729512956925e-05], 'sympathizing': [3.37222991220187e-05], 'suggesting': [1.91891886061057e-05], 'questioning': [0.000120048694953], 'wishing': [0.0005292572896], 'neutral': [0.998958706855774], 'mimicry': [0.000112920177344]}


                                                                                

147

In [4]:
processed_exchange, y_pred  = predict_exchange_empathy(pbc, flag_array, 1, att_lst, 'At the time there was a friend that told me that i could not jump over him_comma_ then i jumped over him.', "Neato burrito")


NameError: name 'predict_exchange_empathy' is not defined

In [5]:
judge_exchange(pbc,flag_array,att_lst,'At the time there was a friend that told me that i could not jump over him_comma_ then i jumped over him.', "Neato burrito", 'listener')


NameError: name 'judge_exchange' is not defined

### load predictions on dataframe

In [7]:
from sklearn.model_selection import train_test_split

pred_pbc = pd.read_csv(current_dir + '/best_predictions.txt', header = None)
pred_pbc = pred_pbc.rename(columns = {0:'pred'})
pred_pbc['pred'] = pred_pbc['pred'].apply(lambda x: x + 1)
pred_pbc

database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges.csv'

database = pd.read_csv(current_dir + database_dir)
X = database.drop(columns=['empathy'])
y = database['empathy']

test_db = pd.read_csv(current_dir + test_database_dir)
test_db

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42,stratify=y)
train_df = pd.concat([X_train, y_train], axis=1)
test_df = pd.concat([X_test, y_test], axis=1)
test_df = test_df.reset_index(drop = True)
test_df['pred'] = pred_pbc['pred']
test_df

Unnamed: 0,conv_id,context,prompt,speaker_utterance,listener_utterance,s_negative,s_neutral,s_positive,l_negative,l_neutral,...,encouraging,consoling,sympathizing,suggesting,questioning,wishing,neutral,mimicry,empathy,pred
0,hit:10889_conv:21779,jealous,I saw my neighbor bought the car I have always...,I know_comma_ it would look perfect in front o...,Time goes by so fast. You will see.,0.026005,0.146168,0.827828,0.273078,0.536051,...,0.000544,0.000172,0.000325,0.000067,0.000040,0.007855,0.000388,1,3,1
1,hit:1916_conv:3833,hopeful,when you expect more you have been disapponted...,yeah good. whats your name?,I am not sure I feel comfortable telling you a...,0.005329,0.183143,0.811529,0.453844,0.520900,...,0.000467,0.145248,0.003519,0.002405,0.002545,0.000153,0.725658,1,1,2
2,hit:7038_conv:14076,impressed,My coworker did this presentation at work that...,I was really proud of my coworker and their pr...,That is nice. It is good to be supportive,0.001078,0.007549,0.991373,0.002196,0.014970,...,0.000377,0.000236,0.000155,0.000037,0.000073,0.003338,0.000069,1,3,3
3,hit:10237_conv:20475,devastated,One time my dog got run over by a car. He had ...,Yeah. He did not survive. I really miss him,Hope everything gets better soon,0.921586,0.069954,0.008460,0.004004,0.086787,...,0.002373,0.995767,0.000493,0.000107,0.000336,0.000064,0.000093,0,1,2
4,hit:5988_conv:11976,prepared,I have been working all week on my project. To...,Hi_comma_ I have a big business presentation t...,Are you fully prepared for it?,0.002434,0.235530,0.762036,0.139080,0.810800,...,0.000093,0.000012,0.000063,0.000195,0.998911,0.000307,0.000167,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3298,hit:9741_conv:19482,confident,I had to give a speech in front of a few of my...,I absolutely hate public speaking. I did alrig...,I bet! Well it over now and I am sure you did ...,0.949689,0.044111,0.006201,0.002755,0.031774,...,0.000148,0.000034,0.000142,0.000117,0.000241,0.000037,0.990227,0,3,3
3299,hit:3981_conv:7962,jealous,My friend bought a new car.,An old beat up car that I need to replace but ...,I drive a gianormous conversion van,0.949371,0.045157,0.005472,0.222195,0.711267,...,0.000318,0.002807,0.000196,0.000047,0.000063,0.001068,0.001629,1,1,1
3300,hit:2853_conv:5707,anxious,I was nervous when I had to go to jury duty. I...,No_comma_ I had no idea what to expect,I have never served on jury duty either. The c...,0.560171,0.411278,0.028551,0.427809,0.534693,...,0.000381,0.001946,0.000405,0.000127,0.000545,0.000121,0.860535,0,1,3
3301,hit:8397_conv:16795,terrified,Running up the stairs in the dark when I was a...,Yeah_comma_ so I would book it up the stairs a...,I am sure I would too. Just be careful,0.027067,0.625647,0.347286,0.148689,0.703990,...,0.000161,0.000254,0.000065,0.000031,0.000078,0.001583,0.001291,1,2,2


## measuring performance metrics

In [50]:
import CEM as cem
from sklearn.metrics import accuracy_score

features = test_df.columns
features2keep = ['conv_id', 'context', 'prompt', 'speaker_utterance','listener_utterance','valence_listener','arousal_listener','empathy']

features2delete = list(set(features) - set(features2keep))

test_df = test_df.drop(columns = features2delete)


test_df['pred'] = pred_pbc['pred']

#x_test = test_df.drop(columns=['empathy'])
#y_test = test_df.copy()
#y_test = y_test.drop(columns=x_test.columns)


#ClosenessEvaluationMeasure = cem.get_cem(test_df['pred'].apply(lambda x: x - 1),y_test)



test_df['empathy_red'] = test_df.apply(lambda x: 2 if (x['empathy'] == 3 or x['empathy'] == 2)  else 1, axis = 1)
test_df['pred_red'] = test_df.apply(lambda x: 1 if (x['pred'] == 3 or x['pred'] == 2)  else 0, axis = 1)
test_df_2 = test_df.drop(columns=['empathy'])
test_df_2 = test_df_2.drop(columns=['pred'])


test_df_2 = test_df_2.rename(columns={"empathy_red": "empathy"})
test_df_2 = test_df_2.rename(columns={"pred_red": "pred"})
feature_columns = test_df_2.drop(columns=['empathy']).columns
#print(test_df_2.head())
#correct_pred_only = test_df[test_df['empathy'] ==  test_df['pred']]
#correct_pred_only_low = correct_pred_only[correct_pred_only['empathy'] ==  1]

#no_valence = test_df[test_df['valence_listener'] == 0]

#no_valence = test_df[test_df['valence_listener'] > -0.1]
#no_valence = no_valence[no_valence['valence_listener'] < 0.1]
#no_valence = no_valence[no_valence['empathy'] != no_valence['pred']]


#no_valence.to_csv('no_valence_wrong_label_examples.csv')
#correct_pred_only.to_csv('correctly_predicted_exchanges.csv')
#correct_pred_only_low.to_csv('correctly_predicted_exchanges_low.csv')

#correct_pred_only_low['context'].describe()

acc = accuracy_score(test_df['empathy_red'], test_df['pred_red'])
y_pred = test_df_2['pred']
print(y_pred)
y_true = test_df_2.drop(columns = feature_columns)
print(y_true)
ClosenessEvaluationMeasure = cem.get_cem(y_pred,y_true)
test_df

test_df_2
ClosenessEvaluationMeasure
ClosenessEvaluationMeasure

0       0
1       1
2       1
3       1
4       0
       ..
3298    1
3299    0
3300    1
3301    1
3302    1
Name: pred, Length: 3303, dtype: int64
      empathy
0           2
1           1
2           2
3           1
4           2
...       ...
3298        2
3299        1
3300        1
3301        2
3302        2

[3303 rows x 1 columns]


0.7025794952465386

### get full conversations on the test set

In [4]:
from sklearn.model_selection import train_test_split

pred_pbc = pd.read_csv(current_dir + '/best_predictions.txt', header = None)
pred_pbc = pred_pbc.rename(columns = {0:'pred'})
pred_pbc['pred'] = pred_pbc['pred'].apply(lambda x: x + 1)
pred_pbc

database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges.csv'

database = pd.read_csv(current_dir + database_dir)
X = database.drop(columns=['empathy'])
y = database['empathy']

test_db = pd.read_csv(current_dir + test_database_dir)
test_db

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42,stratify=y)
train_df = pd.concat([X_train, y_train], axis=1)
test_df = pd.concat([X_test, y_test], axis=1)
test_df = test_df.reset_index(drop = True)
test_df['pred'] = pred_pbc['pred']
print(len(test_df['conv_id'].unique()))
print(len(test_df['conv_id']))


ids = test_df["conv_id"]
duplicated_convos = test_df[ids.isin(ids[ids.duplicated()])].sort_values("conv_id")

duplicated_convos

v = duplicated_convos.conv_id.value_counts()
duplicated_convos[duplicated_convos.conv_id.isin(v.index[v.gt(2)])]
duplicated_convos

duplicated_convos = duplicated_convos[['conv_id', 'prompt', 'context', 'speaker_utterance', 'listener_utterance', 'empathy', 'pred']]


duplicated_convos.to_csv('./useful_database_subsets/convos_in_test_set.csv', index=False)



duplicated_convos = em_red.reduce_emotion_labels_to_8('context',duplicated_convos)
duplicated_convos.to_csv('./useful_database_subsets/convos_in_test_set_reduced_emotion.csv', index=False)



print(len(duplicated_convos['conv_id'].unique()))

2939
3303
355


### get emotionally balanced contexts from database

In [3]:
#get the indexes of the conversations already sampled
def get_index_list(approved_df,dataframe_list):
  index_list = []
  for convo in approved_df['prompt']:
    #print(convo)
    for  i in range(len(dataframe_list)):
      if dataframe_list[i].loc[dataframe_list[i]['prompt'] == convo].empty == False:
        index_list.append([i,dataframe_list[i].loc[dataframe_list[i]['prompt'] == convo].index])
            #index_list.append([frame.index,frame.loc[frame['conv_id'] == convo].index])
  return index_list


#Function that deletes the conversations already sampled from the database
def remove_accepted_convos(index_list, base_df):
  for index in index_list:
    base_df = base_df.drop([index[1][0]])
  return base_df

In [4]:
database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges.csv'

database = pd.read_csv(current_dir + database_dir)
database
#starting_exchange_db = database[database['exchange_number'] == 1]


print(len(database['conv_id'].unique()))
print(len(database))


print(len(database['prompt'].unique()))


7996
16512
7899


In [11]:
red_emo_prompt_df = em_red.reduce_emotion_labels_to_8('context',database)
red_emo_prompt_df
print(len(red_emo_prompt_df['context'].unique()))

red_emo_prompt_df

red_emo_prompt_df.to_csv('EmpatheticExchanges_prompts_reduced_emotion.csv',index=False)
red_emo_prompt_df

8


Unnamed: 0,conv_id,context,prompt,empathy,speaker_utterance,listener_utterance,s_negative,s_neutral,s_positive,l_negative,...,agreeing,acknowledging,encouraging,consoling,sympathizing,suggesting,questioning,wishing,neutral,mimicry
0,hit:9071_conv:18143,sadness,I was discussing phone bills with my friend_co...,4.0,I was discussing phone bills with my friend. ...,That must be nice,0.171257,0.757237,0.071505,0.005727,...,0.000341,0.997114,0.000295,0.000305,0.000102,0.000037,0.000071,0.001667,0.000067,1
1,hit:9071_conv:18143,sadness,I was discussing phone bills with my friend_co...,4.0,Yeah I know. Although I like to think I would...,Yeah_comma_ I think we all wish that was possible,0.545449,0.403581,0.050971,0.033245,...,0.000300,0.000081,0.000060,0.000068,0.000618,0.017769,0.003114,0.002616,0.975375,1
2,hit:1914_conv:3829,disgust,My daughter was sick the other day and I could...,5.0,My daughter was sick the other day and I could...,That is an unfortunate predicament to be in. W...,0.933043,0.060932,0.006025,0.885590,...,0.000067,0.000088,0.000019,0.000028,0.000526,0.001284,0.997194,0.000106,0.000688,0
3,hit:1914_conv:3829,disgust,My daughter was sick the other day and I could...,5.0,I sent her to daycare even though I probably s...,That is a tough decision for sure_comma_ but y...,0.548584,0.409580,0.041835,0.866907,...,0.001575,0.000145,0.000016,0.000323,0.000073,0.000069,0.000165,0.000060,0.997575,1
4,hit:7464_conv:14928,joy,My son has not always been a strong swimmer. ...,5.0,I just have to brag on my son. He has not alwa...,That is amazing hard work pays off and I think...,0.001463,0.017958,0.980578,0.002007,...,0.562864,0.410589,0.000710,0.000386,0.000183,0.000219,0.000381,0.000785,0.023885,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16507,hit:2945_conv:5891,disgust,I say every year I'm going to wear a two peice...,4.0,One day.,Just keep working at it.,0.063661,0.603897,0.332442,0.029027,...,0.000134,0.002473,0.000020,0.000043,0.000051,0.000137,0.000528,0.000090,0.996525,1
16508,hit:6349_conv:12698,sadness,a younger guy i used to work with passed away ...,3.0,a guy i used to work with passed away this wee...,Oh Im so sorry to hear that,0.550541,0.412104,0.037356,0.848501,...,0.000156,0.002233,0.000028,0.000041,0.996461,0.000085,0.000417,0.000498,0.000080,1
16509,hit:6349_conv:12698,sadness,a younger guy i used to work with passed away ...,3.0,yeah i feel so bad for his family,Were you close?,0.951179,0.044386,0.004436,0.100740,...,0.000058,0.000068,0.000028,0.000019,0.000200,0.001557,0.997412,0.000184,0.000473,0
16510,hit:4042_conv:8084,joy,The first time I met my long distance girlfrie...,4.0,I have been seeing someone on line for over 2 ...,That is awesome! How did it go?,0.045149,0.473167,0.481684,0.001490,...,0.000065,0.000501,0.000021,0.000013,0.000159,0.000157,0.998701,0.000127,0.000255,0


In [7]:
joy_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'joy']
anger_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'anger']
disgust_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'disgust']
fear_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'fear']
trust_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'trust']
surprise_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'surprise']
sadness_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'sadness']
anticipation_df = red_emo_prompt_df[red_emo_prompt_df['context']== 'anticipation']

emo_df_lst = [joy_df,anger_df,disgust_df,fear_df,trust_df,surprise_df,sadness_df,anticipation_df]

def get_sampled_dataframe(df_lst, extra_emotion):
    emo_to_num = {'joy': 0, 'anger': 1, 'disgust': 2, 'fear': 3,'trust': 4,'surprise': 5,'sadness': 6,'anticipation': 7}
    extra_index = emo_to_num[extra_emotion]
    accepted_flag = False
    dataframe_samples = []
    for df in df_lst:
        dataframe_samples.append(df.sample(n=1))
    dataframe_samples.append(df_lst[extra_index].sample(n=1))
    #Join samples in single dataframe
    prepared_dataframe = dataframe_samples[0]
    for i in range(1,len(dataframe_samples)):
        prepared_dataframe = pd.concat([prepared_dataframe, dataframe_samples[i]])
    prepared_dataframe.reset_index(drop=True, inplace=True)
    prepared_dataframe['context'].describe()
    return prepared_dataframe

sampled_dataframe = get_sampled_dataframe(emo_df_lst,'fear')
speaker_prompts = sampled_dataframe.drop(columns = set(red_emo_prompt_df.columns) - set(['prompt', 'context']))




In [8]:
#Output isolated emotion dataframes
emo_to_num = {'joy': 0, 'anger': 1, 'disgust': 2, 'fear': 3,'trust': 4,'surprise': 5,'sadness': 6,'anticipation': 7}
num_to_emo = {v: k for k, v in emo_to_num.items()}

for i in range(len(emo_df_lst)):
    emo_df_lst[i].to_csv('./useful_database_subsets/emotional_contexts/'+str(num_to_emo[i])+'_isolated_df.csv', index=None)


In [9]:

for i in range(len(speaker_prompts['prompt'])):
    print(f"context: {speaker_prompts.loc[i,'context']}, prompt: {speaker_prompts.loc[i,'prompt']}")
    print()


context: joy, prompt: A month back_comma_ I saved my drowning dog. I felt pleased with myself

context: anger, prompt: i am bothered by my neighbors loud power tools next door

context: disgust, prompt: i lied to my mother today 

context: fear, prompt: I had to go to the doctor recently to get something checked out_comma_ I was definitely nervous about the outcome.

context: trust, prompt: I lent my car to my 17-year-old sister today. She left at 10 in the morning and it's midnight_comma_ and she still isn't back with it. I trust her mostly_comma_ but she hasn't been driving for long_comma_ and it makes me nervous thinking of her out there on the road.

context: surprise, prompt: I can't believe there are so any good shows on television

context: sadness, prompt: I cry every time I think of my grandmother.

context: anticipation, prompt: MY kids are so different from each other. I think they are all going to do great things

context: fear, prompt: I'm scared a tornado will come in and

In [10]:
sadness_df.sample(n=1).index
sadness_df.loc[sadness_df.sample(n=1).index[0], 'prompt']

'i was sad when i couldnt go home on time'

In [316]:
speaker_prompts.to_csv('/useful_database_subsets/prompts_20.csv', index=None)
idxlst = get_index_list(speaker_prompts, emo_df_lst)
red_emo_prompt_df = remove_accepted_convos(idxlst,red_emo_prompt_df)


In [242]:
accepted_prompts = pd.DataFrame(columns=['context', 'prompt'])
for i in range(13):
    prompt_sample = pd.read_csv(current_dir + '/prompts_'+str(i+1)+'.csv')
    accepted_prompts = pd.concat([accepted_prompts, prompt_sample])

idxlst = get_index_list(accepted_prompts, emo_df_lst)

print(idxlst)

print(len(accepted_prompts))

#remove_accepted_convos()

print(len(red_emo_prompt_df))

red_emo_prompt_df = remove_accepted_convos(idxlst,red_emo_prompt_df)

print(len(red_emo_prompt_df))


[[0, Index([15879], dtype='int64')], [1, Index([6512], dtype='int64')], [2, Index([5743], dtype='int64')], [3, Index([11351], dtype='int64')], [4, Index([575, 9767], dtype='int64')], [5, Index([3812], dtype='int64')], [6, Index([2567], dtype='int64')], [7, Index([10674], dtype='int64')], [0, Index([8297], dtype='int64')], [0, Index([5853], dtype='int64')], [1, Index([13516], dtype='int64')], [2, Index([5399], dtype='int64')], [3, Index([7434], dtype='int64')], [4, Index([13992], dtype='int64')], [5, Index([15122], dtype='int64')], [6, Index([5228], dtype='int64')], [7, Index([1004], dtype='int64')], [0, Index([15032], dtype='int64')], [0, Index([7275], dtype='int64')], [1, Index([11825], dtype='int64')], [2, Index([7628], dtype='int64')], [3, Index([3736], dtype='int64')], [4, Index([4592], dtype='int64')], [5, Index([1058], dtype='int64')], [6, Index([13485], dtype='int64')], [7, Index([2689], dtype='int64')], [0, Index([4413], dtype='int64')], [0, Index([14087], dtype='int64')], [1, 

KeyError: '[15879] not found in axis'

### Check the human accuracy

In [37]:
'''
import CEM as cem
from sklearn.metrics import accuracy_score

human_df = pd.read_excel('group_1_final_ev.xlsx.ods', engine = 'odf')
human_df

person = "Reviewer #1"


acc = accuracy_score(human_df['ROUND'], human_df[person])

print(acc)


human_df['ROUND_BINARY'] = human_df.apply(lambda x: 1 if (x['ROUND'] == 2 or x['ROUND'] == 1)  else 2, axis = 1)
human_df['ROUND_BINARY'] = human_df.apply(lambda x: 1 if (x['ROUND'] == 2 or x['ROUND'] == 1)  else 2, axis = 1)

human_df[person + '_BINARY'] = human_df.apply(lambda x: 1 if (x[person] == 2 or x[person] == 1)  else 2, axis = 1)

acc = accuracy_score(human_df['ROUND_BINARY'], human_df[person + '_BINARY'])
print(acc)
human_df

human_df.to_csv('human_binary.csv', index = False)
'''

0.19
0.87


### Turn a dataset to binary

In [39]:
database_dir = '/processed_databases/EmpatheticExchanges/EmpatheticExchanges_all_no_emo.csv'
test_database_dir = '/processed_databases/EmpatheticExchanges/test.csv'

train_database_dir = '/processed_databases/EmpatheticExchanges/'
trainFile = current_dir + train_database_dir + 'EmpatheticExchanges_train.csv'
testFile = current_dir + train_database_dir + 'EmpatheticExchanges_test.csv'
df_train = pd.read_csv(trainFile)
df_test = pd.read_csv(testFile)


df_train['empathy_red'] = df_train.apply(lambda x: 2 if (x['empathy'] == 3 or x['empathy'] == 2)  else 1, axis = 1)
df_train = df_train.drop(columns=['empathy'])
df_train = df_train.rename(columns={"empathy_red": "empathy"})

df_test['empathy_red'] = df_test.apply(lambda x: 2 if (x['empathy'] == 3 or x['empathy'] == 2)  else 1, axis = 1)
df_test = df_test.drop(columns=['empathy'])
df_test = df_test.rename(columns={"empathy_red": "empathy"})

df_test.to_csv(current_dir + train_database_dir + 'EmpatheticExchanges_test_binary.csv', index = False)
df_train.to_csv(current_dir + train_database_dir + 'EmpatheticExchanges_train_binary.csv', index = False)


[0.40987205693088047, 0.3907941555000378, 0.1993337875690817]

# Get most influential feature

In [25]:
def find_relevant_feature_per_class_count(pattern_array, count_array, label,attribute_lst, desired_features): 
    #print(f'Exchange classified as {label}')

    label -= 1

    #print(f'Exchange classified as {label}')

    #get the number of patterns that apply to this instance
    counts = count_array[:,int(label)]
    #get the patterns that cover this instance
    patterns = pattern_array[counts.astype(np.bool_)]
    #Slice the patterns into a list of item objects
    pattern_items = [pattern.Items for pattern in patterns]

    #check if they are the same
    #print(f"The number of patterns that cover this class is: {len(patterns)}")
    #print(f"The number of pattern item lists for this class is: {len(pattern_items)}")

    pattern_features = []
    for item_list in pattern_items:
        single_pat_features = [item.Feature[0] for item in item_list]
        #print(len(pat_features))
        #print(len(set(pat_features)))
        pattern_features.append(set(single_pat_features))


    feature_count = [0] * len(attribute_lst)
    feature_count_supports = [0] * len(attribute_lst)



    for idx in range(len(pattern_features)):
        #print(pattern_features)
        for i in range(len(attribute_lst)):
            if attribute_lst[i] in pattern_features[idx]:
                feature_count[i] += 1
                feature_count_supports[i] += 1*patterns[idx].Supports[label]



    #print(feature_count)
    #print(feature_count_supports)

    sorted_lst = sorted([(val, idx) for (idx, val) in enumerate(feature_count)])
    #print(sorted_lst)
    #print(attribute_lst[sorted_lst[-1][1]])


    #print('Most relevant features per pattern count')

    feature_array = []


    feature_array_speaker = []
    feature_array_listener = []

    for feature_num in range(1,desired_features+1):
        #print(attribute_lst[sorted_lst[-feature_num][1]])
        feature_array.append(attribute_lst[sorted_lst[-feature_num][1]])

    #print('Most relevant features related to speaker')

    for feature_num in range(1,len(sorted_lst)):
        if len(feature_array_speaker) < desired_features: 
            candidate_attribute = attribute_lst[sorted_lst[-feature_num][1]]
            if candidate_attribute[:2] == 's_' or 'speaker' in str(candidate_attribute):
                #print(attribute_lst[sorted_lst[-feature_num][1]])
                feature_array_speaker.append(attribute_lst[sorted_lst[-feature_num][1]])
        else:
            break

    #print('Most relevant features related to listener')

    for feature_num in range(1,len(sorted_lst)):
        if len(feature_array_listener) < desired_features: 
            candidate_attribute = attribute_lst[sorted_lst[-feature_num][1]]
            if not (candidate_attribute[:2] == 's_' or 'speaker' in str(candidate_attribute)):
                #print(attribute_lst[sorted_lst[-feature_num][1]])
                feature_array_listener.append(attribute_lst[sorted_lst[-feature_num][1]])
        else:
            break




    return feature_array, feature_array_listener, feature_array_speaker

def find_relevant_feature_per_class_support(pattern_array, count_array, label,attribute_lst,desired_features): 
    #print(f'Exchange classified as {label}')

    label -= 1

    #get the number of patterns that apply to this instance
    counts = count_array[:,int(label)]
    #get the patterns that cover this instance
    patterns = pattern_array[counts.astype(np.bool_)]
    #Slice the patterns into a list of item objects
    pattern_items = [pattern.Items for pattern in patterns]
    
    #for i in range(5):
    #    print(patterns[i].Supports)

    #check if they are the same

    pattern_features = []
    for item_list in pattern_items:
        single_pat_features = [item.Feature[0] for item in item_list]
        #print(len(pat_features))
        #print(len(set(pat_features)))
        pattern_features.append(set(single_pat_features))


    feature_count = [0] * len(attribute_lst)
    feature_count_supports = [0] * len(attribute_lst)


    for idx in range(len(pattern_features)):
        #print(pattern_features)
        for i in range(len(attribute_lst)):
            if attribute_lst[i] in pattern_features[idx]:
                feature_count[i] += 1
                feature_count_supports[i] += 1*patterns[idx].Supports[label]


    sorted_lst = sorted([(val, idx) for (idx, val) in enumerate(feature_count_supports)])

    #print('Most relevant features per pattern support')

    feature_array = []
    feature_array_speaker = []
    feature_array_listener = []
    for feature_num in range(1,desired_features+1):
        #print(attribute_lst[sorted_lst[-feature_num][1]])
        feature_array.append(attribute_lst[sorted_lst[-feature_num][1]])

    #print('Most relevant features related to speaker')

    for feature_num in range(1,len(sorted_lst)):
        if len(feature_array_speaker) < desired_features: 
            candidate_attribute = attribute_lst[sorted_lst[-feature_num][1]]
            if candidate_attribute[:2] == 's_' or 'speaker' in str(candidate_attribute):
                #print(attribute_lst[sorted_lst[-feature_num][1]])
                feature_array_speaker.append(attribute_lst[sorted_lst[-feature_num][1]])
        else:
            break

    #print('Most relevant features related to listener')

    for feature_num in range(1,len(sorted_lst)):
        if len(feature_array_listener) < desired_features: 
            candidate_attribute = attribute_lst[sorted_lst[-feature_num][1]]
            if not (candidate_attribute[:2] == 's_' or 'speaker' in str(candidate_attribute)):
                #print(attribute_lst[sorted_lst[-feature_num][1]])
                feature_array_listener.append(attribute_lst[sorted_lst[-feature_num][1]])
        else:
            break



    return feature_array, feature_array_listener, feature_array_speaker


def print_feature_with_values(exchange_df, feature_array):
    for idx in range(len(exchange_df)):
        #print(idx)
        for item in feature_array:
            print(f'{item} : {exchange_df.loc[idx,item]:.2f}')
    return 0

def get_feature_with_values(exchange_df, feature_array):
    values = []
    for idx in range(len(exchange_df)):
        #print(idx)
        for item in feature_array:
            values.append((item,exchange_df.loc[idx,item]))
            #print(f'{item} : {exchange_df.loc[idx,item]:.2f}')
    return values

    
def get_most_relevant_features(classifier, exchange_df,criterion,attribute_lst, prediction):
    #get most influential patterns
    emerging_patterns = classifier.EmergingPatterns #access the patterns mined by the classifier
    pattern_list = [] #patterns that cover the exchange
    for instance in exchange_df.to_numpy(): 
        for pattern in emerging_patterns:
            if pattern.IsMatch(instance):
                pattern_list.append(pattern)   
                #print(type(instance))
    count_lst = [pattern.Counts for pattern in pattern_list]
    pattern_arr = np.array(pattern_list)
    #print(len(pattern_list))
    count_arr = np.array(count_lst)
    #print(len(count_arr))

    #print(pattern_list[:5])


    #print(f'Exchange classified as {prediction}')

    #ifs important features by pattern support

    if str(criterion) == 'support':
        ifs, ifs_l, ifs_s = find_relevant_feature_per_class_support(pattern_arr,count_arr,prediction,attribute_lst,4)
        return ifs, ifs_l, ifs_s 
    if str(criterion) == 'count':
        ifc, ifc_l, ifc_s = find_relevant_feature_per_class_count(pattern_arr,count_arr,prediction,attribute_lst,4)
        return ifc, ifc_l, ifc_s 
    else:
        print('Invalid criterion, please select "support" to address the support patterns have over instances and "count" for just counting the times a feature appears in the patterns')
        return [],[],[]


def get_most_relevant_feature(classifier, exchange_df,role,attribute_lst, prediction):
    #get most influential patterns
    emerging_patterns = classifier.EmergingPatterns #access the patterns mined by the classifier
    pattern_list = [] #patterns that cover the exchange
    for instance in exchange_df.to_numpy(): 
        for pattern in emerging_patterns:
            if pattern.IsMatch(instance):
                pattern_list.append(pattern)   
                #print(type(instance))
    count_lst = [pattern.Counts for pattern in pattern_list]
    pattern_arr = np.array(pattern_list)
    #print(len(pattern_list))
    count_arr = np.array(count_lst)
    #print(len(count_arr))

    #print(pattern_list[:5])

    #print(f'Exchange classified as {prediction}')

    #ifs important features by pattern support
    ifs, ifs_l, ifs_s = find_relevant_feature_per_class_support(pattern_arr,count_arr,prediction,attribute_lst,4)

    print()

    #ifc important features by pattern count (how many times they appear in the patterns that cover the instance)
    ifc, ifc_l, ifc_s = find_relevant_feature_per_class_count(pattern_arr,count_arr,prediction,attribute_lst,4)
 
    #print(exchange_df)
    print('Most relevant features per pattern support')
    print_feature_with_values(exchange_df,ifs)
    print('Most relevant support features related to listener')
    print_feature_with_values(exchange_df,ifs_l)
    print('Most relevant support features related to speaker')
    print_feature_with_values(exchange_df,ifs_s)



    return ifs, ifc




In [26]:
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'Super sad today. It is the weekend and I have a hard time with loneliness on the weekends especially.', "I love the weekends actually",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"I was so mad earlier someone hit my car and just drove off!", "Aww man, that's not ideal Did you get the plates?",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'I hate when my wife and son are away from me', "Aww that is sweet You are a good dad",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'My little cousin  was nice and gave me a present!', "Aww cool! was the ocasion special?",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"Does it bother you when your friends have all dates and you're single? It makes me feel inadequate", "Yeah, it really sucks loneliness is no easy thing to go though",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"Does it bother you when your friends have all dates and you're single? It makes me feel inadequate", "Yeah, you are ",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"Does it bother you when your friends have all dates and you're single? It makes me feel inadequate", "Oh yeah, it bothers me a lot too! ",model_components)
processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"I was so mad earlier someone hit my car and just drove off!", "Aww man, that's not ideal Did you get the plates?",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"I was so mad earlier someone hit my car and just drove off!", "That's what you get haha!",model_components)
#processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,"I was so mad earlier someone hit my car and just drove off!", "Was it a bad accident?",model_components)

#print(y_pred)
#print(processed_exchange)

influential,influential_l,influential_s = get_most_relevant_features(pbc, processed_exchange,'support',att_lst, y_pred)

print('Most relevant features per pattern support')
print_feature_with_values(processed_exchange,influential)
print(get_feature_with_values(processed_exchange,influential))
print('Most relevant support features related to listener')
print_feature_with_values(processed_exchange,influential_l)
print(get_feature_with_values(processed_exchange,influential_l))
print('Most relevant support features related to speaker')
print_feature_with_values(processed_exchange,influential_s)
print(get_feature_with_values(processed_exchange,influential_s))


print

                                                                                

Most relevant features per pattern support
l_word_len : 10.00
l_positive : 0.00
dominance_speaker : -0.03
s_word_len : 13.00
[('l_word_len', 10), ('l_positive', 0.0046051578), ('dominance_speaker', -0.03280000000000001), ('s_word_len', 13)]
Most relevant support features related to listener
l_word_len : 10.00
l_positive : 0.00
l_negative : 0.93
valence_listener : 0.42
[('l_word_len', 10), ('l_positive', 0.0046051578), ('l_negative', 0.9260325), ('valence_listener', 0.4166666666666667)]
Most relevant support features related to speaker
dominance_speaker : -0.03
s_word_len : 13.00
arousal_speaker : 0.29
s_negative : 0.96
[('dominance_speaker', -0.03280000000000001), ('s_word_len', 13), ('arousal_speaker', 0.2896), ('s_negative', 0.9551038)]


<function print(*args, sep=' ', end='\n', file=None, flush=False)>

In [41]:
persona = ['I was really nervous to move across country.','Knew no one where we were moving_comma_ and also far away from my mother_comma_ who is getting old.', 'oh sorry_comma_ we knew no one where we were moving to', 'Amazingly hehe. But here I am.'] 
personb = ['why were you?','no one knew that you were moving?','oh_comma_ that has to be scary', 'here you are_comma_ killing it ']
print('')

for i in range(4):
    print('', end = '')
    speaker_uttearance = persona[i]
    listener_uttearance = personb[i]
    processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,speaker_uttearance,listener_uttearance,model_components)
    #print(y_pred)
    print()
    influential,influential_l,influential_s = get_most_relevant_features(pbc, processed_exchange,'support',att_lst, y_pred)
    print('Most relevant features for classification')
    #print_feature_with_values(processed_exchange,influential)
    print(get_feature_with_values(processed_exchange,influential)[:3])
    print('Most relevant features from the listener')
    print(get_feature_with_values(processed_exchange,influential_l)[:3])
    print('', end = '\r')
    print()





                                                                                


Most relevant features for classification
[('l_word_len', 3), ('s_word_len', 8), ('arousal_speaker', 0.24066666666666667)]
Most relevant features from the listener
[('l_negative', 0.2412955), ('encouraging', 2.877417682611849e-05), ('questioning', 0.9979150891304016)]



                                                                                


Most relevant features for classification
[('l_word_len', 7), ('l_neutral', 0.5941971), ('s_word_len', 18)]
Most relevant features from the listener
[('l_neutral', 0.5941971), ('suggesting', 9.872257214738056e-05), ('l_negative', 0.375899)]



                                                                                


Most relevant features for classification
[('l_word_len', 6), ('s_word_len', 11), ('arousal_speaker', -0.11533333333333333)]
Most relevant features from the listener
[('l_neutral', 0.21651484), ('encouraging', 7.498222112189978e-05), ('neutral', 0.00021213227591942996)]



                                                                                


Most relevant features for classification
[('l_word_len', 5), ('sympathizing', 0.0006214195163920522), ('s_word_len', 6)]
Most relevant features from the listener
[('sympathizing', 0.0006214195163920522), ('l_neutral', 0.466616), ('encouraging', 0.0001263265876332298)]



## Testing changes in top features using new VA vectors

In [61]:
processed_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst,'I hate when my wife and son are away from me', "I get that you're feeling bad but do not let it get to you. I'm sure you'll be extra happy once they are here",model_components)

columns_of_processed_exchange = processed_exchange.columns

#print(columns_of_processed_exchange)

conversations = ['body', 'comic', 'faith', 'joy', 'lottery', 'manager', 'racoon', 'sister', 'morning', 'furiosa']

#conversations = ['body', 'comic', 'joy', 'lottery', 'racoon', 'sister', 'morning', 'furiosa']

video_weight = 0.2


#conversations =  ['faith']
empathy_truth = []
predictions_text = []
predictions_video = []


for i in range(len(conversations)):
    convo_idx = i
    convos_to_test = pd.read_csv(current_dir + '/useful_database_subsets/video_exchanges/convos_to_test.csv')

    #print(convos_to_test.columns)

    df_convo = convos_to_test[convos_to_test['conversation'] == conversations[convo_idx]].reset_index()

    #print(convos_to_test[convos_to_test['conversation'] == conversations[convo_idx]])


    string_arr = [
                [df_convo.loc[0,'speaker_utterance'],df_convo.loc[0,'listener_utterance']],
                [df_convo.loc[1,'speaker_utterance'],df_convo.loc[1,'listener_utterance']]
                ]



    exchanges_df = pd.DataFrame(columns=columns_of_processed_exchange)

    for ex in string_arr:
        #print(ex)
        single_exchange, y_pred  = exchange_processer.predict_exchange_empathy(pbc, flag_array, 1, att_lst, ex[0], ex[1],model_components)
        single_exchange['pred_text'] = y_pred
        influential,influential_l,influential_s = get_most_relevant_features(pbc, single_exchange,'support',att_lst, y_pred)     
        single_exchange['most_influential_before'] = str(get_feature_with_values(single_exchange,influential))
        single_exchange['most_influential_before_l'] = str(get_feature_with_values(single_exchange,influential_l))
        single_exchange['most_influential_before_s'] = str(get_feature_with_values(single_exchange,influential_s))
        exchanges_df = pd.concat([exchanges_df, single_exchange])
        #print(single_exchange)

    exchanges_df = exchanges_df.reset_index(drop= True)

    #print(exchanges_df)

    conversation = []
    video_av_values = pd.read_csv(current_dir + '/useful_database_subsets/video_exchanges/exchanges/' + 'exchanges_'+str(conversations[convo_idx])+'.csv')
    exchanges_df['valence_speaker'] = (1-video_weight)*exchanges_df['valence_speaker'] + (video_weight)*video_av_values['valence_right']
    exchanges_df['arousal_speaker'] = (1-video_weight)*exchanges_df['arousal_speaker'] + (video_weight)*video_av_values['arousal_right'] 
    exchanges_df['valence_listener'] = (1-video_weight)*exchanges_df['valence_listener'] + (video_weight)*video_av_values['valence_left']
    exchanges_df['arousal_listener'] = (1-video_weight)*exchanges_df['arousal_listener'] + (video_weight)*video_av_values['arousal_left'] 

    #mimicry
    exchanges_df['emotional_similarity'] = exchanges_df.apply(data_processer.get_cosine_similarity,axis = 1) 
    exchanges_df['mimicry'] = exchanges_df.apply(lambda x: 1 if x['emotional_similarity'] > 0.7 else 0, axis = 1)
    exchanges_df = exchanges_df.drop(columns = ['emotional_similarity'])


    exchanges_df['pred_video'] = pbc.predict(exchanges_df)
    exchanges_df['pred_video'] = exchanges_df['pred_video'] + 1
    exchanges_df['new_empathy_reduced'] = df_convo['new_empathy_reduced']
    exchanges_df['empathy'] = df_convo['empathy']

    for i in range(len(exchanges_df)):
        #print(columns_of_processed_exchange)
        single_exchange = exchanges_df.iloc[[i]].drop(columns=['most_influential_before','most_influential_before_l','most_influential_before_s']).reset_index()
        mia,mial,mias = get_most_relevant_features(pbc,single_exchange,'support',att_lst, y_pred)       
        
        exchanges_df.loc[i,'most_influential_after'] = str(get_feature_with_values(single_exchange,mia))
        exchanges_df.loc[i,'most_influential_after_l'] = str(get_feature_with_values(single_exchange,mial))
        exchanges_df.loc[i,'most_influential_after_s'] = str(get_feature_with_values(single_exchange,mias))

    exchanges_df.to_csv(current_dir + '/useful_database_subsets/video_exchanges/'+'exchange_predictions_'+str(conversations[convo_idx])+'.csv')

    

    empathy_truth.append(exchanges_df.loc[0,'new_empathy_reduced'])
    empathy_truth.append(exchanges_df.loc[1,'new_empathy_reduced'])
    predictions_text.append(exchanges_df.loc[0,'pred_text'])
    predictions_text.append(exchanges_df.loc[1,'pred_text'])
    predictions_video.append(exchanges_df.loc[0,'pred_video'])
    predictions_video.append(exchanges_df.loc[1,'pred_video'])

for i in range(len(predictions_text)):
    predictions_text[i] = predictions_text[i].astype(np.int64)

print(empathy_truth)
print(predictions_text)
print(predictions_video)

acc_text = accuracy_score(empathy_truth, predictions_text)
acc_video = accuracy_score(empathy_truth, predictions_video)

print(acc_text)
print(acc_video)

for i in range(len(empathy_truth)):
    if empathy_truth[i] != predictions_video[i] and empathy_truth[i] == predictions_text[i]:
        print(i+1)    

print()


                                                                          

[3, 3, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 3, 3, 1, 1, 2, 2, 3, 3]
[2, 3, 1, 2, 2, 3, 1, 2, 1, 3, 3, 1, 3, 2, 1, 2, 3, 2, 1, 3]
[3, 3, 1, 2, 3, 3, 1, 2, 1, 3, 3, 3, 3, 3, 1, 2, 3, 2, 1, 3]
0.5
0.6
12



## Get experiment order

In [42]:
import random
import math
import itertools
import pandas as pd



plutchik_emo_dic = {0:'anger', 1: 'disgust', 2: 'fear', 3: 'joy', 4: 'anticipation', 5: 'sadness', 6: 'surprise' , 7: 'trust'}
empathy_level_dic = {0: 'low', 1: 'medium', 2: 'high'}

# this only works for Iterable[Iterable]
def is_latin_rectangle(rows):
    valid = True
    for row in rows:
        if len(set(row)) < len(row):
            valid = False
    if valid and rows:
        for i, val in enumerate(rows[0]):
            col = [row[i] for row in rows]
            if len(set(col)) < len(col):
                valid = False
                break
    return valid

def is_latin_square(rows):
    return is_latin_rectangle(rows) and len(rows) == len(rows[0])

# : prepare the input
n = 8
items = list(range(1, n + 1))
# shuffle items
random.shuffle(items)
# number of permutations



def latin_square(items, shuffle=True):
    result = []
    for elems in itertools.permutations(items):
        valid = True
        for i, elem in enumerate(elems):
            orthogonals = [x[i] for x in result] + [elem]
            if len(set(orthogonals)) < len(orthogonals):
                valid = False
                break
        if valid:
            result.append(elems)
    if shuffle:
        random.shuffle(result)
    return result

ltn_sq_1 = latin_square(items)
for row in ltn_sq_1:
    print(row)
print(is_latin_square(ltn_sq_1))

type(ltn_sq_1)

experiment_squares = []

#print(experiment_squares[0][1])

for i in range(n):
    experiment_squares.append(list(ltn_sq_1[i]))

for i in range(n):
    for j in range(n):
        experiment_squares[i][j] = (plutchik_emo_dic[experiment_squares[i][j]-1], random.choice(['low','medium','high']))


for row in experiment_squares:
    print(row)


#for item in experiment_squares: 
#    print(item)

latin_sqr_df = pd.DataFrame(experiment_squares).T


print(latin_sqr_df)

number_of_square = 2

latin_sqr_df.to_csv('./useful_database_subsets/experiment_square_'+str(number_of_square)+'.csv')

#random.choice(['low','medium','high'])

(6, 4, 7, 8, 1, 5, 2, 3)
(7, 8, 6, 4, 2, 3, 1, 5)
(3, 2, 5, 1, 8, 7, 4, 6)
(1, 5, 2, 3, 6, 4, 7, 8)
(5, 1, 3, 2, 4, 6, 8, 7)
(2, 3, 1, 5, 7, 8, 6, 4)
(4, 6, 8, 7, 5, 1, 3, 2)
(8, 7, 4, 6, 3, 2, 5, 1)
True
[('sadness', 'low'), ('joy', 'high'), ('surprise', 'medium'), ('trust', 'high'), ('anger', 'low'), ('anticipation', 'low'), ('disgust', 'high'), ('fear', 'high')]
[('surprise', 'high'), ('trust', 'low'), ('sadness', 'high'), ('joy', 'high'), ('disgust', 'medium'), ('fear', 'high'), ('anger', 'high'), ('anticipation', 'high')]
[('fear', 'high'), ('disgust', 'high'), ('anticipation', 'medium'), ('anger', 'high'), ('trust', 'medium'), ('surprise', 'medium'), ('joy', 'low'), ('sadness', 'medium')]
[('anger', 'high'), ('anticipation', 'medium'), ('disgust', 'low'), ('fear', 'medium'), ('sadness', 'medium'), ('joy', 'high'), ('surprise', 'high'), ('trust', 'medium')]
[('anticipation', 'low'), ('anger', 'high'), ('fear', 'high'), ('disgust', 'medium'), ('joy', 'high'), ('sadness', 'high'), (