# This notebook computes the `user_sentiment` and `bot_sentiment` columns

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the dataset that the sentiment columns are computed for.
# NOTE: the to_csv call further down writes back to this same path, so after a
# first run the file already contains user_sentiment / bot_sentiment.
explore_turn = pd.read_csv("explore_turn_common.csv")

In [3]:
# List the column names to see what is available before scoring.
explore_turn.columns

Index(['Unnamed: 0', 'utterance_id', 'turn_id', 'chat_id', 'datetime',
       'user_utterance', 'valence', 'label', 'sublabel', 'bot_action',
       'bot_datetime', 'bot_utterance', 'user_rating',
       'user_rating_explanation', 'valence_new', 'label_new', 'sublabel_new',
       'user_utterance_english', 'bot_utterance_english',
       'Contained technical issue', 'user_rating_english',
       'user_rating_explanation_english', 'user_sentiment', 'bot_sentiment'],
      dtype='object')

#### Sentiment score scale
0: Very Negative

1: Negative

2: Neutral

3: Positive

4: Very Positive

# Stanford CoreNLP

In [4]:
import requests
import json
from tqdm import tqdm


# Register tqdm's pandas integration so Series.progress_apply (used below) is available.
tqdm.pandas()

# Base URL of the Stanford CoreNLP server that get_sentiment posts to.
# It points at localhost, so a server must already be listening on port 9000.
corenlp_url = "http://localhost:9000"

def get_sentiment(text, url=None, timeout=60, session=None):
    """Return the mean CoreNLP sentiment score of ``text`` (0 = very negative, 4 = very positive).

    The text is posted to a Stanford CoreNLP server with the ``sentiment``
    annotator; the integer ``sentimentValue`` of every returned sentence is
    averaged into a single float.

    Parameters
    ----------
    text : object
        The utterance to score. Missing values (``None`` / NaN) and blank
        strings yield ``None``. Non-string values (e.g. a numeric cell such as
        ``8``) are converted with ``str()`` instead of raising.
    url : str, optional
        Server URL. Defaults to the module-level ``corenlp_url``.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that one stalled
        request cannot hang a whole ``progress_apply`` run.
    session : object, optional
        Anything exposing a ``requests``-compatible ``post`` method, e.g. a
        ``requests.Session`` to reuse one connection across many calls.
        Defaults to the ``requests`` module itself.

    Returns
    -------
    float or None
        The average sentence score, or ``None`` when the input is empty, the
        server returns no sentences, or the request fails (the failure is
        printed, not raised, so a batch run keeps going).
    """
    if pd.isna(text):
        return None
    # Coerce before calling .strip(): cells read from CSV may be numeric.
    text = str(text)
    if text.strip() == "":
        return None
    try:
        params = {
            'annotators': 'sentiment',
            'outputFormat': 'json'
        }
        http = requests if session is None else session
        target = corenlp_url if url is None else url
        response = http.post(
            target,
            params=params,
            data=text.encode('utf-8'),
            timeout=timeout,
        )
        # Surface HTTP errors as such rather than as a confusing JSON/KeyError.
        response.raise_for_status()
        result = response.json()
        # One integer score per sentence; the utterance score is their mean.
        sentiments = [int(sent['sentimentValue']) for sent in result['sentences']]
        if not sentiments:
            # No sentences were detected, so there is nothing to average.
            return None
        return sum(sentiments) / len(sentiments)
    except Exception as e:
        # Deliberately best-effort: log and continue so one bad row does not
        # abort the whole column.
        print(f"Error processing: {text[:30]}... => {e}")
        return None

# Score both sides of the conversation: each English utterance column is run
# through get_sentiment (with a progress bar) and stored in its own column.
for source_col, target_col in (
    ('user_utterance_english', 'user_sentiment'),
    ('bot_utterance_english', 'bot_sentiment'),
):
    explore_turn[target_col] = explore_turn[source_col].progress_apply(get_sentiment)


100%|██████████████████████████████████████████████████████████████████████████████| 2011/2011 [01:37<00:00, 20.70it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 2011/2011 [04:56<00:00,  6.77it/s]


In [5]:
# Preview the first rows to spot-check the sentiment columns.
explore_turn.head(5)

Unnamed: 0.1,Unnamed: 0,utterance_id,turn_id,chat_id,datetime,user_utterance,valence,label,sublabel,bot_action,...,valence_new,label_new,sublabel_new,user_utterance_english,bot_utterance_english,Contained technical issue,user_rating_english,user_rating_explanation_english,user_sentiment,bot_sentiment
0,0,1,1,1.0,2023-08-07 15:42:33.034771,start,Follow/Neutral,,,,...,Follow/Neutral,,,Start,"Hi! I'm MIcha, your motivational chatbot. My g...",1.0,,,2.0,1.833333
1,1,2,2,1.0,2023-08-07 15:43:07.304097,Ich möchte weniger prokrastinieren,change,Reason,desire,,...,change,Reason,desire,I want to procrastinate less,So you want to procrastinate less. Can you tel...,1.0,,,2.0,1.5
2,2,3,3,1.0,2023-08-07 15:43:48.284858,Ich könnte schon viel weiter im Leben sein.,change,Reason,General Reason,,...,change,Reason,General Reason,I could be a lot further in my life.,How important is this change on a scale from 0...,1.0,,,3.0,2.0
3,3,4,4,1.0,2023-08-07 15:44:04.602837,,Follow/Neutral,,,,...,Follow/Neutral,,,I don't know.,"On a scale from 0 (not at all) to 10 (very), h...",1.0,,,2.0,1.0
4,4,5,5,1.0,2023-08-07 15:44:20.584669,8,Follow/Neutral,,,,...,Follow/Neutral,,,8,An 8. So you're positive about this change ove...,1.0,,,2.0,2.0


In [6]:
# Persist the frame including the sentiment columns.
# NOTE: this is the same path that read_csv loaded at the top of the notebook,
# so the input file is overwritten in place. index=False keeps the DataFrame
# index from being written as an extra column.
explore_turn.to_csv('explore_turn_common.csv', index=False)

In [8]:
# Count missing values per column; a non-zero count in the sentiment columns
# would mean get_sentiment returned None for some rows.
explore_turn.isna().sum()

Unnamed: 0                            0
utterance_id                          0
turn_id                               0
chat_id                               0
datetime                              0
user_utterance                        9
valence                               0
label                              1036
sublabel                           1245
bot_action                         1091
bot_datetime                         24
bot_utterance                        21
user_rating                         856
user_rating_explanation            1250
valence_new                           0
label_new                          1036
sublabel_new                       1245
user_utterance_english                0
bot_utterance_english                 0
Contained technical issue           150
user_rating_english                 856
user_rating_explanation_english    1250
user_sentiment                        0
bot_sentiment                         0
dtype: int64