In [1]:
#Files needed for this code to work:
#1. emowoz-multiwoz.json from https://zenodo.org/record/6506504#.ZFF4gc7P3b1
#2. README.md file from https://zenodo.org/record/6506504#.ZFF4gc7P3b1

#Both files are also available in the repo https://github.com/FinnHet13/CodingProjects/tree/main/sentiment_analysis_bachelor_thesis with "README.md" being named "EmoWoz_README.md".

# Step 1: Import required packages

In [2]:
import json
import pandas as pd
import numpy as np
import os
import pprint

# Step 2: Import EmoWoz json file

In [3]:
#Read the EmoWoz dialogues into a dictionary. The encoding is passed explicitly because JSON files are expected to be
#UTF-8, whereas open() would otherwise use the platform's default encoding (which is not UTF-8 on every system).
with open('emowoz-multiwoz.json', 'r', encoding='utf-8') as f:
    emowoz_database = json.load(f)

#looking at the structure and first few elements of the EmoWoz dataset
pprint.pprint(list(emowoz_database.items())[:5])

[('PMUL2335.json',
  {'log': [{'dialog_act': {'Restaurant-Inform': [['Food', 'european']]},
            'emotion': [{'annotation': 0, 'annotator': '68bd033a'},
                        {'annotation': 0, 'annotator': '13de8dba'},
                        {'annotation': 0, 'annotator': 'c522c02d'},
                        {'emotion': 0, 'sentiment': 0}],
            'span_info': [['Restaurant-Inform', 'Food', 'european', 8, 8]],
            'text': 'How are you doing? Are there any European restaurants in '
                    'the city center?'},
           {'dialog_act': {'Restaurant-Inform': [['Choice', '8']],
                           'Restaurant-Request': [['Price', '?']]},
            'emotion': [],
            'span_info': [['Restaurant-Inform', 'Choice', '8', 4, 4]],
            'text': 'Yes, there are 8. What is your price range?'},
           {'dialog_act': {'Restaurant-Inform': [['Price', 'cheap']]},
            'emotion': [{'annotation': 0, 'annotator': '68bd033a'},
          

In [4]:
#the dataset contains many json files, each representing one conversation between a call agent and a caller. Within each conversation are the different speaker turns, each containing the following elements:
#dialog_act: the dialogue act of the speaker turn, containing a description of the domain of the conversation and intention of the speaker (ex. Restaurant-Inform), not relevant for the analysis
#**emotion**: the emotion of the speaker turn (Ex. 0) - 0: neutral, 1: fearful, sad, disappointed, 2: dissatisfied, disliking, 3: apologetic, 4: abusive, 5: excited, happy, anticipating, 6: satisfied, liking
#**sentiment**: the sentiment of the speaker turn - 0: neutral, 1: negative, 2: positive
#span_info: more info about the speaker turn
#**text**: the transcribed text of the speaker
#The elements enclosed in double asterisks ** are the ones that will be used for the sentiment analysis

# Step 3: Make a list of all the dialogues from the EmoWoz database with associated sentiment and emotion labels

In [5]:
#Extracting all the lines of dialogue spoken by the user together with their sentiment label and emotion label.
#There are only sentiment labels for user sentences in EmoWoz; there are no sentiment labels for agent dialogue.
#All dialogues alternate between user and agent and the user always starts the dialogue, so taking every second
#turn with [::2] selects exactly the user turns.
#Note: the pprint module imported at the top of the notebook is used here as pprint.pprint(...). Importing the
#function with "from pprint import pprint" would overwrite the module name and break earlier cells when re-run.
sentiment_list = []
for dialogue in emowoz_database.values():
    for turns in dialogue.values():
        for user_turn in turns[::2]:
            #In the sample printed earlier, the "emotion" list holds three annotator entries followed by the
            #final label (with "emotion" and "sentiment" keys) at index 3
            final_label = user_turn["emotion"][3]
            sentiment_list.append({
                "text": user_turn["text"],
                "sentiment": final_label["sentiment"],
                "emotion": final_label["emotion"],
            })
# Print the first 10 items in sentiment_list in a more readable format
pprint.pprint(sentiment_list[:10], width=80)

[{'emotion': 0,
  'sentiment': 0,
  'text': 'How are you doing? Are there any European restaurants in the city '
          'center?'},
 {'emotion': 0,
  'sentiment': 0,
  'text': 'Can I get the name and address of one of the most inexpensive '
          'ones?'},
 {'emotion': 6,
  'sentiment': 2,
  'text': 'Thanks so much. I am also looking for places to go in town. Can you '
          'help me with that?'},
 {'emotion': 0,
  'sentiment': 0,
  'text': 'I would like to see colleges.  Can you please recommend one with no '
          'entrance fee, and let me know the area.  Thank you.'},
 {'emotion': 6, 'sentiment': 2, 'text': 'Thank you.  That was all I needed.'},
 {'emotion': 0,
  'sentiment': 0,
  'text': 'I want a train that arrives in broxbourne by 09:45.'},
 {'emotion': 2,
  'sentiment': 1,
  'text': 'No. I need to depart from Cambridge and leave on Monday. Can you '
          'find me a more suitable train?'},
 {'emotion': 0, 'sentiment': 0, 'text': 'I would prefer to arrive at 9:

# Step 4: Making a dataframe with the text, sentiment and emotion from EmoWoz and removing all unnecessary information

In [6]:
#Turn the list of labelled user turns into a dataframe with a fixed column order
column_order = ['text', 'sentiment', 'emotion']
result_df = pd.DataFrame(data=sentiment_list, columns=column_order)
result_df

Unnamed: 0,text,sentiment,emotion
0,How are you doing? Are there any European rest...,0,0
1,Can I get the name and address of one of the m...,0,0
2,Thanks so much. I am also looking for places t...,2,6
3,I would like to see colleges. Can you please ...,0,0
4,Thank you. That was all I needed.,2,6
...,...,...,...
71519,I want to start my stay on Sunday for 7 people...,0,0
71520,Can you recommend a tour of the college near t...,2,5
71521,Why don't you search for a boat attraction ins...,0,0
71522,Could you give me the address and entrance fee?,0,0


In [7]:
#Keep only the rows whose sentiment label is not 0 (neutral), as the sentiment analysis models used cannot recognize neutral sentiment
is_not_neutral = result_df["sentiment"] != 0
no_neutral_sentiment_df = result_df[is_not_neutral]
no_neutral_sentiment_df

Unnamed: 0,text,sentiment,emotion
2,Thanks so much. I am also looking for places t...,2,6
4,Thank you. That was all I needed.,2,6
6,No. I need to depart from Cambridge and leave ...,1,2
11,That's all I need. Thank you!,2,6
14,Thank you and goodbye.,2,6
...,...,...,...
71511,Thank you goodbye.,2,6
71515,"No, that is all. You have been very helpful. ...",2,6
71516,We are visiting cambridge for the first time a...,2,5
71520,Can you recommend a tour of the college near t...,2,5


In [8]:
#Adding two columns that put into words what the sentiment and emotion scores mean. The mapping is based on the
#README.md file from https://zenodo.org/record/6506504#.ZFF4gc7P3b1
#Emotion codes 0-5 are looked up in this table; every other code falls through to 'Satisfied, liking'.
emotion_names_by_code = {
    0: 'Neutral',
    1: 'Fearful, sad, disappointed',
    2: 'Dissatisfied, disliking',
    3: 'Apologetic',
    4: 'Abusive',
    5: 'Excited, happy, anticipating',
}


def _sentiment_label(code):
    """Return 'Positive' for sentiment code 2 and 'Negative' for any other code."""
    if code == 2:
        return 'Positive'
    return 'Negative'


def _emotion_label(code):
    """Return the emotion name for a code, defaulting to 'Satisfied, liking'."""
    return emotion_names_by_code.get(code, 'Satisfied, liking')


#Work on an independent copy so that adding columns does not touch result_df
no_neutral_sentiment_df = no_neutral_sentiment_df.copy()
no_neutral_sentiment_df["sentiment_name"] = no_neutral_sentiment_df['sentiment'].apply(_sentiment_label)
no_neutral_sentiment_df["emotion_name"] = no_neutral_sentiment_df['emotion'].apply(_emotion_label)
no_neutral_sentiment_df.head(10)

Unnamed: 0,text,sentiment,emotion,sentiment_name,emotion_name
2,Thanks so much. I am also looking for places t...,2,6,Positive,"Satisfied, liking"
4,Thank you. That was all I needed.,2,6,Positive,"Satisfied, liking"
6,No. I need to depart from Cambridge and leave ...,1,2,Negative,"Dissatisfied, disliking"
11,That's all I need. Thank you!,2,6,Positive,"Satisfied, liking"
14,Thank you and goodbye.,2,6,Positive,"Satisfied, liking"
15,Good day. I'm headed into town and I need help...,2,5,Positive,"Excited, happy, anticipating"
17,"Okay, i'll try it out! Book me for 6 people, ...",2,6,Positive,"Satisfied, liking"
18,"Thank you. Yes, I also need to find a moderat...",2,6,Positive,"Satisfied, liking"
20,"Yes, let's go for the one in Regent Street Cit...",2,6,Positive,"Satisfied, liking"
21,thank you that will be all,2,6,Positive,"Satisfied, liking"


In [9]:
#Replace every semicolon inside the text column with a comma so that a ";" separator used later functions as intended
text_without_semicolons = no_neutral_sentiment_df['text'].str.replace(';', ',', regex=False)
no_neutral_sentiment_df['text'] = text_without_semicolons

In [10]:
#Swap every newline character in the text column for a single space so that each utterance stays on one line
text_on_one_line = no_neutral_sentiment_df['text'].str.replace('\n', ' ', regex=False)
no_neutral_sentiment_df['text'] = text_on_one_line

# Step 5: Export dataframe to an Excel file

In [11]:
# Write the dataframe to an Excel workbook, leaving out the index column
output_path = "EmoWoz_Dataset.xlsx"
no_neutral_sentiment_df.to_excel(output_path, index=False, engine="openpyxl")