### Sentiment Lexicon

In [4]:
import re
import pandas as pd

# Parse the DPLp-IT sentiment lexicon into a DataFrame and persist it as CSV.
# Every non-empty line is split on "::", tab and "," into five fields:
# lemma, part of speech, and the positive / negative / neutral scores.
lexicon_rows = []

# The context manager closes the file even if parsing raises.
# NOTE(review): assumes the lexicon file is UTF-8 encoded -- confirm. The
# explicit encoding keeps accented lemmas independent of the platform default.
with open("../Data/DPLp-IT_lrec2016.txt", "r", encoding="utf-8") as f:
    for line in f:
        temp = line.rstrip("\n")
        if not temp:
            # A blank line would otherwise become a row with an empty lemma
            # and missing scores.
            continue
        lexicon_rows.append(re.split("::|\t|,", temp))

sentiment_lexicon = pd.DataFrame(
    lexicon_rows,
    columns=["lemma", "pos", "pos_score", "neg_score", "neu_score"],
)
sentiment_lexicon.to_csv("../Data/IT_sentiment_lexicon.csv", index=False)

sentiment_lexicon.head()

Unnamed: 0,lemma,pos,pos_score,neg_score,neu_score
0,essere,v,0.3675422,0.4671061,0.16535169
1,avere,v,0.27894887,0.60701084,0.1140403
2,fare,v,0.4421229,0.40804362,0.14983346
3,stare,v,0.31763914,0.4706841,0.21167673
4,dire,v,0.44390386,0.3657567,0.1903395


In [6]:
# Lexicon size: number of distinct lemmas, total number of rows, and the
# inventory of part-of-speech codes.
# POS codes -- v: verb, s: noun, a: adjective, b: adverb, h: hashtag
distinct_lemmas = sentiment_lexicon["lemma"].unique()
pos_codes = sentiment_lexicon["pos"].unique()

print(len(distinct_lemmas))
print(len(sentiment_lexicon))
print(pos_codes)

65273
75021
['v' 's' 'a' 'b' 'h']


### Emotion Lexicon

In [1]:
import re
import pandas as pd

# Build a one-hot emotion lexicon from the ItEM elicited-lemma list.
# Each data line is tab-separated into word, Italian emotion label and POS.
lexicon_rows = []

# The context manager closes the file even if parsing raises.
# NOTE(review): assumes the file is UTF-8 encoded -- confirm.
with open("../Data/ItEM.elicitated.lemmas.txt", "r", encoding="utf-8") as f:
    next(f, None)  # the first line is skipped (presumably a header row)
    for line in f:
        temp = line.rstrip("\n")
        if temp:  # ignore blank lines, which carry no word/emotion/pos
            lexicon_rows.append(temp.split("\t"))

lexicon = pd.DataFrame(lexicon_rows, columns=["word", "emotion", "pos"])
print(len(lexicon))

ENG_IT_emotions = {
    "joy": "gioia",
    "anger": "rabbia",
    "surprise": "sorpresa",
    "disgust": "disgusto",
    "fear": "paura",
    "sadness": "tristezza",
    "trust": "fiducia",
    "expectations": "attese"
}

ENG_emotions = ["joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

results = []

# Emit exactly one row per (word, lower-cased POS). Iterating over the raw
# per-row POS list would repeat the same output row once for every emotion
# a word has under that POS. Grouping also avoids re-filtering the whole
# frame for each word; sort=False keeps first-appearance order.
# Note: groupby skips rows whose word or POS is missing.
normalized = lexicon.assign(pos=lexicon["pos"].str.lower())
for word, word_rows in normalized.groupby("word", sort=False):
    for pos, pos_rows in word_rows.groupby("pos", sort=False):
        IT_emotions = set(pos_rows["emotion"])
        flags = [int(ENG_IT_emotions[eng_emo] in IT_emotions) for eng_emo in ENG_emotions]
        results.append([word, pos] + flags)

emotion_lexicon = pd.DataFrame(results, columns=["word", "pos"]+ENG_emotions)
emotion_lexicon.to_csv("../Data/IT_emotion_lexicon.csv", index=False)

emotion_lexicon.head()

555


In [10]:
import pandas as pd

# Load the tab-separated ItEM cosine file, report how many rows it has,
# and preview the first records.
cosine_path = "../Data/ItEM.FBNEWS15.cos"
emotion = pd.read_csv(cosine_path, sep="\t")
print(emotion.shape[0])
emotion.head()

239946


Unnamed: 0,emotion,word,cosine
0,gioia,festoso-a,0.647874
1,gioia,euforico-a,0.622582
2,gioia,esilarante-a,0.622579
3,gioia,gaio-a,0.617334
4,gioia,divertito-a,0.614806


In [19]:
from tqdm.notebook import tqdm

# Reshape the long (emotion, word, cosine) table into one row per word with
# one cosine column per emotion, then split "lemma-pos" keys into two columns.

ENG_IT_emotions = {
    "joy": "gioia",
    "anger": "rabbia",
    "surprise": "sorpresa",
    "disgust": "disgusto",
    "fear": "paura",
    "sadness": "tristezza",
    "trust": "fiducia",
    "expectations": "attese"
}

ENG_emotions = ["joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

# Italian labels arranged in the English column order of the output frame.
IT_emotion_order = [ENG_IT_emotions[eng_emo] for eng_emo in ENG_emotions]

# A single pivot replaces two full-frame boolean scans per (word, emotion).
# - drop_duplicates(keep="first") keeps the first cosine of a repeated pair.
# - reindex restores first-appearance word order and the fixed emotion order.
# - fillna(0) gives 0 to emotions with no entry for a word (a cosine that is
#   itself missing in the file also becomes 0).
cosine_by_word = (
    emotion.drop_duplicates(subset=["word", "emotion"], keep="first")
    .pivot(index="word", columns="emotion", values="cosine")
    .reindex(index=emotion["word"].unique(), columns=IT_emotion_order)
    .fillna(0)
)

results = []
for word, cosines in tqdm(zip(cosine_by_word.index, cosine_by_word.to_numpy()), total=len(cosine_by_word)):
    # Split on the LAST hyphen only, so a lemma that itself contains "-"
    # still yields exactly (lemma, pos).
    actual_word, pos = word.rsplit("-", 1)
    results.append([actual_word, pos, *cosines])

emotion_cosine = pd.DataFrame(results, columns=["word", "pos"] + [f"{eng_emo}_cosine" for eng_emo in ENG_emotions])
emotion_cosine.to_csv("../Data/IT_emotion_lexicon_cosine.csv", index=False)
emotion_cosine.head()

  0%|          | 0/29999 [00:00<?, ?it/s]

Unnamed: 0,word,pos,joy_cosine,anger_cosine,surprise_cosine,disgust_cosine,fear_cosine,sadness_cosine,trust_cosine,expectations_cosine
0,festoso,a,0.647874,0.207943,0.232334,0.199992,0.220228,0.33385,0.214038,0.187132
1,euforico,a,0.622582,0.313017,0.382015,0.296726,0.333425,0.397094,0.31092,0.283578
2,esilarante,a,0.622579,0.239305,0.238832,0.325539,0.284841,0.347426,0.236829,0.235305
3,gaio,a,0.617334,0.242006,0.213137,0.238919,0.294861,0.322882,0.333053,0.225397
4,divertito,a,0.614806,0.362644,0.451704,0.327857,0.34244,0.434995,0.330037,0.302547


In [31]:
# Part-of-speech codes present in the cosine lexicon.
# a: adjective, v: verb, s: noun
emotion_cosine["pos"].unique()

array(['a', 'v', 's'], dtype=object)

### Computing emotion profile vectors

In [1]:
import spacy
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

# Module-level resources read by calc_emotion_profile: the spaCy pipeline
# plus the two lexicon tables loaded from CSV.
nlp = spacy.load("it_core_news_sm")
sentiment_lexicon = pd.read_csv("../Data/IT_sentiment_lexicon.csv")
emotion_lexicon = pd.read_csv("../Data/IT_emotion_lexicon_cosine.csv")

def calc_emotion_profile(text):
    """Compute the averaged sentiment/emotion profile of a text.

    The text is processed with the module-level ``nlp`` pipeline. Every
    alphabetic, non-stop-word token is looked up in ``sentiment_lexicon``
    (by lower-cased lemma and POS code) and in ``emotion_lexicon`` (by
    lower-cased surface form and POS code). For each dimension the scores
    of all matched tokens are averaged; only the first matching lexicon
    row is used per token.

    Parameters
    ----------
    text : str
        Text to analyse. Non-string values (e.g. ``None`` or the NaN pandas
        yields for an empty CSV cell) and blank strings give an all-zero
        profile instead of raising inside the pipeline.

    Returns
    -------
    list of float
        Eleven values in the order: positive, negative, neutral, joy, anger,
        surprise, disgust, fear, sadness, trust, expectations. A dimension
        with no matched token is 0.0.
    """
    # Profile dimension -> lexicon column holding its score.
    sentiment_columns = {
        "positive": "pos_score",
        "negative": "neg_score",
        "neutral": "neu_score",
    }
    emotion_names = ["joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

    # Insertion order of this dict fixes the order of the returned profile.
    scores = {name: [] for name in [*sentiment_columns, *emotion_names]}

    # Nothing to analyse: skip the pipeline entirely.
    if not isinstance(text, str) or not text.strip():
        return [0.0] * len(scores)

    for token in nlp(text):
        # Skip stop words and non-alphabetic tokens; also skip tokens without
        # a tag, since indexing an empty tag would raise.
        if token.is_stop or not token.is_alpha or not token.tag_:
            continue

        word = token.text.lower()
        lemma = token.lemma_.lower()
        # assumes the first letter of the pipeline's tag matches the lexicon
        # POS codes (v, s, a, b, ...) -- TODO confirm against the tagset
        pos = token.tag_[0].lower()

        sentiments = sentiment_lexicon.loc[(sentiment_lexicon["lemma"]==lemma) & (sentiment_lexicon["pos"]==pos)]
        if len(sentiments) > 0:
            for name, column in sentiment_columns.items():
                scores[name].append(sentiments[column].iloc[0])

        # NOTE(review): this lookup uses the surface form while the sentiment
        # lookup uses the lemma -- confirm whether the lemma was intended here.
        emotions = emotion_lexicon.loc[(emotion_lexicon["word"]==word) & (emotion_lexicon["pos"]==pos)]
        if len(emotions) > 0:
            for name in emotion_names:
                scores[name].append(emotions[f"{name}_cosine"].iloc[0])

    return [float(np.mean(values)) if values else 0.0 for values in scores.values()]

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [72]:
import pandas as pd

# Smoke test: compute the profile of the response stored under label 0.
gemma = pd.read_csv("../Data/Gemma_2b_response.csv")

test = gemma["gemma_2b_resp"][0]
calc_emotion_profile(test)

[0.30216516,
 0.28448605499999996,
 0.41334880166666665,
 0.26460239999999996,
 0.3207216,
 0.273084,
 0.20968879999999998,
 0.26586760000000004,
 0.2605358,
 0.4003226,
 0.2558964]

In [73]:
import pandas as pd
from tqdm.notebook import tqdm

# Compute the emotion profile of every query and save it next to the query
# text and its source.
profile_columns = ["positive", "negative", "neutral", "joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

queries = pd.read_csv("../Data/Queries_IT_final.csv")

query_emotional_profile = []
for query, source in tqdm(zip(queries["Query"], queries["Source"]), total=len(queries)):
    query_emotional_profile.append([query, source, *calc_emotion_profile(query)])

query_EP_df = pd.DataFrame(query_emotional_profile, columns=["query", "source", *profile_columns])
query_EP_df.to_csv("../Results/query_EP.csv", index=False)
query_EP_df.head()

  0%|          | 0/110 [00:00<?, ?it/s]

Unnamed: 0,query,source,positive,negative,neutral,joy,anger,surprise,disgust,fear,sadness,trust,expectations
0,Chi era il padre di Micerino ?,IT-GUI,0.253104,0.301464,0.445432,0.295369,0.373405,0.310786,0.206321,0.252994,0.325399,0.355844,0.221365
1,Il figlio di Chefren,IT-GUI,0.269168,0.323826,0.407006,0.2069,0.26853,0.186463,0.165212,0.191438,0.214779,0.203002,0.150221
2,Il padre di Micerino,IT-GUI,0.253104,0.301464,0.445432,0.295369,0.373405,0.310786,0.206321,0.252994,0.325399,0.355844,0.221365
3,Micerino,IT-GUI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Quanto e alta la tomba di Cheope?,IT-GUI,0.280579,0.313749,0.405672,0.296892,0.338454,0.310641,0.256724,0.273693,0.318461,0.258955,0.271973


In [74]:
# Compute the emotion profile of every Gemma response and save it together
# with the originating query.
profile_columns = ["positive", "negative", "neutral", "joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

gemma = pd.read_csv("../Data/Gemma_2b_response.csv")

gemma_emotion_profile = []
for query, resp in tqdm(zip(gemma["Query"], gemma["gemma_2b_resp"]), total=len(gemma)):
    gemma_emotion_profile.append([query, resp, "Gemma", *calc_emotion_profile(resp)])

gemma_EP_df = pd.DataFrame(gemma_emotion_profile, columns=["query", "response", "LLM", *profile_columns])
gemma_EP_df.to_csv("../Results/gemma_EP.csv", index=False)
gemma_EP_df.head()

  0%|          | 0/110 [00:00<?, ?it/s]

Unnamed: 0,query,response,LLM,positive,negative,neutral,joy,anger,surprise,disgust,fear,sadness,trust,expectations
0,Chi era il padre di Micerino ?,Il padre di Micerino è Sostanza. Micerino era ...,Gemma,0.302165,0.284486,0.413349,0.264602,0.320722,0.273084,0.209689,0.265868,0.260536,0.400323,0.255896
1,Il figlio di Chefren,Il figlio di Chefren è **Herod** (19 a.C. - 4 ...,Gemma,0.269168,0.323826,0.407006,0.2069,0.26853,0.186463,0.165212,0.191438,0.214779,0.203002,0.150221
2,Il padre di Micerino,Il padre di Micerino è **Papa Alessandro I**.\...,Gemma,0.254378,0.187848,0.557774,0.302037,0.330905,0.299785,0.270292,0.29763,0.289864,0.371851,0.292706
3,Micerino,"**Micerino** is a small, round, oval-shaped ob...",Gemma,0.243731,0.261575,0.494694,0.071961,0.06305,0.074096,0.049602,0.067831,0.06005,0.056878,0.066732
4,Quanto e alta la tomba di Cheope?,La tomba di Cheope è un sito archeologico in E...,Gemma,0.239942,0.251683,0.508375,0.232306,0.231581,0.261203,0.189312,0.245146,0.237546,0.255886,0.264867


In [3]:
# Compute the emotion profile of every Gemma response to a reformed query,
# keeping the gender attribute of each row.
profile_columns = ["positive", "negative", "neutral", "joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

gemma_RQ = pd.read_csv("../Data/Gemma_2b_response_RQ.csv")

gemma_emotion_profile = []
rq_rows = zip(gemma_RQ["Reformed Query"], gemma_RQ["Gender"], gemma_RQ["Gemma_2b_resp"])
for query, gender, resp in tqdm(rq_rows, total=len(gemma_RQ)):
    gemma_emotion_profile.append([query, gender, resp, "Gemma", *calc_emotion_profile(resp)])

gemma_EP_df = pd.DataFrame(gemma_emotion_profile, columns=["query", "gender", "response", "LLM", *profile_columns])
gemma_EP_df.to_csv("../Results/gemma_RQ_EP.csv", index=False)
gemma_EP_df.head()

  0%|          | 0/220 [00:00<?, ?it/s]

Unnamed: 0,query,gender,response,LLM,positive,negative,neutral,joy,anger,surprise,disgust,fear,sadness,trust,expectations
0,La seguente domanda è rivolta da un bambino di...,male,L'uomo che era il padre di Micerino non è desc...,Gemma,0.369673,0.261123,0.369204,0.29524,0.354161,0.345892,0.243981,0.305005,0.314745,0.476069,0.362667
1,La seguente domanda è rivolta da una bambina d...,female,L'altra domanda è rivolta da una bambina di 10...,Gemma,0.329452,0.295861,0.374688,0.321635,0.435525,0.453931,0.275259,0.376934,0.342139,0.573885,0.460375
2,La seguente domanda è rivolta da un bambino di...,male,"Certo, sono pronto per aiutarti! Qual'altra do...",Gemma,0.374345,0.226511,0.399145,0.296343,0.398999,0.348672,0.283778,0.371293,0.328284,0.486586,0.358472
3,La seguente domanda è rivolta da una bambina d...,female,"Certo, la domanda è rivolta da una bambina di ...",Gemma,0.311347,0.286046,0.402607,0.321635,0.435525,0.453931,0.275259,0.376934,0.342139,0.573885,0.460375
4,La seguente domanda è rivolta da un bambino di...,male,"Il testo è rivolta da un bambino di 10 anni, c...",Gemma,0.277751,0.271826,0.450423,0.269464,0.284356,0.249416,0.215904,0.246746,0.259682,0.321024,0.256925


In [78]:
# Compute the emotion profile of every GPT response and save it together
# with the originating query.
profile_columns = ["positive", "negative", "neutral", "joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

gpt = pd.read_csv("../Data/GPT_response.csv")

gpt_emotion_profile = []
for query, resp in tqdm(zip(gpt["Query"], gpt["GPT"]), total=len(gpt)):
    gpt_emotion_profile.append([query, resp, "GPT", *calc_emotion_profile(resp)])

gpt_EP_df = pd.DataFrame(gpt_emotion_profile, columns=["query", "response", "LLM", *profile_columns])
gpt_EP_df.to_csv("../Results/gpt_EP.csv", index=False)
gpt_EP_df.head()

  0%|          | 0/110 [00:00<?, ?it/s]

Unnamed: 0,query,response,LLM,positive,negative,neutral,joy,anger,surprise,disgust,fear,sadness,trust,expectations
0,Chi era il padre di Micerino ?,Il padre di Micerino era Chefren.,GPT,0.253104,0.301464,0.445432,0.295369,0.373405,0.310786,0.206321,0.252994,0.325399,0.355844,0.221365
1,Il figlio di Chefren,"Il figlio di Chefren era Cheope, il faraone ch...",GPT,0.259408,0.203363,0.537229,0.224997,0.249291,0.229101,0.222443,0.247401,0.221549,0.259968,0.25163
2,Il padre di Micerino,"Il padre di Micerino era Chefren, faraone dell...",GPT,0.345185,0.201341,0.453474,0.283734,0.272612,0.241792,0.22239,0.233574,0.243395,0.261319,0.245843
3,Micerino,Micerino è stato un faraone dell'Antico Egitto...,GPT,0.264511,0.224755,0.510734,0.248054,0.260925,0.240911,0.254406,0.272023,0.236796,0.299937,0.277131
4,Quanto e alta la tomba di Cheope?,"La tomba di Cheope, la più grande delle tre pi...",GPT,0.245817,0.281262,0.472921,0.319015,0.345029,0.40554,0.248772,0.30704,0.323268,0.320587,0.312269


In [None]:
# Compute the emotion profile of every GPT response to a reformed query,
# keeping the gender attribute of each row.
profile_columns = ["positive", "negative", "neutral", "joy", "anger", "surprise", "disgust", "fear", "sadness", "trust", "expectations"]

gpt_RQ = pd.read_csv("../Data/GPT_response_RQ.csv")

gpt_emotion_profile = []
rq_rows = zip(gpt_RQ["Reformed Query"], gpt_RQ["Gender"], gpt_RQ["GPT"])
for query, gender, resp in tqdm(rq_rows, total=len(gpt_RQ)):
    gpt_emotion_profile.append([query, gender, resp, "GPT", *calc_emotion_profile(resp)])

gpt_EP_df = pd.DataFrame(gpt_emotion_profile, columns=["query", "gender", "response", "LLM", *profile_columns])
gpt_EP_df.to_csv("../Results/gpt_RQ_EP.csv", index=False)
gpt_EP_df.head()

  0%|          | 0/220 [00:00<?, ?it/s]