## Computing sentiment and emotion vectors

### Language Model based approach: FEEL-IT

In [1]:
# sanity check

from transformers import pipeline
import pandas as pd
import re

# Text-classification pipelines for the two FEEL-IT Italian models.
# top_k asks for the scores of several labels per text: four for the emotion
# model and two for the sentiment model.
emo_classifier = pipeline(
    "text-classification",
    model="MilaNLProc/feel-it-italian-emotion",
    top_k=4,
)
sent_classifier = pipeline(
    "text-classification",
    model="MilaNLProc/feel-it-italian-sentiment",
    top_k=2,
)

# Hand-picked inputs for the sanity check: short Italian sentences followed by
# two longer passages. The last two entries contain mis-encoded characters
# (e.g. "Ã¨"), and the final one also spans several lines and contains
# brackets, so it exercises the newline and bracket handling of clean_text.
# NOTE(review): the mis-encoded characters look deliberate (noisy input test)
# — confirm before "fixing" them, since that would change the scores below.
text_examples = [
    "sono felice",
    "La deforestazione è un male",
    "Questo è spaventoso",
    "Questa è una mela marcia",
    "mostro nero?",
    "lei è una bellezza terribile",
    "L'Cleanup Ocean Ã¨ un movimento internazionale che si Ã¨ formato per combattere la pollution del mare.",
    """La (tigre) 

    bianca Ã¨ una rara variante genetica della tigre reale (Panthera tigris), 
    
    caratterizzata da una particolare colorazione del mantello causata da una mutazione genetica. Questi bellissimi felini sono spesso oggetto di ammirazione e curiositÃ  per la loro singolare bellezza. Vive principalmente in India e in alcune parti del sud-est asiatico."""
]

def clean_text(text):
    """Normalise a raw text before it is handed to the classifiers.

    The steps run in this order:
      1. each run of newlines (every newline optionally preceded by one
         whitespace character) is replaced by a single ".";
      2. each run of tabs is replaced by a single space;
      3. every character that is neither a word character nor one of
         - . ' ! ? ( ) { } is replaced by a space (this includes square
         brackets and commas);
      4. the remaining round and curly brackets are deleted;
      5. runs of two or more whitespace characters collapse to one space;
      6. leading and trailing whitespace is stripped.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    # Raw strings: the backslashes below are regex escapes, not Python string
    # escapes. Without the r-prefix Python warns about invalid escape sequences.
    text = re.sub(r"(\s?\n)+", ".", text)
    text = re.sub(r"\t+", " ", text)
    # Same character set as the former "[^\w\-(\.{1})'\!\?]": inside [...] the
    # "(", ")", "{" and "}" were always literals, never a group or quantifier.
    text = re.sub(r"[^\w\-.'!?(){}]", " ", text)
    text = re.sub(r"[()\[\]{}]", "", text)
    text = re.sub(r"\s{2,}", " ", text)
    return text.strip()

# Clean every example with the same routine that is applied to real responses.
cleaned_text_examples = list(map(clean_text, text_examples))

# Score the cleaned texts with both classifiers. Each result is iterated below
# as a list of {"label": ..., "score": ...} dicts per input text.
emo_scores = emo_classifier(cleaned_text_examples)
sent_scores = sent_classifier(cleaned_text_examples)

# One list per output column. The label columns are filled by looking up the
# label strings returned by the classifiers, so those must match these keys.
EP_semantic_results = {
    column: []
    for column in (
        "text", "cleaned_text",
        "positive", "negative", "sent_sum",
        "joy", "sadness", "anger", "fear", "emo_sum",
    )
}

for text, cleaned_text, sent_score, emo_score in zip(
    text_examples, cleaned_text_examples, sent_scores, emo_scores
):
    EP_semantic_results["text"].append(text)
    EP_semantic_results["cleaned_text"].append(cleaned_text)

    # Sentiment first, then emotion. Label scores are stored as percentages,
    # while the *_sum columns keep the sum of the raw scores.
    for sum_column, label_scores in (("sent_sum", sent_score), ("emo_sum", emo_score)):
        score_total = 0
        for label_score in label_scores:
            EP_semantic_results[label_score["label"]].append(label_score["score"]*100)
            score_total += label_score["score"]
        EP_semantic_results[sum_column].append(score_total)

df = pd.DataFrame.from_dict(EP_semantic_results)
df

Unnamed: 0,text,cleaned_text,positive,negative,sent_sum,joy,sadness,anger,fear,emo_sum
0,sono felice,sono felice,99.9726,0.027401,1.0,99.90108,0.041839,0.017075,0.04,1.0
1,La deforestazione è un male,La deforestazione è un male,0.021573,99.978429,1.0,0.020934,92.929745,6.8701,0.179221,1.0
2,Questo è spaventoso,Questo è spaventoso,0.02175,99.97825,1.0,0.100756,0.102739,0.093424,99.703085,1.0
3,Questa è una mela marcia,Questa è una mela marcia,0.02188,99.978119,1.0,0.04351,70.565057,29.188761,0.202673,1.0
4,mostro nero?,mostro nero?,0.023057,99.976939,1.0,0.033823,3.354726,94.834226,1.777218,1.0
5,lei è una bellezza terribile,lei è una bellezza terribile,0.022565,99.97744,1.0,99.864703,0.107148,0.014439,0.013711,1.0
6,L'Cleanup Ocean Ã¨ un movimento internazionale...,L'Cleanup Ocean Ã un movimento internazionale ...,73.18204,26.817957,1.0,18.575568,48.435834,0.285596,32.703006,1.0
7,La (tigre) \n\n bianca Ã¨ una rara variante...,La tigre. bianca Ã una rara variante genetica ...,99.962687,0.037313,1.0,99.923551,0.029315,0.010889,0.036246,1.0


In [2]:
from transformers import pipeline
import pandas as pd
from tqdm.notebook import tqdm
import re

# Extra keyword arguments handed to the classifier calls when a plain call fails.
tokenizer_kwargs = dict(padding=True, truncation=True, max_length=512)

# Text-classification pipelines for the FEEL-IT Italian emotion and sentiment models.
emo_classifier = pipeline(
    "text-classification",
    model="MilaNLProc/feel-it-italian-emotion",
    top_k=4,
)
sent_classifier = pipeline(
    "text-classification",
    model="MilaNLProc/feel-it-italian-sentiment",
    top_k=2,
)


def clean_text(text):
    """Normalise a raw text before it is handed to the classifiers.

    The steps run in this order:
      1. each run of newlines (every newline optionally preceded by one
         whitespace character) is replaced by a single ".";
      2. each run of tabs is replaced by a single space;
      3. every character that is neither a word character nor one of
         - . ' ! ? ( ) { } is replaced by a space (this includes square
         brackets and commas);
      4. the remaining round and curly brackets are deleted;
      5. runs of two or more whitespace characters collapse to one space;
      6. leading and trailing whitespace is stripped.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string.
    """
    # Raw strings: the backslashes below are regex escapes, not Python string
    # escapes. Without the r-prefix Python warns about invalid escape sequences.
    text = re.sub(r"(\s?\n)+", ".", text)
    text = re.sub(r"\t+", " ", text)
    # Same character set as the former "[^\w\-(\.{1})'\!\?]": inside [...] the
    # "(", ")", "{" and "}" were always literals, never a group or quantifier.
    text = re.sub(r"[^\w\-.'!?(){}]", " ", text)
    text = re.sub(r"[()\[\]{}]", "", text)
    text = re.sub(r"\s{2,}", " ", text)
    return text.strip()

def get_sent_emo_vectors(texts, text_name):
    """Score texts with the sentiment and emotion classifiers.

    Every text is cleaned with ``clean_text`` and passed to the module-level
    ``emo_classifier`` and ``sent_classifier``. Each returned score is
    multiplied by 100 and stored in the column named after its label.

    Args:
        texts: list of raw strings to score.
        text_name: base name of the text column; the cleaned texts are stored
            under ``text_name + "_cleaned"``.

    Returns:
        A tuple ``(cleaned_text_name, df)``: the name of the cleaned-text
        column and a DataFrame with one row per text and the columns
        cleaned_text_name, positive, negative, joy, sadness, anger, fear.

    Raises:
        KeyError: if a classifier returns a label that is not one of the
            score columns listed above.
    """
    cleaned_texts = [clean_text(text) for text in texts]
    cleaned_text_name = text_name+"_cleaned"
    try:
        emo_scores = emo_classifier(cleaned_texts)
        sent_scores = sent_classifier(cleaned_texts)
    except Exception:
        # Retry with padding/truncation options. Catch Exception rather than
        # using a bare except, so that interrupting the kernel during the first
        # pass stops the cell instead of silently starting a second full pass.
        # NOTE(review): presumably this guards against over-long inputs — confirm.
        emo_scores = emo_classifier(cleaned_texts, **tokenizer_kwargs)
        sent_scores = sent_classifier(cleaned_texts, **tokenizer_kwargs)

    EP_semantic_results = {
        cleaned_text_name: [],
        "positive": [],
        "negative": [],
        "joy": [],
        "sadness": [],
        "anger": [],
        "fear": [],
    }

    # The progress bar covers only this bookkeeping loop; classification has
    # already finished at this point.
    for cleaned_text, sent_score, emo_score in tqdm(zip(cleaned_texts, sent_scores, emo_scores), total=len(cleaned_texts)):
        EP_semantic_results[cleaned_text_name].append(cleaned_text)
        for sent_dict in sent_score:
            EP_semantic_results[sent_dict["label"]].append(sent_dict["score"]*100)
        for emo_dict in emo_score:
            EP_semantic_results[emo_dict["label"]].append(emo_dict["score"]*100)
    df = pd.DataFrame.from_dict(EP_semantic_results)
    return cleaned_text_name, df

In [None]:
import os  # import os module

directory = '../Data/Responses/'  # folder holding the response CSV files
results_directory = "../Results/"  # per-query score files are written here

# Create the output folder up front so to_csv does not fail when it is missing.
os.makedirs(results_directory, exist_ok=True)

# Keep only CSV files, so stray files are not fed to read_csv, and sort them by
# name: os.scandir yields entries in arbitrary order, which would make the
# processing order differ between runs. The with-block closes the iterator.
with os.scandir(directory) as dir_entries:
    response_entries = sorted(
        (
            entry for entry in dir_entries
            if entry.is_file() and entry.name.lower().endswith(".csv")
        ),
        key=lambda entry: entry.name,
    )

for entry in response_entries:
    file_name = os.path.splitext(entry.name)[0]  # file name without its extension

    # The system label is derived from the file name. Fail with a clear message
    # before the expensive scoring instead of hitting a KeyError when the
    # columns are selected below.
    if "gemma" in file_name:
        ias_label = "GEM"
    elif "gpt" in file_name:
        ias_label = "GPT"
    else:
        raise ValueError(
            f"Cannot derive the IAS label from file name {entry.name!r}: "
            "expected it to contain 'gemma' or 'gpt'."
        )

    # Each response file must provide the columns "Resp", "QID" and "Task Sentiment".
    resp_df = pd.read_csv(entry.path)
    print("\nCreating sentiment and emotion vectors for file: "+file_name)

    col_name, results_df = get_sent_emo_vectors(resp_df["Resp"].tolist(), "Resp")
    results_df["QID"] = resp_df["QID"]
    results_df["Task Sentiment"] = resp_df["Task Sentiment"]
    # Rows without a task sentiment are labelled "General".
    results_df["Task Sentiment"] = results_df["Task Sentiment"].fillna(value="General")
    results_df["IAS"] = ias_label

    results_df = results_df.loc[:, ["QID", "Task Sentiment", "IAS", col_name, "positive", "negative", "joy", "sadness", "anger", "fear"]]

    file_path = results_directory+"QueryWise_"+file_name+".csv"
    results_df.to_csv(file_path, index=False)


Creating sentiment and emotion vectors for file: gemma_resp_baseline


  0%|          | 0/293 [00:00<?, ?it/s]


Creating sentiment and emotion vectors for file: gemma_resp_user_group_aware


  0%|          | 0/293 [00:00<?, ?it/s]


Creating sentiment and emotion vectors for file: gemma_resp_user_need_aware


  0%|          | 0/293 [00:00<?, ?it/s]


Creating sentiment and emotion vectors for file: gpt_resp_baseline


  0%|          | 0/293 [00:00<?, ?it/s]


Creating sentiment and emotion vectors for file: gpt_resp_user_group_aware


  0%|          | 0/293 [00:00<?, ?it/s]


Creating sentiment and emotion vectors for file: gpt_resp_user_need_aware


  0%|          | 0/2 [00:00<?, ?it/s]

## Emotional Profile

In [11]:
import os  # import os module

directory = '../Results/'  # folder holding the per-query score files

# Only the per-query files are inputs. This cell writes EP.csv into the same
# folder, so without the prefix filter a re-run would read its own output as
# if it were a per-query file.
querywise_prefix = "QueryWise_"

# Substring of the file name -> label. The first matching entry wins, which
# mirrors an if/elif chain.
ias_labels = (("gemma", "GEM"), ("gpt", "GPT"))
prompt_type_labels = (
    ("baseline", "Baseline"),
    ("user_group_aware", "User Group Aware"),
    ("user_need_aware", "User Need Aware"),
)


def _label_from_name(name, labels, what):
    """Return the label of the first keyword found in ``name``; raise ValueError if none matches."""
    for keyword, label in labels:
        if keyword in name:
            return label
    raise ValueError(f"Cannot derive {what} from file name {name!r}.")


# Sort by name: os.scandir yields entries in arbitrary order, which would make
# the row order of EP.csv differ between runs. The with-block closes the iterator.
with os.scandir(directory) as dir_entries:
    querywise_entries = sorted(
        (
            entry for entry in dir_entries
            if entry.is_file()
            and entry.name.startswith(querywise_prefix)
            and entry.name.lower().endswith(".csv")
        ),
        key=lambda entry: entry.name,
    )

all_EP_list = []
for entry in querywise_entries:
    results_df = pd.read_csv(entry.path)
    # File name without the "QueryWise_" prefix and the extension.
    file_name = os.path.splitext(entry.name)[0][len(querywise_prefix):]

    # Mean of every numeric score column per task sentiment, rounded to 2 decimals.
    EP_vals = results_df.groupby("Task Sentiment").mean(numeric_only=True).round(2).reset_index()
    EP_vals["IAS"] = _label_from_name(file_name, ias_labels, "the IAS label")
    EP_vals["Prompt Type"] = _label_from_name(file_name, prompt_type_labels, "the prompt type")

    EP_vals = EP_vals.loc[:, ["IAS", "Prompt Type", "Task Sentiment", "positive", "negative", "joy", "sadness", "anger", "fear"]]
    all_EP_list.append(EP_vals)

all_EP_df = pd.concat(all_EP_list, ignore_index=True)
all_EP_df.to_csv("../Results/EP.csv", index=False)
all_EP_df


Unnamed: 0,IAS,Prompt Type,Task Sentiment,positive,negative,joy,sadness,anger,fear
0,GEM,Baseline,General,51.17,48.83,41.63,26.41,3.66,28.3
1,GEM,Baseline,Negative,2.35,97.65,2.09,49.36,10.79,37.75
2,GEM,Baseline,Positive,57.06,42.94,57.46,22.84,17.13,2.58
3,GEM,User Group Aware,General,62.05,37.95,53.09,25.49,1.61,19.81
4,GEM,User Group Aware,Negative,7.39,92.61,15.6,66.99,7.05,10.36
5,GEM,User Group Aware,Positive,48.91,51.09,58.73,18.52,20.98,1.78
6,GEM,User Need Aware,General,61.83,38.17,55.58,25.18,0.75,18.5
7,GEM,User Need Aware,Negative,25.51,74.49,26.54,56.74,4.79,11.93
8,GEM,User Need Aware,Positive,50.46,49.54,51.52,21.68,23.22,3.58
9,GPT,Baseline,General,41.22,58.78,27.09,22.63,1.34,48.94
