# Importing Libraries

In [None]:
!pip install --upgrade spacy
!python -m spacy download pt_core_news_lg

In [None]:
!pip install transformers

In [3]:
from google.colab import drive
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
import collections
import pandas as pd
from sklearn.metrics import precision_score, \
    recall_score, confusion_matrix, classification_report, \
    accuracy_score, f1_score, balanced_accuracy_score
from ast import literal_eval
import spacy

In [4]:
# Load the large Portuguese spaCy pipeline; `nlp` is the module-level parser
# used by get_surroundings and by the 'obl' detection loop further down.
nlp = spacy.load("pt_core_news_lg")

In [5]:
# Mount Google Drive: the fine-tuned model and the cached classifier outputs
# used below are read from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


# Importing Dataset

In [6]:
# Download the task-2 test set from Google Drive by file id; per the recorded
# output it is saved as /content/test_task2.csv.
!gdown --id 1bjo9wqZpGiOanuB6MLDaCEkZ1VPXJMC1

Downloading...
From: https://drive.google.com/uc?id=1bjo9wqZpGiOanuB6MLDaCEkZ1VPXJMC1
To: /content/test_task2.csv
100% 302k/302k [00:00<00:00, 46.5MB/s]


In [7]:
# Read the semicolon-separated test set. Each row pairs a review with one
# aspect and its start/end positions (presumably character offsets into the
# review, since start_position is later compared with spaCy's token.idx).
df = pd.read_csv('/content/test_task2.csv', sep=';')
df.head()

Unnamed: 0,id,review,aspect,start_position,end_position
0,0,"Um hotel bem agradável, limpo, hospitaleiro e ...",localização,301,312
1,1,"Um hotel bem agradável, limpo, hospitaleiro e ...",café da manhã,226,239
2,2,"Um hotel bem agradável, limpo, hospitaleiro e ...",hotel,3,8
3,3,"Um hotel bem agradável, limpo, hospitaleiro e ...",quarto,86,92
4,4,"Eu realmente não aconselho esse lugar, tudo é ...",atendimento,57,68


# GoEmotions

In [8]:
# Folder path containing the fine-tuned model files
model_path = '/content/drive/MyDrive/IC/Inferencia_carga_afetiva/GoEmotions_portuguese-main/fine_tunel_model'
# Model and tokenizer are both loaded from the same folder.
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)
# return_all_scores=True makes the pipeline return a score for every label,
# which get_predictions relies on.
# NOTE(review): recent transformers releases deprecate return_all_scores in
# favour of top_k=None - confirm against the installed version before changing.
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer, return_all_scores=True)



In [9]:
def goEmotionsInference(file_name, texts, infer=False):
  """Return GoEmotions classifier scores, using a cache file on Google Drive.

  Args:
    file_name: Name of the cache file inside the `go_emotions_output` folder.
    texts: Input handed unchanged to the module-level `classifier` pipeline.
      Only used when `infer` is true.
    infer: When true, run the classifier and overwrite the cache file with
      `str()` of its output before reading it back. When false, only the
      existing cache file is read.

  Returns:
    The Python object obtained by parsing the cache file content with
    `ast.literal_eval`.

  Raises:
    OSError: If the cache file cannot be opened (e.g. FileNotFoundError when
      `infer` is false and nothing has been cached yet).
  """
  cache_path = '/content/drive/MyDrive/Shared Task - ABSAPT 2022/go_emotions_output/' + file_name
  if infer:
    output = classifier(texts)
    # The emotion labels seen in this notebook contain non-ASCII characters,
    # so the encoding is pinned instead of depending on the platform default.
    with open(cache_path, 'w', encoding='utf-8') as f:
      f.write(str(output))
  # Always read back from disk so cached and fresh runs return the same thing.
  with open(cache_path, encoding='utf-8') as f:
    return literal_eval(f.read())

In [10]:
# Get the df with the top n predictions

def get_predictions(output):
  """Convert raw classifier output into nested {label: score} dictionaries.

  Args:
    output: Sequence with one entry per input text; each entry is an iterable
      of dicts that carry a 'label' and a 'score' key.

  Returns:
    Dict mapping the position of each entry in `output` to a dict of
    {label: score} built from that entry.
  """
  return {
      position: {item['label']: item['score'] for item in scored_labels}
      for position, scored_labels in enumerate(output)
  }

def sort_dic(dic):
  """Return a new dict with the entries of `dic` ordered by value, largest first.

  Entries with equal values keep their original relative order.
  """
  ranked_items = sorted(dic.items(), key=lambda item: item[1], reverse=True)
  return dict(ranked_items)

def top_n_predictions(predictions, n):
  """Keep only the `n` highest-scoring labels of every prediction entry.

  Args:
    predictions: Dict mapping an id to a {label: score} dict.
    n: Maximum number of labels to keep per id; values <= 0 keep none.

  Returns:
    Dict mapping every id in `predictions` to a {label: score} dict with at
    most `n` labels, ordered from highest to lowest score. An id whose score
    dict is empty maps to an empty dict instead of being dropped.
  """
  limit = max(n, 0)
  top_n = {}
  for key, scores in predictions.items():
    # Stable descending sort: labels with equal scores keep their input order.
    ranked_labels = sorted(scores, key=scores.get, reverse=True)
    top_n[key] = {label: scores[label] for label in ranked_labels[:limit]}
  return top_n

def get_result(top_n, df, sentences):
  """Merge the dataset columns with the ranked predictions, column by column.

  Args:
    top_n: Dict mapping an id to an ordered {label: score} dict (best first).
    df: Object accepted by `dict()`; its entries become the initial columns.
    sentences: Indexable by the ids of `top_n`; gives the classified text.

  Returns:
    Dict of columns: the entries of `dict(df)`, a 'sentence' list, and for
    every rank k (starting at 1) a 'prediction k' and a 'score k' list.
  """
  result = dict(df)
  result['sentence'] = []
  for row_id, ranked in top_n.items():
    result['sentence'].append(sentences[row_id])
    for rank, (label, score) in enumerate(ranked.items(), start=1):
      label_column = 'prediction ' + str(rank)
      score_column = 'score ' + str(rank)
      # Both columns of a rank are created together the first time it appears.
      if label_column not in result:
        result[label_column] = []
        result[score_column] = []
      result[label_column].append(label)
      result[score_column].append(score)
  return result

def get_df(df, output, n, sentences):
  """Build a DataFrame with the dataset columns plus the top-`n` predictions.

  Args:
    df: Dataset frame whose columns are carried over to the result.
    output: Raw classifier output, one entry per row of `df`.
    n: Number of top predictions to keep per row.
    sentences: Texts that were classified, indexed like `output`.

  Returns:
    A new DataFrame built from the column dict returned by `get_result`.
  """
  ranked = top_n_predictions(get_predictions(output), n)
  return pd.DataFrame(data=get_result(ranked, df, sentences))

In [11]:
# Polarity assigned to each (Portuguese) emotion label produced by the
# classifier: 1, -1 or 0. Used by map_goEmotions to turn labels into numbers.
mapping = {
    "admiração": 1,
    "diversão": 1,
    "raiva": -1,
    "aborrecimento": -1,
    "aprovação": 1,
    "zelo": 1,
    "confusão": -1,
    "curiosidade": 1,
    "desejo": 1,
    "decepção": -1,
    "desaprovação": -1,
    "nojo": -1,
    "constrangimento": -1,
    "entusiasmo": 1,
    "medo": -1,
    "gratidão": 1,
    "luto": -1,
    "alegria": 1,
    "amor": 1,
    "nervosismo": -1,
    "otimismo": 1,
    "orgulho": 1,
    "percepção": 0,
    "alívio": 1,
    "remorso": -1,
    "tristeza": -1,
    "surpresa": 1,
    "neutro": 0,
}

In [12]:
def map_goEmotions(n, df):
  """Replace the labels in the first `n` prediction columns by their polarity.

  The columns 'prediction 1' .. 'prediction n' of `df` are overwritten in
  place through the module-level `mapping`; the same `df` object is returned.
  """
  for rank in range(1, n + 1):
    column = 'prediction ' + str(rank)
    df[column] = df[column].map(mapping)
  return df

In [13]:
def annull_obl(predictions, obl):
  """Set to 0 every entry of `predictions` whose index is listed in `obl`.

  Indexes that are not smaller than `len(predictions)` are ignored. The
  sequence is modified in place and also returned.
  """
  size = len(predictions)
  for position in (candidate for candidate in obl if candidate < size):
    predictions[position] = 0
  return predictions

## Meaningful Surroundings

In [14]:
def get_surroundings(index, review):
  """Build a short context string around the token that starts at `index`.

  The review is parsed with the module-level spaCy pipeline `nlp`. For the
  token whose `idx` equals `index`, the context is made of: the token, its
  head, the children and grandchildren of the token, and the children and
  grandchildren of the head. The collected tokens are de-duplicated by their
  `idx`, ordered by it, and joined with single spaces.

  Args:
    index: Value compared against `token.idx` to locate the aspect token.
    review: Text to parse.

  Returns:
    The joined context string, or ' ' when no token starts at `index`
    (including when the parsed review has no tokens at all, which used to
    raise UnboundLocalError).
  """
  for token in nlp(review):
    if token.idx != index:
      continue
    # Keyed by token.idx so tokens reached through several paths appear once.
    surroundings = {token.idx: token.text, token.head.idx: token.head.text}
    for anchor in (token, token.head):
      for child in anchor.children:
        surroundings[child.idx] = child.text
        for grandchild in child.children:
          surroundings[grandchild.idx] = grandchild.text
    return ' '.join(text for _, text in sorted(surroundings.items()))
  # No token starts exactly at `index`: same placeholder as before.
  return ' '

In [15]:
# One context string per dataset row, in row order. Iterating the two needed
# columns avoids building a full row Series through `df.iloc()` for each row.
aspect_surroundings = [
    get_surroundings(start, review)
    for start, review in zip(df['start_position'], df['review'])
]

In [16]:
# Row positions whose aspect token (the token starting at start_position) has
# the dependency label 'obl'; these rows are later forced to polarity 0.
obl_index = []
for index, (start, review) in enumerate(zip(df['start_position'], df['review'])):
  for token in nlp(review):
    if token.idx == start and token.dep_ == 'obl':
      obl_index.append(index)
      # Only one token can start at a given offset, so stop scanning this review.
      break

In [17]:
# infer=True: run the classifier on the context strings and overwrite the
# cached 'out_task2_test' file before reading it back.
out_task2_test = goEmotionsInference('out_task2_test', aspect_surroundings, True)

In [18]:
# Number of top-ranked emotion labels kept per row; also reused by
# map_goEmotions in the next cell.
n = 3
# Dataset columns + classified sentence + 'prediction k' / 'score k' columns.
df_top3_s = get_df(df, out_task2_test, n, aspect_surroundings)
df_top3_s.head()

Unnamed: 0,id,review,aspect,start_position,end_position,sentence,prediction 1,score 1,prediction 2,score 2,prediction 3,score 3
0,0,"Um hotel bem agradável, limpo, hospitaleiro e ...",localização,301,312,O ponto forte é a localização .,admiração,0.493106,neutro,0.283013,aprovação,0.15717
1,1,"Um hotel bem agradável, limpo, hospitaleiro e ...",café da manhã,226,239,"O café da manhã é bom , mas simples mesmo .",admiração,0.906012,aprovação,0.049788,neutro,0.015803
2,2,"Um hotel bem agradável, limpo, hospitaleiro e ...",hotel,3,8,Um hotel bem agradável limpo hospitaleiro loca...,admiração,0.960308,aprovação,0.014327,neutro,0.006338
3,3,"Um hotel bem agradável, limpo, hospitaleiro e ...",quarto,86,92,"O quarto era pequeno mas bastante limpo , bem ...",admiração,0.665469,neutro,0.154558,aprovação,0.115362
4,4,"Eu realmente não aconselho esse lugar, tudo é ...",atendimento,57,68,"Eu realmente não aconselho esse lugar , tudo é...",desaprovação,0.328608,nojo,0.168453,decepção,0.101558


In [19]:
# map_goEmotions overwrites the prediction columns in place and returns the
# same object, so df_top3 and df_top3_s refer to one frame after this cell
# and re-running it would map the already-mapped values again.
df_top3 = map_goEmotions(n, df_top3_s)
df_top3.head()

Unnamed: 0,id,review,aspect,start_position,end_position,sentence,prediction 1,score 1,prediction 2,score 2,prediction 3,score 3
0,0,"Um hotel bem agradável, limpo, hospitaleiro e ...",localização,301,312,O ponto forte é a localização .,1,0.493106,0,0.283013,1,0.15717
1,1,"Um hotel bem agradável, limpo, hospitaleiro e ...",café da manhã,226,239,"O café da manhã é bom , mas simples mesmo .",1,0.906012,1,0.049788,0,0.015803
2,2,"Um hotel bem agradável, limpo, hospitaleiro e ...",hotel,3,8,Um hotel bem agradável limpo hospitaleiro loca...,1,0.960308,1,0.014327,0,0.006338
3,3,"Um hotel bem agradável, limpo, hospitaleiro e ...",quarto,86,92,"O quarto era pequeno mas bastante limpo , bem ...",1,0.665469,0,0.154558,1,0.115362
4,4,"Eu realmente não aconselho esse lugar, tudo é ...",atendimento,57,68,"Eu realmente não aconselho esse lugar , tudo é...",-1,0.328608,-1,0.168453,-1,0.101558


### Top 3 with threshold

In [24]:
def get_top3_t(df, t):
  """Pick one prediction per row from the top-3 columns using a threshold.

  For every row the first prediction is kept, except when all of the
  following hold: 'score 1' is not >= `t`, the first prediction differs from
  both the second and the third, and the second equals the third. In that
  case the second prediction wins (two lower-ranked votes against one).

  Args:
    df: Frame with the columns 'score 1', 'prediction 1', 'prediction 2'
      and 'prediction 3'.
    t: Threshold compared against 'score 1'.

  Returns:
    List with one chosen prediction per row, in row order.
  """
  prediction = []
  # Walk only the four columns that are needed instead of indexing rows one
  # by one through `df.iloc()`.
  columns = zip(df['score 1'], df['prediction 1'],
                df['prediction 2'], df['prediction 3'])
  for top_score, first, second, third in columns:
    confident = top_score >= t
    first_has_support = first == second or first == third
    if not confident and not first_has_support and second == third:
      prediction.append(second)
    else:
      prediction.append(first)
  return prediction

In [49]:
# Pass the polarity-mapped frame explicitly instead of relying on
# map_goEmotions having overwritten df_top3_s in place.
# 0.9 is the 'score 1' threshold at or above which the top label is trusted.
prediction = get_top3_t(df_top3, 0.9)

#### Annull OBL Aspects

In [50]:
# Force polarity 0 for every row position collected in `obl_index`
# (rows whose aspect token has the dependency label 'obl').
prediction = annull_obl(prediction, obl_index)
# Prints the complete list of final labels, one value per dataset row.
print(prediction)

[1, 1, 1, 1, -1, 0, -1, -1, 0, 0, 0, 0, -1, 0, -1, -1, -1, -1, -1, -1, -1, 0, 1, 1, 0, 0, 0, 0, 0, -1, 0, -1, 0, -1, 0, 1, 1, 0, -1, 1, 1, -1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, -1, 0, 0, 1, 1, 0, 0, 1, -1, 0, -1, 1, -1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, -1, 1, -1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, -1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, -1, 1, 1, 0, 1, 0, 0, 1, 0, 1, -1, 0, -1, 1, 0, 1, 1, 1, 1, 0, 0, 0, -1, 0, -1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, -1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, -1, 0, -1, 0, 0, -1, -1, -1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, -1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, -1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, -1, 0, 1, 1, 1, 0, 0, 0, -1, 1, -1, 1, 1, -1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,

In [51]:
# Build the final frame: drop the listed columns from the top-3 frame and
# attach the chosen labels as 'polarity'.
# Note that this rebinds `df`, which held the loaded dataset until here.
df = (
    df_top3
    .drop(columns=['review', 'aspect', 'start_position', 'end_position',
                   'sentence', 'prediction 1', 'score 1', 'prediction 2',
                   'score 2', 'prediction 3', 'score 3'])
    .assign(polarity=prediction)
)
df.head()

Unnamed: 0,id,polarity
0,0,1
1,1,1
2,2,1
3,3,1
4,4,-1
