In [1]:
# Installs older transformers version for GoEmotions compatability
!pip install transformers==2.11.0

[0mCollecting transformers==2.11.0
  Using cached transformers-2.11.0-py3-none-any.whl (674 kB)
Collecting tokenizers==0.7.0
  Using cached tokenizers-0.7.0-cp37-cp37m-manylinux1_x86_64.whl (5.6 MB)
[0mInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
[0m    Found existing installation: tokenizers 0.13.2
    Uninstalling tokenizers-0.13.2:
      Successfully uninstalled tokenizers-0.13.2
  Attempting uninstall: transformers
[0m    Found existing installation: transformers 4.25.1
    Uninstalling transformers-4.25.1:
      Successfully uninstalled transformers-4.25.1
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
simpletransformers 0.63.6 requires transformers>=4.6.0, but you have transformers 2.11.0 which is incompatible.
bert-score 0.3.11 requires transformers>=3.0.0numpy, but you have transformers 2.11.0 

In [2]:
# Activates CUDA for GPU use by GoEmotions
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
str(torch.cuda.memory_allocated(device)/1000000 ) + 'M'

'0.0M'

In [3]:
# Initializes a GoEmotions model instance
import transformers
import os
from transformers import BertTokenizer
from GoEmotions.model import BertForMultiLabelClassification
from GoEmotions.multilabel_pipeline import MultiLabelPipeline

tokenizer = BertTokenizer.from_pretrained("monologg/bert-base-cased-goemotions-original")
model = BertForMultiLabelClassification.from_pretrained("monologg/bert-base-cased-goemotions-original")

goemotions = MultiLabelPipeline(
    model=model,
    tokenizer=tokenizer,
    threshold=0.3,
    device=0
)



In [4]:
# Allows user to select a model and the predictions it had on an evaluation set
model_dataset = input('Dataset used to train model (mix, twit0.825, or combined): ')
training_type = input('Training type of model (regular or limited): ')
epochs = int(input('Number of epochs trained on model: '))
model_name = f"{training_type}-{model_dataset}-{epochs}epochs"

testing_set = input('Dataset for prediction generation (mix, twit0.825, or combined): ')

if training_type.lower() == 'regular':
    train_set = 'training'
    eval_set = 'testing'
elif training_type.lower() == 'limited':
    train_set = 'testing'
    eval_set = 'training'
else:
    print('Please enter a valid training type')

def print_base_info(model_name, testing_set):
    print(f'---- Scoring Predictions ----')
    print(f'Model: {model_name}')
    print(f'Test Set: {testing_set}')

Dataset used to train model (mix, twit0.825, or combined): mix
Training type of model (regular or limited): regular
Number of epochs trained on model: 3
Dataset for prediction generation (mix, twit0.825, or combined): mix


In [5]:
# Do this step if it is using BART
model_name = model_name + "-bart"

In [6]:
# Labels the target and predicted texts for scoring by emotion transition and paraphrasing metrics
import pandas as pd
import numpy as np

# Same Sigmoid Function for determining the emotion of a text
threshold = 0.5
def Top_Score_Label (outputs):
    scores = 1 / (1 + np.exp(-outputs))  # Sigmoid
    top_score = 0
    top_label = ""
    for item in scores:
        for idx, s in enumerate(item):
            if s > threshold:
                if s > top_score: 
                    top_label = model.config.id2label[idx]
    return top_label

print(f'---- Labeling Predictions for Model: {model_name} on Dataset: {testing_set} ----')

df = pd.read_csv(f'predictions-data/{model_name}/{testing_set}-predictions.tsv', sep='\t').astype(str) 


# Labels target and prediction emotions
target_labels = []
prediction_labels = []
for index, row in df.iterrows():
    # Ensures the text are within the token limit of 512
    t_text = (row.target_text[:512] + '..') if len(row.target_text) > 512 else row.target_text
    p_text = (row.predictions[:512] + '..') if len(row.predictions) > 512 else row.predictions

    # Uses GoEmotions to label potential emotions of target and prediction text
    target_emo = goemotions(t_text)
    prediction_emo = goemotions(p_text)

    # Finds best emotion for each text
    target_label = Top_Score_Label(target_emo)
    prediction_label = Top_Score_Label(prediction_emo)

    target_labels.append(target_label)
    prediction_labels.append(prediction_label)

    
# Adds target and prediction emotion labels to dataframe    
df["target_emo"] = target_labels 
df["prediction_emo"] = prediction_labels

# Saves the file with new labels to new location in preparation for scoring
df.to_csv(f'predictions-data/{model_name}/{testing_set}-prediction_emo.tsv', sep='\t')

---- Labeling Predictions for Model: regular-mix-3epochs-bart on Dataset: mix ----


In [7]:
# Uses HuggingFace's Evaluate package to score 
# emotion transition and paraphrasing capabilities of the given models and evaluation sets
import pandas as pd
import evaluate

from pprint import pprint
from statistics import mean

# Exact Match scores emotion transition
def exact(truths, preds):
    exact = evaluate.load('exact_match')
    result = exact.compute(predictions = preds, references = truths)['exact_match']
    return result

# BLEU, Google_BLEU, ROUGE, and METEOR score paraphrasing
def bleu(truths, preds):
    bleu = evaluate.load('bleu')
    result = bleu.compute(predictions = preds, references = truths)['bleu']
    return result

def google_bleu(truths, preds):
    google_bleu = evaluate.load('google_bleu')
    result = google_bleu.compute(predictions = preds, references = truths)['google_bleu']
    return result

def rouge1(truths, preds):
    rouge = evaluate.load('rouge')
    result = rouge.compute(predictions = preds, references = truths)['rouge1']
    return result
    
def rouge2(truths, preds):
    rouge = evaluate.load('rouge')
    result = rouge.compute(predictions = preds, references = truths)['rouge2']
    return result
    
def rougeL(truths, preds):
    rouge = evaluate.load('rouge')
    result = rouge.compute(predictions = preds, references = truths)['rougeL']
    return result

def bertscore(truths, preds):
    bscore = evaluate.load('bertscore')
    result = bscore.compute(predictions = preds, references = truths, model_type="distilbert-base-uncased")
    return result

def meteor(truths, preds):
    meteor = evaluate.load('meteor')
    result = meteor.compute(predictions = preds, references = truths)['meteor']
    return result



print('\n')

print_base_info(model_name, testing_set)
    
df = pd.read_csv(f'predictions-data/{model_name}/{testing_set}-prediction_emo.tsv', sep='\t').astype(str)

# Prints all scores of a model's performance
pprint("Exact Score")
pprint(exact(df["target_emo"], df["prediction_emo"]))

pprint("BLEU Score")
pprint(bleu(df["target_text"], df["predictions"]))

pprint('Google BLEU Score')
pprint(google_bleu(df["target_text"], df["predictions"]))

pprint('ROUGE1 Score')
pprint(rouge1(df["target_text"], df["predictions"]))

pprint('ROUGE2 Score')
pprint(rouge2(df["target_text"], df["predictions"]))

pprint('ROUGEL Score')
pprint(rougeL(df["target_text"], df["predictions"]))

pprint('METEOR Score')
pprint(meteor(df["target_text"], df["predictions"]))

print('\n')



---- Scoring Predictions ----
Model: regular-mix-3epochs-bart
Test Set: mix
'Exact Score'
0.317344589409056
'BLEU Score'
0.24106641931166153
'Google BLEU Score'
0.2645182357078268
'ROUGE1 Score'


INFO:absl:Using default tokenizer.


0.5426834674512404
'ROUGE2 Score'


INFO:absl:Using default tokenizer.


0.3123791723432929
'ROUGEL Score'


INFO:absl:Using default tokenizer.


0.4958498848778997
'METEOR Score'


[nltk_data] Downloading package wordnet to /home/xie/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/xie/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/xie/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


0.5238469173911132


