In [None]:
# %pip (unlike !pip) installs into the environment of the kernel running this notebook.
%pip install transformers==2.11.0

In [None]:
import torch

# Use CUDA when a GPU is visible to torch; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Only a cell's last expression is displayed, so print the device explicitly.
print(f"Using device: {device}")
# memory_allocated() is a CUDA query; report zero bytes when running on the CPU.
allocated_bytes = torch.cuda.memory_allocated(device) if device.type == "cuda" else 0
str(allocated_bytes / 1000000) + 'M'

In [None]:
import transformers
import os
from transformers import BertTokenizer
from GoEmotions.model import BertForMultiLabelClassification
from GoEmotions.multilabel_pipeline import MultiLabelPipeline

# Checkpoint name shared by the tokenizer and the classifier.
GOEMOTIONS_CHECKPOINT = "monologg/bert-base-cased-goemotions-original"

tokenizer = BertTokenizer.from_pretrained(GOEMOTIONS_CHECKPOINT)
model = BertForMultiLabelClassification.from_pretrained(GOEMOTIONS_CHECKPOINT)

# transformers pipelines take a device ordinal: 0 is the first GPU, -1 is the CPU.
# A hard-coded 0 fails on machines without CUDA, so choose it at runtime.
# NOTE(review): assumes MultiLabelPipeline forwards `device` to transformers' Pipeline — confirm.
pipeline_device = 0 if torch.cuda.is_available() else -1

goemotions = MultiLabelPipeline(
    model=model,
    tokenizer=tokenizer,
    threshold=0.3,
    device=pipeline_device
)

In [None]:
# Identify the model whose predictions are being scored; the three parts are
# joined into `model_name`.
model_dataset = input('Dataset used to train model: ')
training_type = input('Training type of model: ')
epochs = int(input('Number of epochs trained on model: '))  # int() raises ValueError on non-numeric input
model_name = f"{training_type}-{model_dataset}-{epochs}epochs"

testing_set = input('Dataset for prediction generation: ')

# Split names per training type: 'limited' swaps the two names relative to 'regular'.
_SPLITS_BY_TRAINING_TYPE = {
    'regular': ('training', 'testing'),
    'limited': ('testing', 'training'),
}

# Fall back to (None, None) so an unrecognised type never leaves these names
# undefined, or holding stale values from a previous run of this cell.
train_set, eval_set = _SPLITS_BY_TRAINING_TYPE.get(training_type.lower(), (None, None))
if train_set is None:
    print('Please enter a valid training type')

def print_base_info(model_name, testing_set):
    """Print a three-line banner naming the model and the test set being scored.

    Args:
        model_name: text shown after 'Model: '.
        testing_set: text shown after 'Test Set: '.
    """
    banner = (
        '---- Scoring Predictions ----',
        f'Model: {model_name}',
        f'Test Set: {testing_set}',
    )
    for line in banner:
        print(line)

In [None]:
import pandas as pd
import numpy as np

threshold = 0.5  # minimum sigmoid score a label needs before it can be reported


def Top_Score_Label(outputs, id2label=None):
    """Return the name of the highest-scoring label whose score exceeds `threshold`.

    Args:
        outputs: raw model outputs to which a sigmoid is applied.
            NOTE(review): assumed to be a 2-D array-like with one row per input
            text and one column per label id — confirm against what
            MultiLabelPipeline returns.
        id2label: optional mapping from column index to label name. Defaults to
            `model.config.id2label` of the globally loaded model.

    Returns:
        The label with the largest sigmoid score above the module-level
        `threshold`, searched across all rows, or "" when no score exceeds it.
    """
    if id2label is None:
        id2label = model.config.id2label

    scores = 1 / (1 + np.exp(-np.asarray(outputs, dtype=float)))  # Sigmoid

    # Starting the running maximum at the threshold folds the "above threshold"
    # and "better than the best so far" checks into one comparison.
    top_score = threshold
    top_label = ""
    for item in np.atleast_2d(scores):
        for idx, s in enumerate(item):
            if s > top_score:
                # Record the new best score; without this the last label above
                # the threshold would win instead of the highest-scoring one.
                top_score = s
                top_label = id2label[idx]
    return top_label

print(f'---- Labeling Predictions for Model: {model_name} on Dataset: {testing_set} ----')

MAX_CHARS = 512  # texts longer than this many characters are cut and marked with '..'


def _truncate(text, limit=MAX_CHARS):
    """Return `text` unchanged if it has at most `limit` characters, else its first `limit` characters plus '..'."""
    return (text[:limit] + '..') if len(text) > limit else text


# Every column is cast to str so each cell can be measured and sliced as text.
df = pd.read_csv(f'predictions-data/{model_name}/{testing_set}-predictions.tsv', sep='\t').astype(str)

target_labels = []
prediction_labels = []

# Walk the reference and predicted texts side by side, classifying each one.
for target_text, predicted_text in zip(df["target_text"], df["predictions"]):
    target_emo = goemotions(_truncate(target_text))
    prediction_emo = goemotions(_truncate(predicted_text))

    target_labels.append(Top_Score_Label(target_emo))
    prediction_labels.append(Top_Score_Label(prediction_emo))

df["target_emo"] = target_labels
df["prediction_emo"] = prediction_labels

# index=False keeps the row index out of the file; otherwise it comes back as an
# extra 'Unnamed: 0' column when the TSV is read again.
df.to_csv(f'predictions-data/{model_name}/{testing_set}-prediction_emo.tsv', sep='\t', index=False)

In [None]:
import pandas as pd
import evaluate

from pprint import pprint
from statistics import mean

def exact(truths, preds):
    """Return the 'exact_match' value from the `evaluate` exact_match metric.

    Args:
        truths: reference values, passed to the metric as `references`.
        preds: predicted values, passed to the metric as `predictions`.
    """
    metric = evaluate.load('exact_match')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['exact_match']

def bleu(truths, preds):
    """Return the 'bleu' value from the `evaluate` bleu metric.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('bleu')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['bleu']

def google_bleu(truths, preds):
    """Return the 'google_bleu' value from the `evaluate` google_bleu metric.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('google_bleu')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['google_bleu']

def rouge1(truths, preds):
    """Return the 'rouge1' value from the `evaluate` rouge metric.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('rouge')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['rouge1']
    
def rouge2(truths, preds):
    """Return the 'rouge2' value from the `evaluate` rouge metric.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('rouge')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['rouge2']
    
def rougeL(truths, preds):
    """Return the 'rougeL' value from the `evaluate` rouge metric.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('rouge')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['rougeL']

def bertscore(truths, preds):
    """Return the full result of the `evaluate` bertscore metric.

    Unlike the other scorers in this cell, the whole compute() result is
    returned rather than a single entry of it.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('bertscore')
    return metric.compute(
        predictions=preds,
        references=truths,
        model_type="distilbert-base-uncased",
    )

def meteor(truths, preds):
    """Return the 'meteor' value from the `evaluate` meteor metric.

    Args:
        truths: reference texts, passed to the metric as `references`.
        preds: predicted texts, passed to the metric as `predictions`.
    """
    metric = evaluate.load('meteor')
    scores = metric.compute(predictions=preds, references=truths)
    return scores['meteor']



print('\n')

print_base_info(model_name, testing_set)

# Load the labelled predictions; every column is cast to str before scoring.
df = pd.read_csv(f'predictions-data/{model_name}/{testing_set}-prediction_emo.tsv', sep='\t').astype(str)

# (heading, scorer, reference column, prediction column), listed in report order.
# The emotion labels are compared by exact match; the text metrics compare the
# target text with the generated prediction.
report_rows = [
    ("Exact Score", exact, "target_emo", "prediction_emo"),
    ("BLEU Score", bleu, "target_text", "predictions"),
    ('Google BLEU Score', google_bleu, "target_text", "predictions"),
    ('ROUGE1 Score', rouge1, "target_text", "predictions"),
    ('ROUGE2 Score', rouge2, "target_text", "predictions"),
    ('ROUGEL Score', rougeL, "target_text", "predictions"),
    ('METEOR Score', meteor, "target_text", "predictions"),
]

for heading, scorer, truth_col, pred_col in report_rows:
    pprint(heading)
    pprint(scorer(df[truth_col], df[pred_col]))

print('\n')