# Initialisation

## Imports

In [20]:
import torch
from transformers import BertTokenizer, BertModel, BertForMaskedLM, AutoModelForCausalLM , AutoTokenizer, pipeline, RobertaTokenizer, RobertaForMaskedLM, AlbertTokenizer, AlbertModel, AlbertForMaskedLM
import logging
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
from afinn import Afinn
from tqdm import tqdm
import unidecode
from time import sleep
from googleapiclient import discovery
import json
from googleapiclient import discovery
import json
import asyncio
import time
import math 
logging.basicConfig(level=logging.INFO)# OPTIONAL

## Device Settings (MPS / CUDA / CPU)

In [21]:
print(f"PyTorch version: {torch.__version__}")
# Select the compute device, preferring Apple's MPS backend, then CUDA, then CPU.
# `device` is always a torch.device so its type does not depend on which
# backend happens to be available (previously the MPS branch produced a str).
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

PyTorch version: 1.13.1
Using device: mps


## Definitions

In [22]:
# PATHS
# Directory the evaluation CSVs are read from and directory the result CSVs are written to.
EVALUATION_PATH = '../data/evaluation/'
RESULTS_PATH = '../data/results/'

# TEMPLATES
# Evaluation file names, appended to EVALUATION_PATH when loaded.
# The "vinai/..." names contain a slash, so they resolve to a sub-directory.
ALBERT_BASE_TEMPLATE_1 = 'albert-base-v2_template_1.csv'
ALBERT_BASE_TEMPLATE_5 = 'albert-base-v2_template_5.csv'
ALBERT_LARGE_TEMPLATE_1 = 'albert-large-v2_template_1.csv'
ALBERT_LARGE_TEMPLATE_5 = 'albert-large-v2_template_5.csv'

BERT_BASE_TEMPLATE_1 = 'bert-base-uncased_template_1.csv'
BERT_BASE_TEMPLATE_5 = 'bert-base-uncased_template_5.csv'
BERT_LARGE_TEMPLATE_1 = 'bert-large-uncased_template_1.csv'
BERT_LARGE_TEMPLATE_5 = 'bert-large-uncased_template_5.csv'

ROBERTA_BASE_TEMPLATE_1 = 'roberta-base_template_1.csv'
ROBERTA_BASE_TEMPLATE_5 = 'roberta-base_template_5.csv'
ROBERTA_LARGE_TEMPLATE_1 = 'roberta-large_template_1.csv'
ROBERTA_LARGE_TEMPLATE_5 = 'roberta-large_template_5.csv'

BERTWEET_BASE_TEMPLATE_1 = 'vinai/bertweet-base_template_1.csv'
BERTWEET_BASE_TEMPLATE_5 = 'vinai/bertweet-base_template_5.csv'
BERTWEET_LARGE_TEMPLATE_1 = 'vinai/bertweet-large_template_1.csv'
BERTWEET_LARGE_TEMPLATE_5 = 'vinai/bertweet-large_template_5.csv'

# KEYS
# Column names and cell values looked up in the evaluation CSV rows.
QUEERNESS = 'queerness'
PRONOUN = 'pronoun'
TYPE = 'type'
BINARY = 'binary'
NEUTRAL = 'neutral'
NEO = 'neo'
SENT = 'sentiment_analysis'
QUEER = 'queer'
IDENTITY = 'identity'
OTHER = 'other'
ORIENTATION = 'orientation'
HURTLEX = 'hurtlex'
PERSPECTIVE = 'perspective'

# Column headers of the Perspective result table.
perspective_cat = [
    'Toxicity',
    'Identity attack',
    'insult',
    'Profanity',
    'Threat',
]

# Column headers of the HurtLex result table; position i corresponds to
# the category code at position i of hurtlex_cat_names.
hurtlex_cat = [
    'Animals',
    'Crime and immoral behavior',
    'Derogatory words',
    'Female genitalia',
    'Male genitalia',
    'Homosexuality',
    'Potential negative connotations',
    'Professions and occupations',
    'Prostitution',
    'Social and economic disadvantage',
]
hurtlex_cat_names = [
    'an',
    're',
    'cds',
    'asf',
    'asm',
    'om',
    'qas',
    'pa',
    'pr',
    'is',
]

# Queer Bench - Results

### Utils

In [23]:
def get_modelName(template_path):
    """Split an evaluation file name into its model name and template number.

    Names of the form ``<model>_template_<number>.csv`` are parsed with a
    single anchored pattern, so underscores (or the word "template") inside
    the model name are preserved and multi-digit numbers are kept whole.

    Args:
        template_path: File name such as ``'bert-base-uncased_template_5.csv'``.

    Returns:
        A ``(model_name, template_number)`` tuple of strings, e.g.
        ``('bert-base-uncased', '5')``.
    """
    match = re.fullmatch(r'(?P<model>.+)_template_(?P<number>\d+)\.csv', template_path)
    if match:
        return match.group('model'), match.group('number')

    # Fallback for names that do not follow the expected pattern: strip the
    # markers and treat the last remaining character as the number. The dot
    # is escaped so that only a literal ".csv" is removed.
    stem = template_path.replace('_', '')
    stem = re.sub(r'\.csv', '', stem)
    stem = stem.replace('template', '')
    return stem[:-1], stem[-1]
    
def sentiment_analysis(array, val):
    """Increment the sentiment bucket that matches the sign of ``val``.

    ``array`` holds three counters: index 0 for negative scores, index 1 for
    scores equal to zero and index 2 for everything else. Any value that is
    neither zero nor below zero (NaN included) lands in index 2.

    The counters are updated in place and the same object is returned.
    """
    if val == 0:
        slot = 1
    elif val < 0:
        slot = 0
    else:
        slot = 2
    array[slot] += 1
    return array

def checkNaN(str):
    """Return True when ``str`` does not compare equal to itself (as NaN does)."""
    differs_from_itself = str != str
    return differs_from_itself

def format_array(prediction):
    """Parse the textual form of a list (e.g. ``"['an', 're']"``) into strings.

    Quotes, spaces and square brackets are removed wherever they occur and
    the remainder is split on commas. Empty items are dropped, so ``"[]"``
    yields an empty list rather than ``['']``.

    Args:
        prediction: The cell value; either a string or NaN for a missing cell.

    Returns:
        A list of the item strings, empty when ``prediction`` is NaN or
        contains no items.
    """
    if checkNaN(prediction):
        return []
    # One raw-string character class replaces the four separate substitutions
    # and avoids the invalid "\[" / "\]" escapes of non-raw string literals.
    cleaned = re.sub(r"[\[\]' ]", "", prediction)
    return [item for item in cleaned.split(',') if item]

def hurtlex(store, array):
    """Count the HurtLex category codes found in ``array`` into ``store``.

    ``array`` is parsed with ``format_array``; for every parsed item, each
    position of ``hurtlex_cat_names`` holding the same code has its counter
    in ``store`` incremented. Items matching no code are ignored.

    ``store`` is updated in place and returned.
    """
    for code in format_array(array):
        matching_slots = (pos for pos, name in enumerate(hurtlex_cat_names) if code == name)
        for slot in matching_slots:
            store[slot] += 1
    return store

def perspective(store, array):
    """Add the integer values parsed from ``array`` onto the running totals.

    ``array`` is parsed with ``format_array`` and each item is converted with
    ``int``. A missing or empty cell contributes nothing: the totals are
    returned unchanged instead of being truncated to an empty list, which is
    what a plain ``zip`` over an empty operand would produce.

    Args:
        store: Current totals, one integer per category.
        array: Cell value holding the textual list of per-category integers.

    Returns:
        A new list with the element-wise sums (``store`` is not modified).
        Values beyond ``len(store)`` are ignored.

    Raises:
        ValueError: If the cell holds some values but fewer than
            ``len(store)``, since summing would silently drop categories.
    """
    flags = [int(item) for item in format_array(array) if item != '']
    if not flags:
        return list(store)
    if len(flags) < len(store):
        raise ValueError(
            f"expected at least {len(store)} perspective values, got {len(flags)}: {array!r}"
        )
    return [total + flag for total, flag in zip(store, flags)]


In [24]:
class QueerResults():
    """Aggregate one evaluation CSV into sentiment, HurtLex and Perspective tables.

    Constructing an instance does all the work: the file is read from
    EVALUATION_PATH, every row is assigned to one of nine groups, and three
    summary tables are displayed and written under RESULTS_PATH.
    """

    # Row labels in the order every result table lists them.
    _GROUP_LABELS = (
        'Neo',
        'Neutral',
        'Binary',
        'Queer Identity',
        'Queer Orientation',
        'Queer Other',
        'Non Queer Identity',
        'Non Queer Orientation',
        'Non Queer Other',
    )

    def __init__(self, template_path):
        """Load ``template_path`` (semicolon separated) and produce the three tables."""
        self.template_path = template_path
        self.template_file = pd.read_csv(EVALUATION_PATH+template_path, sep=";")
        self.model_name, self.numAtt = get_modelName(template_path)
        self.sentiment_analysis_graph()
        self.hurtlex_graph()
        self.perspective_graph()

    @staticmethod
    def _group_label(row):
        """Return the table row label that the CSV row ``row`` belongs to."""
        queerness = row.loc[QUEERNESS]
        kind = row.loc[TYPE]
        # Pronoun rows: neo / neutral, anything else counts as binary.
        if queerness == PRONOUN:
            if kind == NEO:
                return 'Neo'
            if kind == NEUTRAL:
                return 'Neutral'
            return 'Binary'
        # Term rows: queer vs. everything else, then identity / orientation / other.
        prefix = 'Queer' if queerness == QUEER else 'Non Queer'
        if kind == IDENTITY:
            return prefix + ' Identity'
        if kind == ORIENTATION:
            return prefix + ' Orientation'
        return prefix + ' Other'

    def _accumulate(self, column, width, update):
        """Fold ``column`` of every row into a per-group list of ``width`` counters.

        ``update(counters, cell)`` must return the counters to keep for the group.
        """
        totals = {label: [0] * width for label in self._GROUP_LABELS}
        for _, row in self.template_file.iterrows():
            label = self._group_label(row)
            totals[label] = update(totals[label], row.loc[column])
        return totals

    def _write_table(self, columns, rows_by_label, suffix):
        """Build the table for ``rows_by_label``, display it and save it as CSV."""
        df = pd.DataFrame(columns=columns)
        for label in self._GROUP_LABELS:
            df.loc[label] = rows_by_label[label]
        display(df)
        # NOTE(review): index=False means the group labels are not written to
        # the file; rows appear in _GROUP_LABELS order. Confirm that readers
        # of these CSVs rely on row position before changing this.
        df.to_csv(RESULTS_PATH+self.model_name+'_'+self.numAtt+suffix, sep=';', index=False)

    def sentiment_analysis_graph(self):
        """Tabulate negative / neutral / positive counts, mean score and row count per group."""
        counts = {label: [0, 0, 0] for label in self._GROUP_LABELS}
        scores = {label: [] for label in self._GROUP_LABELS}
        for _, row in tqdm(self.template_file.iterrows(), total=self.template_file.shape[0], desc='Reading Sentiment Analysis', unit='sentences'):
            val = row.loc[SENT]
            label = self._group_label(row)
            counts[label] = sentiment_analysis(counts[label], val)
            scores[label].append(val)

        table = {}
        for label in self._GROUP_LABELS:
            values = scores[label]
            # An empty group has no mean; use NaN directly rather than letting
            # np.mean([]) emit a RuntimeWarning before returning NaN.
            mean = np.mean(values) if values else np.nan
            table[label] = counts[label] + [mean, len(values)]
        self._write_table(('Negative', 'Neutral', 'Positive', 'Mean', 'Total'), table, '_sentiment.csv')

    def hurtlex_graph(self):
        """Tabulate, per group, how often each HurtLex category code occurs."""
        totals = self._accumulate(HURTLEX, len(hurtlex_cat), hurtlex)
        self._write_table(hurtlex_cat, totals, '_hurtlex.csv')

    def perspective_graph(self):
        """Tabulate, per group, the summed Perspective values of each category."""
        totals = self._accumulate(PERSPECTIVE, len(perspective_cat), perspective)
        self._write_table(perspective_cat, totals, '_perspective.csv')


In [25]:
# Build, display and save the result tables for every BERTweet evaluation file.
# Looping (instead of ending the cell on a bare constructor call) also keeps
# the object's repr out of the cell output.
for template in (
    BERTWEET_BASE_TEMPLATE_1,
    BERTWEET_BASE_TEMPLATE_5,
    BERTWEET_LARGE_TEMPLATE_1,
    BERTWEET_LARGE_TEMPLATE_5,
):
    QueerResults(template)

Reading Sentiment Analysis:   0%|          | 0/8268 [00:00<?, ?sentences/s]

Reading Sentiment Analysis: 100%|██████████| 8268/8268 [00:00<00:00, 28548.82sentences/s]


Unnamed: 0,Negative,Neutral,Positive,Mean,Total
Neo,0.0,1378.0,0.0,0.0,1378.0
Neutral,0.0,106.0,0.0,0.0,106.0
Binary,0.0,212.0,0.0,0.0,212.0
Queer Identity,0.0,2226.0,0.0,0.0,2226.0
Queer Orientation,0.0,1696.0,0.0,0.0,1696.0
Queer Other,0.0,1272.0,0.0,0.0,1272.0
Non Queer Identity,0.0,848.0,0.0,0.0,848.0
Non Queer Orientation,0.0,318.0,0.0,0.0,318.0
Non Queer Other,0.0,212.0,0.0,0.0,212.0


Unnamed: 0,Animals,Crime and immoral behavior,Derogatory words,Female genitalia,Male genitalia,Homosexuality,Potential negative connotations,Professions and occupations,Prostitution,Social and economic disadvantage
Neo,0,0,0,0,0,0,0,0,0,0
Neutral,0,0,0,0,0,0,0,0,0,0
Binary,0,0,0,0,0,0,0,0,0,0
Queer Identity,0,0,0,0,0,0,0,0,0,0
Queer Orientation,0,0,0,0,0,0,0,0,0,0
Queer Other,0,0,0,0,0,0,0,0,0,0
Non Queer Identity,0,0,0,0,0,0,0,0,0,0
Non Queer Orientation,0,0,0,0,0,0,0,0,0,0
Non Queer Other,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,Toxicity,Identity attack,insult,Profanity,Threat
Neo,2,2,3,0,0
Neutral,0,0,0,0,0
Binary,0,0,0,0,0
Queer Identity,7,11,0,0,0
Queer Orientation,7,12,1,0,0
Queer Other,7,8,0,0,0
Non Queer Identity,1,2,0,0,0
Non Queer Orientation,2,3,0,0,0
Non Queer Other,0,1,0,0,0


Reading Sentiment Analysis: 100%|██████████| 8268/8268 [00:00<00:00, 29309.40sentences/s]


Unnamed: 0,Negative,Neutral,Positive,Mean,Total
Neo,22.0,1286.0,70.0,0.014659,1378.0
Neutral,2.0,96.0,8.0,0.041509,106.0
Binary,7.0,196.0,9.0,0.011321,212.0
Queer Identity,21.0,2140.0,65.0,0.01177,2226.0
Queer Orientation,26.0,1556.0,114.0,0.034316,1696.0
Queer Other,9.0,1204.0,59.0,0.027516,1272.0
Non Queer Identity,5.0,818.0,25.0,0.011321,848.0
Non Queer Orientation,5.0,299.0,14.0,0.016981,318.0
Non Queer Other,1.0,204.0,7.0,0.023585,212.0


Unnamed: 0,Animals,Crime and immoral behavior,Derogatory words,Female genitalia,Male genitalia,Homosexuality,Potential negative connotations,Professions and occupations,Prostitution,Social and economic disadvantage
Neo,0,0,0,1174,3,0,0,0,0,0
Neutral,0,0,0,92,0,0,0,0,0,0
Binary,0,0,0,186,0,0,0,0,0,0
Queer Identity,0,0,0,1577,0,0,0,0,0,0
Queer Orientation,0,0,0,1301,1,0,0,0,0,0
Queer Other,0,0,0,955,0,0,0,0,0,0
Non Queer Identity,0,0,0,619,0,0,0,0,0,0
Non Queer Orientation,0,0,0,212,0,0,0,0,0,0
Non Queer Other,0,0,0,133,0,0,0,0,0,0


Unnamed: 0,Toxicity,Identity attack,insult,Profanity,Threat
Neo,13,2,18,9,0
Neutral,0,0,0,0,0
Binary,1,0,0,1,0
Queer Identity,33,124,0,4,0
Queer Orientation,75,174,4,2,0
Queer Other,32,88,0,1,0
Non Queer Identity,6,19,0,1,0
Non Queer Orientation,9,33,0,0,0
Non Queer Other,1,6,0,1,0


Reading Sentiment Analysis: 100%|██████████| 8268/8268 [00:00<00:00, 29473.74sentences/s]


Unnamed: 0,Negative,Neutral,Positive,Mean,Total
Neo,0.0,1378.0,0.0,0.0,1378.0
Neutral,0.0,106.0,0.0,0.0,106.0
Binary,0.0,212.0,0.0,0.0,212.0
Queer Identity,0.0,2226.0,0.0,0.0,2226.0
Queer Orientation,0.0,1696.0,0.0,0.0,1696.0
Queer Other,0.0,1272.0,0.0,0.0,1272.0
Non Queer Identity,0.0,848.0,0.0,0.0,848.0
Non Queer Orientation,0.0,318.0,0.0,0.0,318.0
Non Queer Other,0.0,212.0,0.0,0.0,212.0


Unnamed: 0,Animals,Crime and immoral behavior,Derogatory words,Female genitalia,Male genitalia,Homosexuality,Potential negative connotations,Professions and occupations,Prostitution,Social and economic disadvantage
Neo,0,0,0,0,0,0,0,0,0,0
Neutral,0,0,0,0,0,0,0,0,0,0
Binary,0,0,0,0,0,0,0,0,0,0
Queer Identity,0,0,0,0,0,0,0,0,0,0
Queer Orientation,0,0,0,0,0,0,0,0,0,0
Queer Other,0,0,0,0,0,0,0,0,0,0
Non Queer Identity,0,0,0,0,0,0,0,0,0,0
Non Queer Orientation,0,0,0,0,0,0,0,0,0,0
Non Queer Other,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,Toxicity,Identity attack,insult,Profanity,Threat
Neo,0,0,0,0,0
Neutral,0,0,0,0,0
Binary,0,0,0,0,0
Queer Identity,2,12,0,0,0
Queer Orientation,5,13,0,0,0
Queer Other,1,11,0,0,0
Non Queer Identity,0,2,0,0,0
Non Queer Orientation,0,3,0,0,0
Non Queer Other,0,0,0,0,0


Reading Sentiment Analysis: 100%|██████████| 8268/8268 [00:00<00:00, 29424.87sentences/s]


Unnamed: 0,Negative,Neutral,Positive,Mean,Total
Neo,10.0,1348.0,20.0,0.004354,1378.0
Neutral,2.0,99.0,5.0,0.011321,106.0
Binary,2.0,208.0,2.0,0.001887,212.0
Queer Identity,7.0,2191.0,28.0,0.004403,2226.0
Queer Orientation,13.0,1661.0,22.0,0.00342,1696.0
Queer Other,4.0,1255.0,13.0,0.00283,1272.0
Non Queer Identity,8.0,832.0,8.0,0.002123,848.0
Non Queer Orientation,3.0,307.0,8.0,0.006918,318.0
Non Queer Other,3.0,198.0,11.0,0.016038,212.0


Unnamed: 0,Animals,Crime and immoral behavior,Derogatory words,Female genitalia,Male genitalia,Homosexuality,Potential negative connotations,Professions and occupations,Prostitution,Social and economic disadvantage
Neo,0,0,2,0,4,0,0,0,0,0
Neutral,0,0,0,0,0,0,0,0,0,0
Binary,0,0,0,0,0,0,0,0,0,0
Queer Identity,0,0,0,0,8,0,0,0,0,0
Queer Orientation,0,0,0,0,6,0,0,0,0,0
Queer Other,0,0,0,0,2,0,0,0,0,0
Non Queer Identity,0,0,0,0,0,0,0,0,0,0
Non Queer Orientation,0,0,0,0,0,0,0,0,0,0
Non Queer Other,0,0,0,0,1,0,0,0,0,0


Unnamed: 0,Toxicity,Identity attack,insult,Profanity,Threat
Neo,1,0,5,0,0
Neutral,0,0,0,0,0
Binary,0,0,0,0,0
Queer Identity,36,91,5,0,0
Queer Orientation,53,97,8,0,0
Queer Other,17,54,2,0,0
Non Queer Identity,5,15,0,0,0
Non Queer Orientation,6,24,0,0,0
Non Queer Other,1,4,0,0,0


<__main__.QueerResults at 0x1672c43a0>