# Mount drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive. The cells below read from and write to
# paths under /content/drive/MyDrive.
drive.mount("/content/drive")

# Import libraries

In [None]:
!pip install --upgrade pip  # ensures that pip is current
!git clone https://github.com/google-research/bleurt.git
%cd bleurt
!pip install .

In [None]:
from bleurt import score
import numpy as np
import pandas as pd
# Prevent AttributeError. via
# https://stackoverflow.com/a/76404841
pd.DataFrame.iteritems = pd.DataFrame.items
from statistics import mean
import urllib.request
from zipfile import ZipFile

In [None]:
# Load the rpy2 IPython extension, which provides the %%R cell magic used by
# the R cells below.
%load_ext rpy2.ipython

In [None]:
%%R
# Install the R packages used by the plotting cell. dplyr, ggh4x, patchwork,
# showtext and stringr come from CRAN; ggplot2 and GiniWegNeg are installed
# from GitHub.
# NOTE(review): assumes devtools is already available in the R runtime —
# confirm, or install it first.
install.packages("dplyr")
install.packages("ggh4x")
devtools::install_github("hadley/ggplot2")
devtools::install_github("cran/GiniWegNeg")
install.packages("patchwork")
install.packages("showtext")
install.packages("stringr")
# Attach the packages.
library(dplyr)
library(ggh4x)
library(ggplot2)
library(GiniWegNeg)
library(patchwork)
library(showtext)
# Register the Arial font with showtext. Arial.ttf must be downloaded and
# uploaded to your Google Drive beforehand.
font_add("Arial", regular = "/content/drive/MyDrive/Arial.ttf")
library(stringr)

# Import datasets

In [None]:
# Read the cleaned prediction tables (tab-separated) for the two
# gpt-3.5-turbo snapshots and give each frame a fresh 0..n-1 index.
_cleaned_dir = '/content/drive/MyDrive/datasets/cleaning/'
IHC_NLE_0301 = pd.read_csv(
    _cleaned_dir + 'IHC_NLE_predictions_cleaned_gpt-3.5-turbo-0301.tsv',
    sep='\t',
).reset_index(drop=True)
IHC_NLE_0613 = pd.read_csv(
    _cleaned_dir + 'IHC_NLE_predictions_cleaned_gpt-3.5-turbo-0613.tsv',
    sep='\t',
).reset_index(drop=True)

# Download the **BLEURT-20** checkpoint

In [None]:
# Fetch the BLEURT-20 checkpoint archive and store it on the mounted Drive.
bleurt_zip_url = 'https://storage.googleapis.com/bleurt-oss-21/BLEURT-20.zip'
bleurt_zip_path = '/content/drive/MyDrive/BLEURT-20.zip'
urllib.request.urlretrieve(bleurt_zip_url, bleurt_zip_path)

In [None]:
# Extract the downloaded checkpoint archive next to it on Drive. via
# https://www.geeksforgeeks.org/unzipping-files-in-python/
with ZipFile('/content/drive/MyDrive/BLEURT-20.zip', mode='r') as archive:
    archive.extractall(path='/content/drive/MyDrive/')

# Define a function to get BLEURT scores

In [None]:
checkpoint = "/content/drive/MyDrive/BLEURT-20"
# Use BLEURT as a Python library. via
# https://github.com/google-research/bleurt/tree/master
base = 'predicted_NLE_base_implied_statement_of_implicitly_hateful_tweet'
def get_BLEURT_scores(df, model):
    """Compute the mean BLEURT score of each persona column against the base column.

    Every column from position 5 onwards whose name contains
    'implied_statement_of_implicitly_hateful_tweet' is scored as the
    candidates, with the `base` column as the references. The column name and
    its rounded mean score are printed as each column is processed.

    Args:
        df: DataFrame holding the `base` column and the persona columns.
        model: Label stored in the 'model' column of every returned row.

    Returns:
        DataFrame with columns 'persona' (scored column name), 'BLEURT'
        (mean score for that column) and 'model'. It is empty when no column
        matches.
    """
    marker = 'implied_statement_of_implicitly_hateful_tweet'
    persona_columns = [column for column in df.columns[5:] if marker in column]
    personas = []
    bleurt_scores = []
    if persona_columns:
        # Load the checkpoint once and reuse it for every column; building a
        # scorer per column repeats the expensive model load for no benefit.
        scorer = score.BleurtScorer(checkpoint)
        # The references are the same for every persona column.
        references = df[base].tolist()
        for column in persona_columns:
            scores = scorer.score(references=references,
                                  candidates=df[column].tolist())
            mean_score = mean(scores)
            print(column)
            print(np.round(mean_score, 3))
            personas.append(column)
            bleurt_scores.append(mean_score)
    # Built after the loop so it is defined even when nothing matched.
    model_list = [model] * len(personas)
    return pd.DataFrame(list(zip(personas, bleurt_scores, model_list)),
                        columns =['persona', 'BLEURT', 'model'])

# Call the `get_BLEURT_scores` function

You will need around 8.7 GB of GPU RAM. Paid versions of Colab give you access to faster GPUs. Alternatively, if your local machine has sufficient GPU RAM, you can download the notebook and edit the code (specifically the file paths) so that it points to the directories you want to use on your machine.

In [None]:
# Score the gpt-3.5-turbo-0301 predictions. The model label evaluates to
# gpt-3.5-turbo-"0301".
# NOTE(review): the embedded double quotes presumably keep "0301" intact when
# the plotting cell parses the label with label_parsed — confirm.
bleurt_0301 = get_BLEURT_scores(IHC_NLE_0301, 'gpt-3.5-turbo-\"0301\"')

In [None]:
# Score the gpt-3.5-turbo-0613 predictions. The model label evaluates to
# gpt-3.5-turbo-"0613".
# NOTE(review): the embedded double quotes presumably keep "0613" intact when
# the plotting cell parses the label with label_parsed — confirm.
bleurt_0613 = get_BLEURT_scores(IHC_NLE_0613, 'gpt-3.5-turbo-\"0613\"')

# Plot mean BLEURT scores and Bias

In [None]:
%%R -i bleurt_0301 -i bleurt_0613
showtext_auto()
rownames(bleurt_0301) <- NULL
rownames(bleurt_0613) <- NULL
data <- rbind(bleurt_0301, bleurt_0613)
# Replace underscores with spaces. via
# https://stackoverflow.com/a/53107084
data$persona <- gsub("_", " ", data$persona)
# Remove first few words. via
# https://stackoverflow.com/a/64889117
data$persona <- sub("^\\w+\\s\\w+\\s\\w+\\s", "", data$persona)
# Remove everything after specified substring. via
# https://stackoverflow.com/a/53503384
data$persona <- sub("States.+$", "States", data$persona)
# Attribute groups in row order, with the number of persona rows in each.
# Both score tables must follow this layout.
attribute_names <- c(
    "Gender/Gender~identity/Sexuality",
    "Origin",
    "Race/Ethnicity",
    "Religion"
)
attribute_sizes <- c(4, 3, 6, 6)
# Fail early instead of silently recycling or mis-slicing when a table does
# not have the expected number of rows.
stopifnot(
    nrow(bleurt_0301) == sum(attribute_sizes),
    nrow(bleurt_0613) == sum(attribute_sizes)
)
# Compute bias: one Gini index per attribute group, repeated for every row of
# that group so it lines up with `data`.
attribute_bias <- function(scores) {
    ends <- cumsum(attribute_sizes)
    starts <- ends - attribute_sizes + 1
    unlist(lapply(seq_along(attribute_sizes), function(k) {
        rep(Gini_RSV(scores[starts[k]:ends[k]])[[1]], attribute_sizes[k])
    }))
}
data$bias <- c(
    attribute_bias(bleurt_0301$BLEURT),
    attribute_bias(bleurt_0613$BLEURT)
)
# Text labels for the bias bars: each value rounded to three decimals and
# formatted on its own, reusing the indices computed above.
data$labs <- vapply(
    data$bias,
    function(b) format(round(b, digits = 3), nsmall = 3),
    character(1)
)
data$attribute <- rep(rep(attribute_names, times = attribute_sizes), 2)
# Constant y position: every facet shows a single row of dots.
data$grp <- rep(1, nrow(data))
# Dot plots. via
# https://uc-r.github.io/cleveland-dot-plots
# Left panel: one facet per attribute/model pair showing the mean BLEURT score
# of every persona as a dot on a shared 0-1 axis.
# Line widths use `linewidth` (as element_rect below already does) rather than
# the deprecated `size` argument.
g1 <- ggplot(
    data,
    aes(BLEURT, grp)
) +
    geom_line(aes(group = grp),
        linewidth = 0.05
    ) +
    geom_point(size = 0.1) +
    # Label max value dots. via
    # https://stackoverflow.com/a/48351513
    geom_text(
        # Keep only the highest-scoring persona(s) of each attribute/model
        # group.
        data = . %>% group_by(attribute, model) %>%
            filter(BLEURT == max(BLEURT)),
        # Wrap and truncate text. via
        # https://stackoverflow.com/a/73357493
        aes(label = str_wrap(persona, width = 20)),
        hjust = 0,
        family = "Arial",
        size = 7 / .pt,
        nudge_x = 0.01,
        lineheight = 1
    ) +
    scale_x_continuous(
        breaks = c(0, .25, .5, .75, 1),
        limits = c(0, 1)
    ) +
    xlab("Mean BLEURT") +
    facet_nested(
        rows = vars(attribute, model),
        switch = "y",
        # Strip labels are parsed as expressions.
        labeller = label_parsed,
        strip = strip_nested(size = "variable")
    ) +
    theme(
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks = element_line(linewidth = 0.1),
        axis.text = element_text(
            family = "Arial",
            size = 7
        ),
        strip.text.y.left = element_text(
            angle = 0,
            family = "Arial",
            size = 7
        ),
        strip.background = element_rect(
            colour = "white",
            fill = "#ECECEC"
        ),
        panel.grid = element_line(
            color = "black",
            linewidth = 0.1,
            linetype = 2
        ),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.y = unit(0, "lines"),
        panel.background = element_rect(
            color = "black",
            fill = "white",
            linewidth = 0.1
        ),
        text = element_text(
            family = "Arial",
            size = 7
        )
    )
# Reduce `data` to one row per attribute/model pair: drop the per-persona
# columns, then remove the rows that have become duplicates.
data$BLEURT <- NULL
data$persona <- NULL
data <- data[!duplicated(data), ]
# Right panel: one horizontal bar per attribute/model pair showing the bias
# value, annotated with its formatted label.
# Line widths use `linewidth` (as element_rect below already does) rather than
# the deprecated `size` argument.
g2 <- ggplot(
    data,
    aes(grp, bias)
) +
    geom_text(aes(label = labs),
        hjust = -0.2,
        family = "Arial",
        size = 7 / .pt
    ) +
    coord_flip() +
    ylab("Bias") +
    scale_y_continuous(
        breaks = c(0.00, 1.00),
        limits = c(0, 1)
    ) +
    geom_bar(stat = "identity", fill = "black") +
    facet_nested(
        rows = vars(attribute, model),
        labeller = label_parsed,
        strip = strip_nested(size = "variable")
    ) +
    theme(
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks = element_line(linewidth = 0.1),
        axis.text = element_text(
            family = "Arial",
            size = 7
        ),
        # Strips are hidden in this panel.
        strip.text = element_blank(),
        strip.background = element_blank(),
        panel.grid = element_line(
            color = "black",
            linewidth = 0.1,
            linetype = 2
        ),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.y = unit(0, "lines"),
        panel.background = element_rect(
            color = "black",
            fill = "white",
            linewidth = 0.1
        ),
        text = element_text(
            family = "Arial",
            size = 7
        )
    )
# Place the dot plot and the bias bars side by side (5:1 width ratio) and save
# the result as EPS. The combined figure is passed to ggsave() explicitly
# instead of relying on last_plot() pointing at the right object.
figure <- g1 + g2 + plot_layout(widths = c(5, 1))
ggsave(
    "/content/drive/MyDrive/datasets/cleaning/IHC_NLE.eps",
    plot = figure,
    height = 8,
    width = 18,
    units = "cm"
)