# Mount drive

In [None]:
# Mount Google Drive at /content/drive; the dataset and font paths used in
# later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries

In [None]:
!pip install krippendorff

In [3]:
import krippendorff
import numpy as np
import pandas as pd
# Prevent AttributeError. via
# https://stackoverflow.com/a/76404841
pd.DataFrame.iteritems = pd.DataFrame.items
from scipy import stats
from sklearn.metrics import f1_score

In [4]:
# Load the rpy2 IPython extension, which provides the %%R cell magic used by
# the R cells below.
%load_ext rpy2.ipython

In [None]:
%%R
# Install the R packages used by the analysis and plotting cells, then attach
# them. The install order is kept as-is: CRAN packages first pull in their
# dependencies, and the GitHub builds installed afterwards replace them.
# NOTE(review): devtools is called but never installed here; presumably it is
# preinstalled in the runtime -- confirm.
install.packages("dplyr")
install.packages("ggh4x")
devtools::install_github("hadley/ggplot2")
devtools::install_github("cran/GiniWegNeg")
devtools::install_github("kgwet/irrCAC")
install.packages("patchwork")
install.packages("showtext")
install.packages("stringr")
library(dplyr)
library(ggh4x)
library(ggplot2)
library(GiniWegNeg)
library(irrCAC)
library(patchwork)
library(showtext)
# Arial.ttf must be downloaded and uploaded to your Google Drive beforehand;
# it is registered here under the family name "Arial" used by the plots.
font_add("Arial", regular = "/content/drive/MyDrive/Arial.ttf")
library(stringr)

# Import datasets

## Test set with annotations and annotator information

In [6]:
# Read the SBIC test split (annotations plus annotator information) and give
# it a fresh 0..n-1 index.
SBIC = (
    pd.read_csv('/content/drive/MyDrive/datasets/SBIC.v2.tst.csv')
    .reset_index(drop=True)
)

## GPT annotations

In [7]:
# Cleaned GPT predictions, one tab-separated file per model snapshot. Every
# column is cast to pandas' string dtype after the index is reset.
SBIC_0301 = (
    pd.read_csv(
        '/content/drive/MyDrive/datasets/cleaning/'
        'SBIC_predictions_cleaned_gpt-3.5-turbo-0301.tsv',
        sep='\t')
    .reset_index(drop=True)
    .astype('string')
)
SBIC_0613 = (
    pd.read_csv(
        '/content/drive/MyDrive/datasets/cleaning/'
        'SBIC_predictions_cleaned_gpt-3.5-turbo-0613.tsv',
        sep='\t')
    .reset_index(drop=True)
    .astype('string')
)

# Compute macro F1-scores

In [None]:
def compute_F1(df, gold_col='offensiveYN_mode', first_pred_col=15,
               labels=('1.0', '0.5', '0.0')):
    """Print the macro F1-score of each prediction column against the gold column.

    Args:
        df: DataFrame containing the gold column and the prediction columns.
        gold_col: Name of the column holding the gold labels.
        first_pred_col: Positional index of the first prediction column;
            every column from this position to the last one is scored.
        labels: Label values handed to ``f1_score`` for macro averaging.

    Returns:
        None. For every scored column its name is printed, followed by the
        macro F1-score rounded to three decimals.
    """
    for pred_col in df.columns[first_pred_col:]:
        print(pred_col)
        score = f1_score(df[gold_col],
                         df[pred_col],
                         labels=list(labels),
                         average='macro')
        print(np.round(score, 3))


compute_F1(SBIC_0301)
print()
compute_F1(SBIC_0613)

# Aggregate actual annotations by sociodemographic attribute

In [9]:
# Pandas Groupby and mode. via
# https://stackoverflow.com/a/54304691 and
# https://stackoverflow.com/a/10374456
def _offensive_mode_by(attribute):
    """Return the modal `offensiveYN` per (post, attribute value) pair.

    NaN ratings are omitted when the mode is taken. The result has the
    columns `post`, `attribute` and `offensiveYN_mode`.
    """
    ratings_per_group = SBIC.groupby(['post', attribute])['offensiveYN']
    modes = ratings_per_group.agg(
        lambda ratings: stats.mode(ratings, nan_policy='omit')[0])
    return modes.to_frame(name='offensiveYN_mode').reset_index()


SBIC_gender = _offensive_mode_by('annotatorGender')
SBIC_politics = _offensive_mode_by('annotatorPolitics')
SBIC_race = _offensive_mode_by('annotatorRace')

In [10]:
# Cast all three aggregated frames to pandas' string dtype, the same dtype
# the GPT prediction frames are cast to when they are loaded.
SBIC_gender, SBIC_politics, SBIC_race = (
    frame.astype('string')
    for frame in (SBIC_gender, SBIC_politics, SBIC_race)
)

# Define a function to compute and interpret Krippendorff's alpha coefficients between personas and actual annotators

In [11]:
%%R
# Compute and interpret Krippendorff's alpha between one persona's predictions
# and the aggregated labels of one annotator subgroup.
#
# Args:
#   df_gold:    data frame with `post`, the column named by `demog_var`, and
#               `offensiveYN_mode`.
#   df_pred:    data frame with `post` and the column named by `persona`.
#   demog_var:  name of the demographic column in `df_gold`.
#   demog_spec: value of `demog_var` selecting the subgroup.
#   persona:    name of the prediction column in `df_pred`.
#   model, measure: labels copied into the result.
#
# Returns:
#   A one-row data frame with columns `alpha`, `persona`, `model`, `measure`.
#   The number of rated posts, the estimate, the categories and the
#   Landis-Koch benchmark are printed along the way.
compute_alpha_ia <- function(df_gold, df_pred, demog_var, demog_spec, persona,
                             model, measure) {
    # Restrict the gold labels to the requested subgroup.
    gold_subset <- df_gold[df_gold[demog_var] == demog_spec, ]
    # Pair every gold label with the persona's prediction for the same post.
    paired <- merge(gold_subset, df_pred[c("post", persona)], by = "post")
    # Keep only the two rating columns and drop incomplete pairs.
    ratings <- na.omit(paired[c("offensiveYN_mode", persona)])
    print(nrow(ratings))
    # Retrieve and interpret Krippendorff's alpha values. via
    # https://cran.r-project.org/web/packages/irrCAC/vignettes/
    # benchmarking.html
    fit <- krippen.alpha.raw(ratings)
    estimate <- fit$est
    print(estimate)
    print(fit$categories)
    print(landis.koch.bf(estimate$coeff.val, estimate$coeff.se))
    data.frame(
        alpha = c(estimate$coeff.val),
        persona = c(persona),
        model = c(model),
        measure = c(measure)
    )
}

# Call the `compute_alpha_ia` function to compute and interpret Krippendorff's alpha coefficients between personas and actual annotators

In [None]:
%%R -i SBIC_gender -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Man/male persona (0301 model) vs. gold labels where annotatorGender is "man".
alpha_ia_man_0301 <- compute_alpha_ia(
    SBIC_gender,
    SBIC_0301,
    "annotatorGender",
    "man",
    "predicted_q1a_labels_a_man/male_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_man_0301)

In [None]:
%%R -i SBIC_gender -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Man/male persona (0613 model) vs. gold labels where annotatorGender is "man".
alpha_ia_man_0613 <- compute_alpha_ia(
    SBIC_gender,
    SBIC_0613,
    "annotatorGender",
    "man",
    "predicted_q1a_labels_a_man/male_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_man_0613)

In [None]:
%%R -i SBIC_gender -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Woman/female persona (0301 model) vs. gold labels where annotatorGender is "woman".
alpha_ia_woman_0301 <- compute_alpha_ia(
    SBIC_gender,
    SBIC_0301,
    "annotatorGender",
    "woman",
    "predicted_q1a_labels_a_woman/female_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_woman_0301)

In [None]:
%%R -i SBIC_gender -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Woman/female persona (0613 model) vs. gold labels where annotatorGender is "woman".
alpha_ia_woman_0613 <- compute_alpha_ia(
    SBIC_gender,
    SBIC_0613,
    "annotatorGender",
    "woman",
    "predicted_q1a_labels_a_woman/female_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_woman_0613)

In [None]:
%%R -i SBIC_politics -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Liberal/progressive persona (0301 model) vs. gold labels where annotatorPolitics is "liberal".
alpha_ia_liberal_0301 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0301,
    "annotatorPolitics",
    "liberal",
    "predicted_q1a_labels_a_liberal/progressive_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_liberal_0301)

In [None]:
%%R -i SBIC_politics -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Liberal/progressive persona (0613 model) vs. gold labels where annotatorPolitics is "liberal".
alpha_ia_liberal_0613 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0613,
    "annotatorPolitics",
    "liberal",
    "predicted_q1a_labels_a_liberal/progressive_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_liberal_0613)

In [None]:
%%R -i SBIC_politics -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Moderate-liberal persona (0301 model) vs. gold labels where annotatorPolitics is "mod-liberal".
alpha_ia_modlib_0301 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0301,
    "annotatorPolitics",
    "mod-liberal",
    "predicted_q1a_labels_a_moderate_liberal_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_modlib_0301)

In [None]:
%%R -i SBIC_politics -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Moderate-liberal persona (0613 model) vs. gold labels where annotatorPolitics is "mod-liberal".
alpha_ia_modlib_0613 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0613,
    "annotatorPolitics",
    "mod-liberal",
    "predicted_q1a_labels_a_moderate_liberal_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_modlib_0613)

In [None]:
%%R -i SBIC_politics -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Moderate-conservative persona (0301 model) vs. gold labels where annotatorPolitics is "mod-cons".
alpha_ia_modcons_0301 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0301,
    "annotatorPolitics",
    "mod-cons",
    "predicted_q1a_labels_a_moderate_conservative_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_modcons_0301)

In [None]:
%%R -i SBIC_politics -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Moderate-conservative persona (0613 model) vs. gold labels where annotatorPolitics is "mod-cons".
alpha_ia_modcons_0613 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0613,
    "annotatorPolitics",
    "mod-cons",
    "predicted_q1a_labels_a_moderate_conservative_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_modcons_0613)

In [None]:
%%R -i SBIC_politics -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Political-independent persona (0301 model) vs. gold labels where annotatorPolitics is "other".
alpha_ia_other_0301 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0301,
    "annotatorPolitics",
    "other",
    "predicted_q1a_labels_a_political_independent_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_other_0301)

In [None]:
%%R -i SBIC_politics -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Political-independent persona (0613 model) vs. gold labels where annotatorPolitics is "other".
alpha_ia_other_0613 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0613,
    "annotatorPolitics",
    "other",
    "predicted_q1a_labels_a_political_independent_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_other_0613)

In [None]:
%%R -i SBIC_politics -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Conservative persona (0301 model) vs. gold labels where annotatorPolitics is "cons".
alpha_ia_cons_0301 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0301,
    "annotatorPolitics",
    "cons",
    "predicted_q1a_labels_a_conservative_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_cons_0301)

In [None]:
%%R -i SBIC_politics -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Conservative persona (0613 model) vs. gold labels where annotatorPolitics is "cons".
alpha_ia_cons_0613 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0613,
    "annotatorPolitics",
    "cons",
    "predicted_q1a_labels_a_conservative_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_cons_0613)

In [None]:
%%R -i SBIC_politics -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Libertarian persona (0301 model) vs. gold labels where annotatorPolitics is "libert".
alpha_ia_libert_0301 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0301,
    "annotatorPolitics",
    "libert",
    "predicted_q1a_labels_a_libertarian_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_libert_0301)

In [None]:
%%R -i SBIC_politics -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Libertarian persona (0613 model) vs. gold labels where annotatorPolitics is "libert".
alpha_ia_libert_0613 <- compute_alpha_ia(
    SBIC_politics,
    SBIC_0613,
    "annotatorPolitics",
    "libert",
    "predicted_q1a_labels_a_libertarian_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_libert_0613)

In [None]:
%%R -i SBIC_race -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# White/Caucasian persona (0301 model) vs. gold labels where annotatorRace is "white".
alpha_ia_white_0301 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0301,
    "annotatorRace",
    "white",
    "predicted_q1a_labels_a_White/Caucasian_person_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_white_0301)

In [None]:
%%R -i SBIC_race -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# White/Caucasian persona (0613 model) vs. gold labels where annotatorRace is "white".
alpha_ia_white_0613 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0613,
    "annotatorRace",
    "white",
    "predicted_q1a_labels_a_White/Caucasian_person_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_white_0613)

In [None]:
%%R -i SBIC_race -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Hispanic/Latinx persona (0301 model) vs. gold labels where annotatorRace is "hisp".
alpha_ia_hisp_0301 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0301,
    "annotatorRace",
    "hisp",
    "predicted_q1a_labels_a_Hispanic/Latinx_person_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_hisp_0301)

In [None]:
%%R -i SBIC_race -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Hispanic/Latinx persona (0613 model) vs. gold labels where annotatorRace is "hisp".
alpha_ia_hisp_0613 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0613,
    "annotatorRace",
    "hisp",
    "predicted_q1a_labels_a_Hispanic/Latinx_person_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_hisp_0613)

In [None]:
%%R -i SBIC_race -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Asian/Asian American persona (0301 model) vs. gold labels where annotatorRace is "asian".
alpha_ia_asian_0301 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0301,
    "annotatorRace",
    "asian",
    "predicted_q1a_labels_an_Asian/Asian_American_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_asian_0301)

In [None]:
%%R -i SBIC_race -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Asian/Asian American persona (0613 model) vs. gold labels where annotatorRace is "asian".
alpha_ia_asian_0613 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0613,
    "annotatorRace",
    "asian",
    "predicted_q1a_labels_an_Asian/Asian_American_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_asian_0613)

In [None]:
%%R -i SBIC_race -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Black/African American persona (0301 model) vs. gold labels where annotatorRace is "black".
alpha_ia_black_0301 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0301,
    "annotatorRace",
    "black",
    "predicted_q1a_labels_a_Black/African_American_in_the_United_States",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_black_0301)

In [None]:
%%R -i SBIC_race -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Black/African American persona (0613 model) vs. gold labels where annotatorRace is "black".
alpha_ia_black_0613 <- compute_alpha_ia(
    SBIC_race,
    SBIC_0613,
    "annotatorRace",
    "black",
    "predicted_q1a_labels_a_Black/African_American_in_the_United_States",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ia]"
)
print(alpha_ia_black_0613)

# Define a function to compute and interpret Krippendorff's alpha coefficients between the personas and the no-persona defaults

In [36]:
%%R
# Compute and interpret Krippendorff's alpha between a reference column and
# every other column of `df`.
#
# Args:
#   df:        data frame with one rating column per persona, including the
#              reference column.
#   reference: name of the column every other column is compared against.
#   model:     label copied into the `model` column of the result.
#   measure:   label copied into the `measure` column of the result.
#
# Returns:
#   A data frame with one row per non-reference column and the columns
#   `alpha`, `persona`, `model` and `measure`. For each compared column its
#   name, the estimate, the categories and the Landis-Koch benchmark are
#   printed.
#
# The reference column is excluded by name rather than by a fixed position
# (previously hard-coded as column 48), so the result no longer depends on
# the column order of `df`. The reference-vs-reference comparison is skipped
# entirely.
compute_alpha_ib <- function(df, reference, model, measure) {
    if (!(reference %in% colnames(df))) {
        stop("Reference column '", reference, "' not found in df")
    }
    personas <- colnames(df)[colnames(df) != reference]
    alpha_list <- numeric(length(personas))
    for (i in seq_along(personas)) {
        print(personas[i])
        # Retrieve and interpret Krippendorff's alpha values. via
        # https://cran.r-project.org/web/packages/irrCAC/vignettes/
        # benchmarking.html
        a <- krippen.alpha.raw(df[, c(reference, personas[i])])
        print(a$est)
        print(a$categories)
        print(landis.koch.bf(a$est$coeff.val, a$est$coeff.se))
        alpha_list[i] <- a$est$coeff.val
    }
    # Size the label columns from `alpha_list`; a bare `alpha` is not a local
    # variable here and would resolve to whatever `alpha` is on the search
    # path.
    res <- data.frame(
        alpha = alpha_list,
        persona = personas,
        model = rep(model, length(alpha_list)),
        measure = rep(measure, length(alpha_list))
    )
    return(res)
}

# Call the `compute_alpha_ib` function to compute and interpret Krippendorff's alpha coefficients between the personas and the no-persona defaults

In [None]:
%%R -i SBIC_0301
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Columns 16-79 of SBIC_0301 are compared against the no-persona baseline.
alpha_ib_0301 <- compute_alpha_ib(
    SBIC_0301[, 16:79],
    "predicted_q1a_labels_base",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ib]"
)
print(alpha_ib_0301)

In [None]:
%%R -i SBIC_0613
# The %%R magic has to be the first line of the cell, so the notes follow it.
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# Columns 16-79 of SBIC_0613 are compared against the no-persona baseline.
alpha_ib_0613 <- compute_alpha_ib(
    SBIC_0613[, 16:79],
    "predicted_q1a_labels_base",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ib]"
)
print(alpha_ib_0613)

# Plot Krippendorff's alpha coefficients and Bias

In [39]:
%%R
showtext_auto()
# Stack every alpha result into one long data frame: first the two alpha_ib
# tables (one row per persona), then the 24 single-row alpha_ia results.
data <- rbind(
    alpha_ib_0301,
    alpha_ib_0613,
    alpha_ia_man_0301,
    alpha_ia_man_0613,
    alpha_ia_woman_0301,
    alpha_ia_woman_0613,
    alpha_ia_liberal_0301,
    alpha_ia_liberal_0613,
    alpha_ia_modlib_0301,
    alpha_ia_modlib_0613,
    alpha_ia_modcons_0301,
    alpha_ia_modcons_0613,
    alpha_ia_other_0301,
    alpha_ia_other_0613,
    alpha_ia_cons_0301,
    alpha_ia_cons_0613,
    alpha_ia_libert_0301,
    alpha_ia_libert_0613,
    alpha_ia_white_0301,
    alpha_ia_white_0613,
    alpha_ia_hisp_0301,
    alpha_ia_hisp_0613,
    alpha_ia_asian_0301,
    alpha_ia_asian_0613,
    alpha_ia_black_0301,
    alpha_ia_black_0613
)
# Replace underscores with spaces. via
# https://stackoverflow.com/a/53107084
data$persona <- gsub("_", " ", data$persona)
# Remove first few words. via
# https://stackoverflow.com/a/64889117
data$persona <- sub("^\\w+\\s\\w+\\s\\w+\\s\\w+\\s", "", data$persona)
# Remove everything after specified substring. via
# https://stackoverflow.com/a/53503384
data$persona <- sub("in the.+$", "", data$persona)
# Append an ellipsis to mark the truncation. paste() is vectorised, so
# `persona` stays a plain character column instead of becoming a list column.
data$persona <- paste(data$persona, "...")
# Compute bias
# Rows of alpha_ib_0301 / alpha_ib_0613 that belong to each attribute, in the
# order in which they appear in `data`.
attribute_rows <- list(
    "Age/Body" = 1:9,
    "Crime/Violence/Tragedy~victim" = 10:14,
    "Culture/Origin/Religion" = 15:25,
    "Gender/Gender~identity/Sexuality" = 26:42,
    "Mental~or~physical~disability/Disorder" = 43:47,
    "Race/Ethnicity" = 48:53,
    "Socio-economic/Political/Lifestyle" = 54:63
)
attribute_sizes <- unname(lengths(attribute_rows))
# One Gini_RSV value per attribute for a given alpha_ib table.
attribute_bias <- function(alpha_ib) {
    vapply(
        attribute_rows,
        function(rows) Gini_RSV(alpha_ib$alpha[rows])[[1]],
        numeric(1),
        USE.NAMES = FALSE
    )
}
# Repeat each attribute's bias once per persona row, 0301 block then 0613.
bias_ib <- c(
    rep(attribute_bias(alpha_ib_0301), times = attribute_sizes),
    rep(attribute_bias(alpha_ib_0613), times = attribute_sizes)
)
# The remaining rows are the alpha_ia results, which carry no bias score.
n_ia <- nrow(data) - length(bias_ib)
data$bias <- c(bias_ib, rep(0, n_ia))
# Labels are formatted value by value so each keeps exactly three decimals.
data$labs <- c(
    vapply(
        bias_ib,
        function(value) format(round(value, digits = 3), nsmall = 3),
        character(1),
        USE.NAMES = FALSE
    ),
    rep("NA", n_ia)
)
data$attribute <- c(
    rep(names(attribute_rows), times = attribute_sizes),
    rep(names(attribute_rows), times = attribute_sizes),
    rep("Gender/Gender~identity/Sexuality", 4),
    rep("Socio-economic/Political/Lifestyle", 12),
    rep("Race/Ethnicity", 8)
)
# Constant y position shared by every row (one per row of `data`).
data$grp <- rep(1, nrow(data))
# Dot plots. via
# https://uc-r.github.io/cleveland-dot-plots
# Line widths are given as `linewidth`, matching the element_rect() calls in
# this cell; `size` is kept only for points and text.
g1 <- ggplot(data, aes(alpha, grp)) +
    geom_line(aes(group = grp), linewidth = 0.05) +
    geom_point(size = 0.1) +
    # Label max value dots. via
    # https://stackoverflow.com/a/48351513
    geom_text(
        data = . %>% group_by(measure, attribute, model) %>%
            filter(alpha == max(alpha)),
        # Wrap and truncate text. via
        # https://stackoverflow.com/a/73357493
        aes(label = str_wrap(persona, width = 13)),
        hjust = 0,
        family = "Arial",
        size = 7 / .pt,
        nudge_x = 0.02,
        lineheight = 1
    ) +
    scale_x_continuous(
        breaks = c(-.5, -.25, 0, .25, .5, .75, 1),
        limits = c(-.5, 1)
    ) +
    xlab("Krippendorff's alpha coefficient") +
    # One facet row per measure / attribute / model, strips on the left.
    facet_nested(
        rows = vars(measure, attribute, model),
        switch = "y",
        labeller = label_parsed,
        strip = strip_nested(size = "variable")
    ) +
    theme(
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks = element_line(linewidth = 0.1),
        axis.text = element_text(
            family = "Arial",
            size = 7
        ),
        strip.text.y.left = element_text(
            angle = 0,
            family = "Arial",
            size = 7
        ),
        strip.background = element_rect(
            colour = "white",
            fill = "#ECECEC"
        ),
        panel.grid = element_line(
            color = "black",
            linewidth = 0.1,
            linetype = 2
        ),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.y = unit(0, "lines"),
        panel.background = element_rect(
            color = "black",
            fill = "white",
            linewidth = 0.1
        ),
        text = element_text(
            family = "Arial",
            size = 7
        )
    )
# Bias panel: one bar per facet row. Work on a copy without the per-persona
# columns so `data` itself is left intact and the cell can be re-run.
bias_data <- data[, !(names(data) %in% c("alpha", "persona"))]
bias_data <- bias_data[!duplicated(bias_data), ]
g2 <- ggplot(bias_data, aes(grp, bias)) +
    geom_text(aes(label = labs),
        hjust = -0.1,
        family = "Arial",
        size = 7 / .pt
    ) +
    coord_flip() +
    ylab("Bias") +
    scale_y_continuous(
        breaks = c(0.00, 1.00),
        limits = c(0, 1)
    ) +
    geom_bar(stat = "identity", fill = "black") +
    # Same facet rows as the dot plot so the two panels line up; the strips
    # are hidden by the theme below.
    facet_nested(
        rows = vars(measure, attribute, model),
        labeller = label_parsed
    ) +
    theme(
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks = element_line(linewidth = 0.1),
        axis.text = element_text(
            family = "Arial",
            size = 7
        ),
        strip.text = element_blank(),
        strip.background = element_blank(),
        panel.grid = element_line(
            color = "black",
            linewidth = 0.1,
            linetype = 2
        ),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.y = unit(0, "lines"),
        panel.background = element_rect(
            color = "black",
            fill = "white",
            linewidth = 0.1
        ),
        text = element_text(
            family = "Arial",
            size = 7
        )
    )
# Compose the dot plot and the bias panel side by side (3:1 width ratio) and
# hand the result to ggsave() explicitly instead of relying on last_plot().
combined_plot <- g1 + g2 + plot_layout(widths = c(3, 1))
ggsave(
    "/content/drive/MyDrive/datasets/cleaning/SBIC.eps",
    plot = combined_plot,
    height = 20,
    width = 18,
    units = "cm"
)

# Bias score computation using finer-grained categories of personas

In [None]:
%%R
# Rows of alpha_ib_0301 / alpha_ib_0613 belonging to each finer-grained
# persona category, listed in the order in which they are reported.
fine_grained_rows <- list(
    "Religion" = 15:20,
    "Origin" = 23:25,
    "Age" = 1:2,
    "Weight" = 3:4,
    "Height" = 5:6,
    "Attractiveness" = 7:8,
    "Socio-economic" = 54:56,
    "Political" = 57:62
)
for (category in names(fine_grained_rows)) {
    print(category)
    rows <- fine_grained_rows[[category]]
    # Bias score for the 0301 model first, then the 0613 model.
    for (alpha_ib in list(alpha_ib_0301, alpha_ib_0613)) {
        print(round(Gini_RSV(alpha_ib$alpha[rows])[[1]], digits = 3))
    }
}

# Double check Krippendorff's alpha coefficients

In [None]:
def double_check_alpha(df, reference='predicted_q1a_labels_base'):
    """Print nominal Krippendorff's alpha of every column against a reference.

    Args:
        df: DataFrame of rating columns; must contain ``reference``.
        reference: Name of the column each column of ``df`` is compared
            with. The reference column itself is also iterated over, so it
            is compared with itself.

    Returns:
        None. For every column its name is printed, followed by the alpha
        value rounded to three decimals.
    """
    for col in df.columns:
        print(col)
        # krippendorff.alpha expects one row per rater, hence the transpose.
        ratings = df[[reference, col]].T.to_numpy().tolist()
        print(np.round(krippendorff.alpha(
            ratings,
            level_of_measurement='nominal'), 3))


double_check_alpha(SBIC_0301.iloc[:, 15:])
print()
double_check_alpha(SBIC_0613.iloc[:, 15:])