# Mount drive

In [None]:
# Mount Google Drive at /content/drive; the dataset and font paths used by
# the cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries

In [None]:
import numpy as np
import pandas as pd
# Prevent AttributeError. via
# https://stackoverflow.com/a/76404841
pd.DataFrame.iteritems = pd.DataFrame.items
import statistics

In [None]:
%load_ext rpy2.ipython

In [None]:
%%R
# Install the R packages used by the R cells of this notebook. dplyr, ggh4x,
# patchwork, showtext and stringr are installed with install.packages();
# ggplot2, GiniWegNeg and irrCAC are installed from GitHub through devtools.
# NOTE(review): devtools itself is not installed in this cell, so it has to
# be available in the R library already.
install.packages("dplyr")
install.packages("ggh4x")
devtools::install_github("hadley/ggplot2")
devtools::install_github("cran/GiniWegNeg")
devtools::install_github("kgwet/irrCAC")
install.packages("patchwork")
install.packages("showtext")
install.packages("stringr")
# Attach the packages so their functions can be called unqualified below.
library(dplyr)
library(ggh4x)
library(ggplot2)
library(GiniWegNeg)
library(irrCAC)
library(patchwork)
library(showtext)
# Register the Arial font under the family name "Arial". Arial.ttf must be
# downloaded and uploaded to the root of your Google Drive beforehand,
# otherwise the path below does not exist.
font_add("Arial", regular = "/content/drive/MyDrive/Arial.ttf")
library(stringr)

# Import datasets

## Dataset with annotations and annotator information

In [None]:
# Load the dataset with annotations and annotator information, then replace
# the index with a fresh 0..n-1 range.
raw_data = (
    pd.read_csv('/content/drive/MyDrive/datasets/raw_data.csv')
    .reset_index(drop=True)
)

## GPT annotations

In [None]:
# Load the cleaned GPT predictions (tab-separated) for each of the two
# gpt-3.5-turbo snapshots and give every frame a fresh 0..n-1 index.
POPQUORN_0301 = (
    pd.read_csv(
        '/content/drive/MyDrive/datasets/cleaning/'
        'POPQUORN_predictions_cleaned_gpt-3.5-turbo-0301.tsv',
        sep='\t',
    )
    .reset_index(drop=True)
)
POPQUORN_0613 = (
    pd.read_csv(
        '/content/drive/MyDrive/datasets/cleaning/'
        'POPQUORN_predictions_cleaned_gpt-3.5-turbo-0613.tsv',
        sep='\t',
    )
    .reset_index(drop=True)
)

# Aggregate actual annotations by demographic group

In [None]:
# Aggregate the individual annotations into one label per (text, group)
# pair: the mean offensiveness rating of the group, rounded to an integer.
# Groupby/aggregation pattern via
# https://stackoverflow.com/a/54304691 and
# https://stackoverflow.com/a/10374456
def _rounded_mean(ratings):
    """Return the mean of `ratings`, rounded with np.round, as an int.

    Note that np.round sends exact halves to the nearest even integer
    (e.g. 2.5 -> 2).
    """
    return int(np.round(statistics.mean(ratings)))


def _aggregate_offensiveness(demog_var):
    """Return one rounded-mean offensiveness row per (text, demog_var).

    The result has the columns 'text', `demog_var` and
    'offensiveness_mean'.
    """
    grouped_ratings = raw_data.groupby(['text', demog_var])['offensiveness']
    return (
        grouped_ratings.agg(_rounded_mean)
        .to_frame(name='offensiveness_mean')
        .reset_index()
    )


POPQUORN_age = _aggregate_offensiveness('age')
POPQUORN_education = _aggregate_offensiveness('education')
POPQUORN_gender = _aggregate_offensiveness('gender')
POPQUORN_occupation = _aggregate_offensiveness('occupation')
POPQUORN_race = _aggregate_offensiveness('race')

# Define a function to compute and interpret Krippendorff's alpha coefficients between personas and actual annotators

In [None]:
%%R
# Krippendorff's alpha between one GPT persona and the aggregated labels of
# the matching group of actual annotators.
#
# Arguments:
#   df_gold    data frame with the columns "text", `demog_var` and
#              "offensiveness_mean"
#   df_pred    data frame with a "text" column and a `persona` column
#   demog_var  name of the demographic column in `df_gold`
#   demog_spec value of `demog_var` selecting the group of interest
#   persona    name of the prediction column in `df_pred`
#   model      label copied into the "model" column of the result
#   measure    label copied into the "measure" column of the result
#
# Prints the number of paired items, the coefficient estimate, the category
# labels and the Landis-Koch benchmark, and returns a one-row data frame
# with the columns alpha, persona, model and measure.
compute_alpha_ia <- function(df_gold, df_pred, demog_var, demog_spec, persona,
                             model, measure) {
    # Keep only the gold labels of the requested demographic group.
    in_group <- df_gold[[demog_var]] == demog_spec
    gold_group <- df_gold[in_group, ]
    # Pair each gold label with the persona prediction for the same text.
    paired <- merge(gold_group, df_pred[c("text", persona)], by = "text")
    paired <- paired[c("offensiveness_mean", persona)]
    print(nrow(paired))
    ratings <- as.matrix(sapply(paired, as.numeric))
    # Retrieve and interpret Krippendorff's alpha values. via
    # https://cran.r-project.org/web/packages/irrCAC/vignettes/
    # benchmarking.html
    fit <- krippen.alpha.raw(
        ratings,
        weights = "linear",
        categ.labels = c(1, 2, 3, 4, 5)
    )
    estimate <- fit$est
    print(estimate)
    print(fit$categories)
    print(landis.koch.bf(estimate$coeff.val, estimate$coeff.se))
    data.frame(
        alpha = estimate$coeff.val,
        persona = persona,
        model = model,
        measure = measure
    )
}

# Call the `compute_alpha_ia` function to compute and interpret Krippendorff's alpha coefficients between personas and actual annotators

In [None]:
%%R -i POPQUORN_age -i POPQUORN_education -i POPQUORN_gender -i POPQUORN_occupation -i POPQUORN_race -i POPQUORN_0301 -i POPQUORN_0613
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
#
# NOTE: the %%R cell magic has to be the very first line of the cell, so all
# comments are placed below it.
#
# Compute alpha[ia] for every (persona, model snapshot) pair in one loop.
# Each result is printed and stored in a global variable named
# alpha_ia_<key>_<snapshot> (e.g. alpha_ia_18_0301), which is what the
# plotting cell reads.

# Build one persona specification.
#   key        suffix used in the result variable name
#   demog_var  demographic column in the gold data
#   demog_spec group value within that column
#   persona    prediction column in the GPT data frames
ia_spec <- function(key, demog_var, demog_spec, persona) {
    list(
        key = key,
        demog_var = demog_var,
        demog_spec = demog_spec,
        persona = persona
    )
}

ia_specs <- list(
    # Age range
    ia_spec("18", "age", "18-24",
        "predicted_scores_a_person_(aged_18-24)_in_the_United_States"),
    ia_spec("25", "age", "25-29",
        "predicted_scores_a_person_(aged_25-29)_in_the_United_States"),
    ia_spec("30", "age", "30-34",
        "predicted_scores_a_person_(aged_30-34)_in_the_United_States"),
    ia_spec("35", "age", "35-39",
        "predicted_scores_a_person_(aged_35-39)_in_the_United_States"),
    ia_spec("40", "age", "40-44",
        "predicted_scores_a_person_(aged_40-44)_in_the_United_States"),
    ia_spec("45", "age", "45-49",
        "predicted_scores_a_person_(aged_45-49)_in_the_United_States"),
    ia_spec("50", "age", "50-54",
        "predicted_scores_a_person_(aged_50-54)_in_the_United_States"),
    ia_spec("54", "age", "54-59",
        "predicted_scores_a_person_(aged_54-59)_in_the_United_States"),
    ia_spec("60", "age", "60-64",
        "predicted_scores_a_person_(aged_60-64)_in_the_United_States"),
    ia_spec("65", "age", ">65",
        "predicted_scores_a_person_(aged_>65)_in_the_United_States"),
    # Education level
    ia_spec("lesshigh", "education", "Less than a high school diploma",
        "predicted_scores_a_person_(whose_education_level_is_less_than_a_high_school_diploma)_in_the_United_States"),
    ia_spec("high", "education", "High school diploma or equivalent",
        "predicted_scores_a_person_(whose_education_level_is_a_high_school_diploma_or_equivalent)_in_the_United_States"),
    ia_spec("college", "education", "College degree",
        "predicted_scores_a_person_(whose_education_level_is_a_college_degree)_in_the_United_States"),
    ia_spec("grad", "education", "Graduate degree",
        "predicted_scores_a_person_(whose_education_level_is_a_graduate_degree)_in_the_United_States"),
    # Gender identity
    ia_spec("man", "gender", "Man",
        "predicted_scores_a_man_in_the_United_States"),
    ia_spec("woman", "gender", "Woman",
        "predicted_scores_a_woman_in_the_United_States"),
    ia_spec("nonbinary", "gender", "Non-binary",
        "predicted_scores_a_non-binary_person_in_the_United_States"),
    # Occupation
    ia_spec("employed", "occupation", "Employed",
        "predicted_scores_an_employed_person_in_the_United_States"),
    ia_spec("unemployed", "occupation", "Unemployed",
        "predicted_scores_an_unemployed_person_in_the_United_States"),
    ia_spec("selfemp", "occupation", "Self-employed",
        "predicted_scores_a_self-employed_person_in_the_United_States"),
    ia_spec("retired", "occupation", "Retired",
        "predicted_scores_a_retiree_in_the_United_States"),
    ia_spec("student", "occupation", "Student",
        "predicted_scores_a_student_in_the_United_States"),
    ia_spec("homemaker", "occupation", "Homemaker",
        "predicted_scores_a_homemaker_in_the_United_States"),
    # Race/Ethnicity
    ia_spec("white", "race", "White",
        "predicted_scores_a_White_person_in_the_United_States"),
    ia_spec("black", "race", "Black or African American",
        "predicted_scores_a_Black_or_African_American_in_the_United_States"),
    ia_spec("asian", "race", "Asian",
        "predicted_scores_an_Asian_in_the_United_States"),
    ia_spec("aian", "race", "Native American",
        "predicted_scores_an_American_Indian_or_Alaska_Native_in_the_United_States")
)

# Gold (human) labels per demographic variable and GPT predictions per model
# snapshot, so the loop can look both up by name.
ia_gold_frames <- list(
    age = POPQUORN_age,
    education = POPQUORN_education,
    gender = POPQUORN_gender,
    occupation = POPQUORN_occupation,
    race = POPQUORN_race
)
ia_pred_frames <- list(
    "0301" = POPQUORN_0301,
    "0613" = POPQUORN_0613
)

for (spec in ia_specs) {
    for (snapshot in names(ia_pred_frames)) {
        alpha_ia_result <- compute_alpha_ia(
            ia_gold_frames[[spec$demog_var]],
            ia_pred_frames[[snapshot]],
            spec$demog_var,
            spec$demog_spec,
            spec$persona,
            # Yields e.g. gpt-3.5-turbo-"0301"; the inner quotes keep the
            # label valid when it is parsed as a plotmath expression.
            paste0("gpt-3.5-turbo-\"", snapshot, "\""),
            "\"alpha\"[ia]"
        )
        print(alpha_ia_result)
        assign(
            paste0("alpha_ia_", spec$key, "_", snapshot),
            alpha_ia_result,
            envir = globalenv()
        )
    }
}

# Define a function to compute and interpret Krippendorff's alpha coefficients between the personas and the no-persona defaults

In [None]:
%%R
# Krippendorff's alpha between the `reference` column of `df` and every
# column of `df`.
#
# Arguments:
#   df        data frame of ratings, one column per persona; must contain
#             the `reference` column
#   reference name of the column every other column is compared against
#   model     label copied into the "model" column of the result
#   measure   label copied into the "measure" column of the result
#   skip_idx  position of the column whose coefficient is printed but left
#             out of the result. Defaults to 16, the value that used to be
#             hard-coded (presumably the position of `reference` itself in
#             the data passed by the callers -- TODO confirm).
#
# For every column the column name, the coefficient estimate, the category
# labels and the Landis-Koch benchmark are printed. Returns a data frame
# with one row per retained column and the columns alpha, persona, model and
# measure.
compute_alpha_ib <- function(df, reference, model, measure, skip_idx = 16) {
    personas <- colnames(df)
    alpha_list <- c()
    for (i in seq_along(personas)) {
        print(personas[i])
        # Retrieve and interpret Krippendorff's alpha values. via
        # https://cran.r-project.org/web/packages/irrCAC/vignettes/
        # benchmarking.html
        df_temp <- df[, c(reference, personas[i])]
        df_temp <- as.matrix(sapply(df_temp, as.numeric))
        a <- krippen.alpha.raw(df_temp,
            weights = "linear",
            categ.labels = c(1, 2, 3, 4, 5)
        )
        print(a$est)
        print(a$categories)
        print(landis.koch.bf(a$est$coeff.val, a$est$coeff.se))
        if (i != skip_idx) {
            alpha_list <- c(alpha_list, a$est$coeff.val)
        }
    }
    # Size the label columns from alpha_list. The previous length(alpha)
    # referred to no local variable and only worked because ggplot2 exports
    # a function called alpha().
    res <- data.frame(
        alpha = alpha_list,
        persona = personas[-skip_idx],
        model = rep(model, length(alpha_list)),
        measure = rep(measure, length(alpha_list))
    )
    return(res)
}

# Call the `compute_alpha_ib` function to compute and interpret Krippendorff's alpha coefficients between the personas and the no-persona defaults

In [None]:
%%R -i POPQUORN_0301
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# NOTE: the %%R cell magic has to be the first line of the cell, so the
# comments are placed below it.
#
# alpha[ib] for gpt-3.5-turbo-0301: every column in positions 2..31 of the
# predictions is compared against the "predicted_scores_base" column.
alpha_ib_0301 <- compute_alpha_ib(
    POPQUORN_0301[, 2:31],
    "predicted_scores_base",
    "gpt-3.5-turbo-\"0301\"",
    "\"alpha\"[ib]"
)
print(alpha_ib_0301)

In [None]:
%%R -i POPQUORN_0613
# Import data between languages. via
# https://www.askpython.com/python/examples/
# use-r-and-python-in-the-same-notebook
# NOTE: the %%R cell magic has to be the first line of the cell, so the
# comments are placed below it.
#
# alpha[ib] for gpt-3.5-turbo-0613: every column in positions 2..31 of the
# predictions is compared against the "predicted_scores_base" column.
alpha_ib_0613 <- compute_alpha_ib(
    POPQUORN_0613[, 2:31],
    "predicted_scores_base",
    "gpt-3.5-turbo-\"0613\"",
    "\"alpha\"[ib]"
)
print(alpha_ib_0613)

# Plot Krippendorff's alpha coefficients and Bias

In [None]:
%%R
# Turn on showtext rendering for the plots below (the "Arial" family used in
# the plot text is registered with font_add() in the setup cell).
showtext_auto()
# Stack all coefficient tables into one long data frame. The row order is
# load-bearing: the bias, labs, attribute and grp columns assigned further
# down in this cell are built positionally, assuming the alpha_ib_0301 rows
# first, then the alpha_ib_0613 rows, then the 54 alpha_ia rows in exactly
# this order (age, education, gender, occupation, race; 0301 before 0613).
data <- rbind(
    alpha_ib_0301,
    alpha_ib_0613,
    alpha_ia_18_0301,
    alpha_ia_18_0613,
    alpha_ia_25_0301,
    alpha_ia_25_0613,
    alpha_ia_30_0301,
    alpha_ia_30_0613,
    alpha_ia_35_0301,
    alpha_ia_35_0613,
    alpha_ia_40_0301,
    alpha_ia_40_0613,
    alpha_ia_45_0301,
    alpha_ia_45_0613,
    alpha_ia_50_0301,
    alpha_ia_50_0613,
    alpha_ia_54_0301,
    alpha_ia_54_0613,
    alpha_ia_60_0301,
    alpha_ia_60_0613,
    alpha_ia_65_0301,
    alpha_ia_65_0613,
    alpha_ia_lesshigh_0301,
    alpha_ia_lesshigh_0613,
    alpha_ia_high_0301,
    alpha_ia_high_0613,
    alpha_ia_college_0301,
    alpha_ia_college_0613,
    alpha_ia_grad_0301,
    alpha_ia_grad_0613,
    alpha_ia_man_0301,
    alpha_ia_man_0613,
    alpha_ia_woman_0301,
    alpha_ia_woman_0613,
    alpha_ia_nonbinary_0301,
    alpha_ia_nonbinary_0613,
    alpha_ia_employed_0301,
    alpha_ia_employed_0613,
    alpha_ia_unemployed_0301,
    alpha_ia_unemployed_0613,
    alpha_ia_selfemp_0301,
    alpha_ia_selfemp_0613,
    alpha_ia_retired_0301,
    alpha_ia_retired_0613,
    alpha_ia_student_0301,
    alpha_ia_student_0613,
    alpha_ia_homemaker_0301,
    alpha_ia_homemaker_0613,
    alpha_ia_white_0301,
    alpha_ia_white_0613,
    alpha_ia_black_0301,
    alpha_ia_black_0613,
    alpha_ia_asian_0301,
    alpha_ia_asian_0613,
    alpha_ia_aian_0301,
    alpha_ia_aian_0613
)
# Turn the prediction column names into short persona labels for the plot.
# Replace underscores with spaces. via
# https://stackoverflow.com/a/53107084
data$persona <- gsub("_", " ", data$persona)
# Remove first few words. via
# https://stackoverflow.com/a/64889117
data$persona <- sub("^\\w+\\s\\w+\\s\\w+\\s", "", data$persona)
# Remove everything after specified substring. via
# https://stackoverflow.com/a/53503384
# The pattern is anchored on the whole "in the United States" phrase: a bare
# "in.+$" also matches the "in" inside "non-binary" and cut that label down
# to "non-b".
data$persona <- sub("in the United States.*$", "", data$persona)
# Append an ellipsis; paste() is vectorised, so the column stays a plain
# character vector.
data$persona <- paste(data$persona, "...")
# Remove substring. via
# https://stackoverflow.com/a/74536951
data$persona <- sub("person (whose education level is", "",
                    data$persona,
                    fixed = TRUE)
# Compute bias
# Row ranges of alpha_ib_0301 / alpha_ib_0613 that belong to each attribute
# block (sizes 11, 4, 3, 6 and 5), in the order the blocks are stacked in
# `data`.
attribute_rows <- list(1:11, 12:15, 16:18, 19:24, 25:29)
# For one alpha_ib_* table, take the first element returned by Gini_RSV()
# for every attribute block and repeat it once per row of that block.
gini_by_attribute <- function(alpha_ib) {
    unlist(lapply(attribute_rows, function(rows) {
        rep(Gini_RSV(alpha_ib$alpha[rows])[[1]], length(rows))
    }))
}
# Each coefficient is computed once and reused for both the bar height
# (bias) and its printed label (labs).
ib_bias <- c(
    gini_by_attribute(alpha_ib_0301),
    gini_by_attribute(alpha_ib_0613)
)
# The remaining 54 rows get a bias of 0 and the literal label "NA".
data$bias <- c(ib_bias, rep(0, 54))
# Combining rounded numbers with the string "NA" makes labs a character
# column.
data$labs <- c(round(ib_bias, digits = 3), rep("NA", 54))
# Attribute label for every row of `data`. The first 58 rows are two
# identical stacks of 11/4/3/6/5 rows; the last 54 rows use block sizes
# 20/8/6/12/8. The "~" is rendered as a space by label_parsed in the facets.
data$attribute <- c(
    rep(
        rep(
            c(
                "Age~range", "Education~level", "Gender~identity",
                "Occupation", "Race/Ethnicity"
            ),
            times = c(11, 4, 3, 6, 5)
        ),
        times = 2
    ),
    rep(
        c(
            "Age~range", "Education~level", "Gender~identity",
            "Occupation", "Race/Ethnicity"
        ),
        times = c(20, 8, 6, 12, 8)
    )
)
# Constant dummy value used as the y position / grouping variable in the
# plots, so all points of a facet share one line.
data$grp <- rep(1, 112)
# Dot plots. via
# https://uc-r.github.io/cleveland-dot-plots
# Left panel: one facet row per measure/attribute/model combination with
# the alpha values as dots on a shared x axis.
g1 <- ggplot(
    data,
    aes(alpha, grp)
) +
    # Line widths use `linewidth`; `size` for lines is deprecated since
    # ggplot2 3.4.0.
    geom_line(aes(group = grp),
        linewidth = 0.05
    ) +
    geom_point(size = 0.1) +
    # Label max value dots. via
    # https://stackoverflow.com/a/48351513
    geom_text(
        # Keep only the row(s) whose alpha equals the maximum within each
        # facet.
        data = . %>% group_by(measure, attribute, model) %>%
            filter(alpha == max(alpha)),
        # Wrap and truncate text. via
        # https://stackoverflow.com/a/73357493
        aes(label = str_wrap(persona, width = 15)),
        hjust = 0,
        family = "Arial",
        # Divide by .pt so the label size is given in points, matching the
        # theme text size below.
        size = 7 / .pt,
        nudge_x = 0.01,
        lineheight = 1
    ) +
    scale_x_continuous(
        breaks = c(-.5, -.25, 0, .25, .5, .75, 1),
        limits = c(-.5, 1)
    ) +
    xlab("Krippendorff's alpha coefficient") +
    facet_nested(
        rows = vars(measure, attribute, model),
        # Put the strips on the left-hand side.
        switch = "y",
        labeller = label_parsed,
        strip = strip_nested(size = "variable")
    ) +
    theme(
        # The y axis only carries the dummy `grp` value, so hide it.
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks = element_line(linewidth = 0.1),
        axis.text = element_text(
            family = "Arial",
            size = 7
        ),
        strip.text.y.left = element_text(
            angle = 0,
            family = "Arial",
            size = 7
        ),
        strip.background = element_rect(
            colour = "white",
            fill = "#ECECEC"
        ),
        panel.grid = element_line(
            color = "black",
            linewidth = 0.1,
            linetype = 2
        ),
        # Only the major vertical grid lines remain visible.
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.y = unit(0, "lines"),
        panel.background = element_rect(
            color = "black",
            fill = "white",
            linewidth = 0.1
        ),
        text = element_text(
            family = "Arial",
            size = 7
        )
    )
# Prepare the data for the bias panel: drop the per-persona columns and
# remove the rows that become duplicates as a result.
# NOTE(review): presumably this leaves one row per facet -- confirm that no
# other column of `data` still varies within a facet.
data$alpha <- NULL
data$persona <- NULL
data <- data[!duplicated(data), ]
# Right panel: horizontal bar showing the bias value for each facet, with
# the rounded value printed next to it.
g2 <- ggplot(
    data,
    aes(grp, bias)
) +
    geom_text(aes(label = labs),
        # Negative hjust shifts the label just past the end of the bar.
        hjust = -0.1,
        family = "Arial",
        size = 7 / .pt
    ) +
    coord_flip() +
    ylab("Bias") +
    scale_y_continuous(breaks = c(0.00, 1.00), limits = c(0, 1)) +
    # geom_col() is the dedicated geom for bars whose height is taken from
    # the data (equivalent to geom_bar(stat = "identity")).
    geom_col(fill = "black") +
    facet_nested(
        rows = vars(measure, attribute, model),
        labeller = label_parsed
    ) +
    theme(
        # The flipped axis only carries the dummy `grp` value, so hide it.
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        # Line widths use `linewidth`; `size` for lines is deprecated since
        # ggplot2 3.4.0.
        axis.ticks = element_line(linewidth = 0.1),
        axis.text = element_text(
            family = "Arial",
            size = 7
        ),
        # Facet strips are hidden in this panel.
        strip.text = element_blank(),
        strip.background = element_blank(),
        panel.grid = element_line(
            color = "black",
            linewidth = 0.1,
            linetype = 2
        ),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.spacing.y = unit(0, "lines"),
        panel.background = element_rect(
            color = "black",
            fill = "white",
            linewidth = 0.1
        ),
        text = element_text(
            family = "Arial",
            size = 7
        )
    )
# Place the dot plot and the bias panel side by side (width ratio 2.3 : 1).
popquorn_figure <- g1 + g2 + plot_layout(widths = c(2.3, 1))
popquorn_figure
# Pass the figure explicitly instead of relying on ggsave()'s default of
# last_plot(), so the saved file does not depend on global plot state.
ggsave(
    "/content/drive/MyDrive/datasets/cleaning/POPQUORN.eps",
    plot = popquorn_figure,
    height = 20,
    width = 18,
    units = "cm"
)