# Mount drive

In [None]:
# Mount Google Drive at /content/drive; later cells read their CSV inputs from,
# and write their figures to, paths under this mount point.
from google.colab import drive
drive.mount("/content/drive")

# Import libraries

In [None]:
%load_ext rpy2.ipython

In [None]:
%%R
# remotes provides the install_github() / install_version() helpers used below.
install.packages("remotes")
# irrCAC and pairedCAC are installed from GitHub, each pinned to a commit hash.
remotes::install_github("kgwet/irrCAC@90c0d2f5cf7a94f42cc144a53028ab4a95d0f877")
remotes::install_github("kgwet/pairedCAC@ca6431c59633291a459c8c9a7523c63fc9c43e22")
# The remaining packages are installed at explicit version numbers.
remotes::install_version("ggplot2", version="4.0.1")
remotes::install_version("patchwork", version="1.3.2")
remotes::install_version("tidyverse", version="2.0.0")
# Attach the packages so later cells can call their functions unqualified.
library(irrCAC)
library(pairedCAC)
library(ggplot2)
library(patchwork)
library(tidyverse)

# Import data

### EXIST-es

In [None]:
%%R
# "man" inputs: man.csv plus one file per model (Qwen3 and Llama file names).
# Later cells join these frames on a `tweet` column and use
# `label_task1_unanimous` as the reference column.
EXIST_man <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/man.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_man_qwen3_30b_a3b_instruct_2507 <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_man_qwen3_30b_a3b_instruct_2507.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_man_llama_v3p3_70b_instruct <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_man_llama_v3p3_70b_instruct.csv", stringsAsFactors = FALSE)

# "woman" inputs, laid out the same way as the "man" inputs above.
EXIST_woman <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/woman.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_woman_qwen3_30b_a3b_instruct_2507 <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_woman_qwen3_30b_a3b_instruct_2507.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_woman_llama_v3p3_70b_instruct <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_woman_llama_v3p3_70b_instruct.csv", stringsAsFactors = FALSE)

In [None]:
%%R
# Gender-identity persona outputs, one CSV per model; the `_finetuned` suffix
# in a file name marks the fine-tuned variant of the same model.
# Later cells expect a `tweet` column, a `baseline_response_text` column and
# per-persona columns whose names contain "response_text".
EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507 <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned.csv", stringsAsFactors = FALSE)
EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905 <- read.csv("/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905.csv", stringsAsFactors = FALSE)

# Computation and interpretation of Krippendorff's alpha coefficients

In [None]:
%%R
# Print Krippendorff's alpha of every response column against a reference.
#
# Arguments:
#   df         Data frame holding the reference column and the response columns.
#   reference  Name of the reference column; it is never compared with itself.
#
# For each column whose name matches "response_text" (other than `reference`),
# prints the column name, the `est` and `categories` components returned by
# krippen.alpha.raw(), and the landis.koch.bf() result for the estimate.
compute_alpha <- function(df, reference) {
    rated_cols <- Filter(
        function(nm) nm != reference && grepl("response_text", nm),
        colnames(df)
    )
    for (rated_col in rated_cols) {
        print(rated_col)
        # Computation and interpretation of Krippendorff's alpha follow the
        # irrCAC benchmarking vignette:
        # https://cran.r-project.org/web/packages/irrCAC/vignettes/benchmarking.html
        alpha_fit <- krippen.alpha.raw(
            ratings = df[, c(reference, rated_col)],
            weights = "unweighted",
            categ.labels = c("No", "Yes"),
            conflev = .95
        )
        print(alpha_fit$est)
        print(alpha_fit$categories)
        print(landis.koch.bf(alpha_fit$est$coeff.val, alpha_fit$est$coeff.se))
    }
}

### Computation of Krippendorff's alpha coefficients between the personas and the no-persona defaults

In [None]:
%%R
# Agreement of every persona response column with the no-persona baseline
# column (Llama persona frame).
compute_alpha(EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct, "baseline_response_text")

In [None]:
%%R
# Same comparison for the fine-tuned Llama persona frame.
compute_alpha(EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned, "baseline_response_text")

In [None]:
%%R
# Same comparison for the Qwen3 persona frame.
compute_alpha(EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507, "baseline_response_text")

In [None]:
%%R
# Same comparison for the fine-tuned Qwen3 persona frame.
compute_alpha(EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned, "baseline_response_text")

In [None]:
%%R
# Same comparison for the Kimi K2 persona frame.
compute_alpha(EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905, "baseline_response_text")

### Computation of Krippendorff's alpha coefficients between the personas and the human annotations from annotators of the same sociodemographic group

In [None]:
%%R
# Agreement with the "man" human labels (`label_task1_unanimous`).
# First the two files loaded alongside man.csv are scored as they are; then,
# for every persona frame, its "a_man" columns are joined onto EXIST_man by
# tweet and scored. A separator line is printed after each comparison.
# local() keeps the loop variables out of the global environment.
invisible(local({
    separator <- "----------------------------------------------------------------------"
    for (man_specific in list(
        EXIST_sexist_es_man_llama_v3p3_70b_instruct,
        EXIST_sexist_es_man_qwen3_30b_a3b_instruct_2507
    )) {
        compute_alpha(man_specific, "label_task1_unanimous")
        print(separator)
    }
    for (persona_df in list(
        EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct,
        EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned,
        EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507,
        EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned,
        EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905
    )) {
        a_man_columns <- select(persona_df, tweet, contains("a_man"))
        with_labels <- left_join(EXIST_man, a_man_columns, by = "tweet")
        compute_alpha(with_labels, "label_task1_unanimous")
        print(separator)
    }
}))

In [None]:
%%R
# Agreement with the "woman" human labels (`label_task1_unanimous`).
# First the two files loaded alongside woman.csv are scored as they are; then,
# for every persona frame, its "a_woman" columns are joined onto EXIST_woman
# by tweet and scored. A separator line is printed after each comparison.
# local() keeps the loop variables out of the global environment.
invisible(local({
    separator <- "----------------------------------------------------------------------"
    for (woman_specific in list(
        EXIST_sexist_es_woman_llama_v3p3_70b_instruct,
        EXIST_sexist_es_woman_qwen3_30b_a3b_instruct_2507
    )) {
        compute_alpha(woman_specific, "label_task1_unanimous")
        print(separator)
    }
    for (persona_df in list(
        EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct,
        EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned,
        EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507,
        EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned,
        EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905
    )) {
        a_woman_columns <- select(persona_df, tweet, contains("a_woman"))
        with_labels <- left_join(EXIST_woman, a_woman_columns, by = "tweet")
        compute_alpha(with_labels, "label_task1_unanimous")
        print(separator)
    }
}))

# Computation of RIS

In [None]:
%%R
# Print, per instruction variant, the share of persona pairs whose agreement
# with the baseline differs significantly.
#
# Arguments:
#   df  Data frame with a "baseline_response_text" column and, for each of the
#       three instruction variants, at least two columns whose names match
#       "persona_instruction<k>_response_text".
#
# For every variant, each pair of matching columns is compared with
# ttest.alpha() (each column paired with the baseline column); the variant name
# and the fraction of pairs with p < 0.05 are then printed.
# Stops with an error when a variant has fewer than two matching columns.
compute_RIS <- function(df) {
    patterns <- c(
        "persona_instruction1" = "persona_instruction1_response_text",
        "persona_instruction2" = "persona_instruction2_response_text",
        "persona_instruction3" = "persona_instruction3_response_text"
    )
    for (grp in names(patterns)) {
        cols <- grep(patterns[[grp]], names(df), value = TRUE)
        # combn() would fail with an opaque "n < m" here; say what is missing.
        if (length(cols) < 2L) {
            stop(
                "compute_RIS needs at least two columns matching '",
                patterns[[grp]], "', found ", length(cols),
                call. = FALSE
            )
        }
        # vapply() guarantees exactly one numeric p-value per pair.
        p_values <- vapply(
            combn(cols, 2, simplify = FALSE),
            function(pair) {
                # weights/conflev are spelled out so this call uses the same
                # settings as the other ttest.alpha() calls in this notebook.
                ttest.alpha(
                    df[, c("baseline_response_text", pair[1])],
                    df[, c("baseline_response_text", pair[2])],
                    weights = "unweighted",
                    conflev = 0.95
                )$test$p.value
            },
            numeric(1)
        )
        print(grp)
        # An NA p-value propagates to an NA share rather than being dropped.
        print(mean(p_values < 0.05))
    }
}

In [None]:
%%R
# Run compute_RIS() on every persona frame, printing a separator line after
# each one. local() keeps the loop variable out of the global environment.
invisible(local({
    for (persona_df in list(
        EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct,
        EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned,
        EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507,
        EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned,
        EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905
    )) {
        compute_RIS(persona_df)
        print("----------------------------------------------------------------------")
    }
}))

# Generation of plots

In [None]:
%%R
# Axis labels for the plots, keyed by the persona identifier that
# generate_plot() extracts from the response column names.
persona_map <- setNames(
    c(
        "a man",
        "a woman",
        "a transgender man",
        "a transgender woman",
        "a non-binary individual"
    ),
    c(
        "a_man",
        "a_woman",
        "a_transgender_man",
        "a_transgender_woman",
        "a_non_binary_individual"
    )
)

# Save a three-panel tile plot of pairwise paired t-test outcomes.
#
# Arguments:
#   df          Data frame with a "baseline_response_text" column and the
#               per-persona response columns of the three instruction variants.
#   outputfile  Path handed to ggsave(); the figure is written with the
#               cairo_ps device at 28 x 14 inches.
#
# One panel is drawn per instruction variant. For every pair of persona columns
# ttest.alpha() compares their agreement with the baseline column, and the
# pair's tile is coloured by significance (p < 0.05) and by the sign of the
# reported coefficient difference. Axis labels come from the file-level
# `persona_map`.
generate_plot <- function(df, outputfile) {
    # Legend entries, in legend order, and their fill colours.
    x_higher <- "Krippendorff's alpha between baseline and x-axis persona is significantly higher"
    y_higher <- "Krippendorff's alpha between baseline and y-axis persona is significantly higher"
    no_difference <- "nonsignificant"
    fill_values <- c("#840032", "#e59500", "#002642")
    names(fill_values) <- c(x_higher, y_higher, no_difference)

    # Per-panel settings; position k in each vector describes panel k.
    panel_names <- c(
        "persona_instruction1",
        "persona_instruction2",
        "persona_instruction3"
    )
    panel_patterns <- c(
        "persona_instruction1_response_text",
        "persona_instruction2_response_text",
        "persona_instruction3_response_text"
    )
    panel_titles <- c(
        "(a) Instruction 1",
        "(b) Instruction 2",
        "(c) Instruction 3"
    )

    # Column name -> axis label: drop the "_<pattern>" part, then any
    # "_in_the_United_States" part, and look the remainder up in persona_map.
    label_for <- function(column_names, pattern) {
        persona_ids <- sub(paste0("_", pattern), "", column_names, fixed = TRUE)
        persona_ids <- sub("_in_the_United_States", "", persona_ids, fixed = TRUE)
        unname(persona_map[persona_ids])
    }

    plot_list <- list()
    for (k in seq_along(panel_names)) {
        panel_name <- panel_names[k]
        panel_pattern <- panel_patterns[k]

        # One row per unordered pair of persona columns.
        persona_cols <- sort(grep(panel_pattern, names(df), value = TRUE))
        compare_pair <- function(pair) {
            tst <- ttest.alpha(
                df[, c(pair[1], "baseline_response_text")],
                df[, c(pair[2], "baseline_response_text")],
                weights = "unweighted",
                conflev = 0.95
            )$test
            data.frame(
                persona1 = pair[1],
                persona2 = pair[2],
                coeff_diff = tst$coeff.diff,
                p_value = tst$p.value
            )
        }
        ttest_results <- do.call(
            rbind,
            lapply(combn(persona_cols, 2, simplify = FALSE), compare_pair)
        )

        # Both axes share one sorted set of labels as factor levels.
        ttest_results$persona1_label <- label_for(ttest_results$persona1, panel_pattern)
        ttest_results$persona2_label <- label_for(ttest_results$persona2, panel_pattern)
        label_levels <- sort(unique(c(
            ttest_results$persona1_label,
            ttest_results$persona2_label
        )))
        ttest_results$persona1_factor <- factor(ttest_results$persona1_label, levels = label_levels)
        ttest_results$persona2_factor <- factor(ttest_results$persona2_label, levels = label_levels)

        # NOTE(review): a negative coeff_diff is labelled "x-axis persona is
        # higher", where persona1 is on the x-axis. Whether that is right
        # depends on the sign convention of ttest.alpha()'s coeff.diff --
        # confirm against the pairedCAC documentation.
        is_significant <- ttest_results$p_value < 0.05
        ttest_results$legend_label <- ifelse(
            is_significant & ttest_results$coeff_diff < 0,
            x_higher,
            ifelse(
                is_significant & ttest_results$coeff_diff > 0,
                y_higher,
                no_difference
            )
        )

        g <- ggplot(ttest_results, aes(persona1_factor, persona2_factor, fill = legend_label)) +
            geom_tile(color = "white", linewidth = 1, show.legend = TRUE) +
            scale_fill_manual(
                values = fill_values,
                limits = names(fill_values),
                name = "Paired T-test Results",
                drop = FALSE
            ) +
            labs(title = panel_titles[k], x = NULL, y = NULL) +
            coord_fixed() +
            theme_minimal(base_family = "Helvetica", base_size = 42) +
            theme(
                axis.text.x = element_text(angle = 30, hjust = 1),
                plot.title = element_text(hjust = 0.5, margin = margin(b = 3, unit = "pt")),
                panel.grid.major = element_line(linewidth = 0.5, color = "grey80")
            )

        # The first panel has no left margin and the last no right margin;
        # only the first panel keeps its y-axis text and title.
        panel_margin <- switch(
            panel_name,
            "persona_instruction1" = margin(t = 0, r = 25, b = 0, l = 0, unit = "pt"),
            "persona_instruction3" = margin(t = 0, r = 0, b = 0, l = 25, unit = "pt"),
            margin(t = 0, r = 25, b = 0, l = 25, unit = "pt")
        )
        g <- g + theme(plot.margin = panel_margin)
        if (panel_name != "persona_instruction1") {
            g <- g + theme(
                axis.title.y = element_blank(),
                axis.text.y = element_blank()
            )
        }
        plot_list[[panel_name]] <- g
    }

    # Assemble the panels from a list of ggplots with patchwork. via
    # https://stackoverflow.com/questions/66688668/automatically-assemble-plots-for-patchwork-from-a-list-of-ggplots#comment117887198_66688668
    shared_theme <- theme(
        text = element_text(family = "Helvetica", size = 42),
        legend.title = element_text(margin = margin(b = 3, unit = "pt")),
        legend.position = "bottom",
        legend.justification = 0,
        legend.direction = "vertical"
    )
    aggregated_plot <- wrap_plots(plot_list, nrow = 1, guides = "collect") & shared_theme

    ggsave(
        outputfile,
        plot = aggregated_plot,
        device = cairo_ps,
        bg = "white",
        width = 28,
        height = 14,
        units = "in"
    )
}

In [None]:
%%R
# Write one EPS figure per persona frame, into the directory the CSVs were read
# from and under the same base name as the corresponding CSV.
generate_plot(EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct, "/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct.eps")
generate_plot(EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned, "/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned.eps")
generate_plot(EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507, "/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507.eps")
generate_plot(EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned, "/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned.eps")
generate_plot(EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905, "/content/drive/MyDrive/MaskedModX/EXIST/EXIST_sexist_es_gender_identity_personas_kimi_k2_instruct_0905.eps")

# Comparison between specific and composite fine-tuning (paired t-tests)

In [None]:
%%R
# Test, column by column, whether two sets of responses agree equally well
# with the human labels.
#
# Arguments:
#   df1  Data frame with a "tweet" key and "response_text" columns.
#   df2  Data frame with a "tweet" key and response columns sharing names with
#        df1.
#   "label_task1_unanimous" must be present in at least one of the two; when
#   both have it, the df1 copy is used.
#
# For every "response_text" column present in both inputs, rows are matched on
# tweet and ttest.alpha() compares the df1 version with the df2 version, each
# paired with the label column. The column name and the test result are
# printed.
specific_vs_composite <- function(df1, df2) {
    label_col <- "label_task1_unanimous"
    shared <- intersect(names(df1), names(df2))
    cols <- grep("response_text", shared, value = TRUE)
    # Columns present in both inputs get ".1"/".2" suffixes from the join. If
    # the label column were one of them, the unsuffixed lookup below would
    # fail, so shared columns that are neither the key nor compared are taken
    # from df1 only.
    shared_extras <- setdiff(shared, c("tweet", cols))
    df2_trimmed <- df2[, setdiff(names(df2), shared_extras), drop = FALSE]
    joined_df <- inner_join(df1, df2_trimmed, by = "tweet", suffix = c(".1", ".2"))
    for (col in cols) {
        print(col)
        tst <- ttest.alpha(
            joined_df[c(paste0(col, ".1"), label_col)],
            joined_df[c(paste0(col, ".2"), label_col)],
            weights = "unweighted",
            conflev = 0.95
        )$test
        print(tst)
    }
}

In [None]:
%%R
# "man" files vs. the fine-tuned persona frame of the same model
# (Llama, then Qwen3).
specific_vs_composite(EXIST_sexist_es_man_llama_v3p3_70b_instruct, EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned)
specific_vs_composite(EXIST_sexist_es_man_qwen3_30b_a3b_instruct_2507, EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned)
print("----------------------------------------------------------------------")
# "woman" files vs. the fine-tuned persona frame of the same model
# (Llama, then Qwen3).
specific_vs_composite(EXIST_sexist_es_woman_llama_v3p3_70b_instruct, EXIST_sexist_es_gender_identity_personas_llama_v3p3_70b_instruct_finetuned)
specific_vs_composite(EXIST_sexist_es_woman_qwen3_30b_a3b_instruct_2507, EXIST_sexist_es_gender_identity_personas_qwen3_30b_a3b_instruct_2507_finetuned)
print("----------------------------------------------------------------------")