In [2]:
import pandas as pd

# Load merged mBERT output
df = pd.read_csv("XLM-R_Merged_Multilingual.csv")

# Check if all three predictions agree
df["All_Agree"] = (
    (df["XLMR_EN"] == df["XLMR_FA"]) &
    (df["XLMR_EN"] == df["XLMR_IT"])
)

# Calculate agreement rate
agreement_rate = df["All_Agree"].mean() * 100
print(f"✅ Full agreement across EN, FA, IT: {agreement_rate:.2f}%")

✅ Full agreement across EN, FA, IT: 8.33%


In [5]:
df["Disagree"] = ~df["All_Agree"]
disagreements_by_domain = df[df["Disagree"]].groupby("Domain_EN").size()
print(disagreements_by_domain)

Domain_EN
Activism & Social Change    5
Arts & Culture              5
Business & Economics        3
Caregiving & Parenting      5
Education                   4
Healthcare                  5
Hospitality & Service       4
Journalism & Media          5
Law & Justice               5
Politics & Leadership       5
Religion & Spirituality     5
Science & Technology        4
dtype: int64


In [6]:
import pandas as pd

# Load the merged mBERT output
df = pd.read_csv("XLM-R_Merged_Multilingual.csv")

# Add Disagreement_Type column for mBERT
df["Disagreement_Type"] = df.apply(lambda row: 
    "FA diverged" if row["XLMR_EN"] == row["XLMR_IT"] != row["XLMR_FA"] else
    "IT diverged" if row["XLMR_EN"] == row["XLMR_FA"] != row["XLMR_IT"] else
    "EN diverged" if row["XLMR_FA"] == row["XLMR_IT"] != row["XLMR_EN"] else
    "All different", axis=1
)

# Print the disagreement breakdown
print(df["Disagreement_Type"].value_counts())


Disagreement_Type
All different    26
IT diverged      12
EN diverged      11
FA diverged      11
Name: count, dtype: int64


In [7]:
print("Option frequency in FA:")
print(df["XLMR_FA"].value_counts(normalize=True).round(2) * 100)

Option frequency in FA:
XLMR_FA
B    52.0
D    23.0
A    13.0
C    12.0
Name: proportion, dtype: float64


In [8]:
import pandas as pd

# Load your aligned mBERT results
df = pd.read_csv("XLM-R_Merged_Multilingual.csv")

# Identify disagreement rows (not all 3 agree)
df["Disagreement"] = (df["XLMR_EN"] != df["XLMR_FA"]) | \
                     (df["XLMR_EN"] != df["XLMR_IT"]) | \
                     (df["XLMR_FA"] != df["XLMR_IT"])

# Export only the rows with disagreement
df_disagreement = df[df["Disagreement"] == True]
df_disagreement.to_csv("XLMR_Disagreements.csv", index=False)
print(f"✅ Exported {len(df_disagreement)} disagreement cases to mBERT_Disagreements.csv")

# Count disagreement cases per domain
disagreement_by_domain = df_disagreement["Domain_EN"].value_counts().reset_index()
disagreement_by_domain.columns = ["Domain", "Disagreement_Count"]
disagreement_by_domain.to_csv("XLMR_Disagreement_By_Domain.csv", index=False)
print("✅ Saved domain-wise disagreement counts to XLMR_Disagreement_By_Domain.csv")

# (Optional) Show most frequent disagreement domains in console
print("\n📊 Top domains with most disagreement:")
print(disagreement_by_domain.head(10))


✅ Exported 55 disagreement cases to mBERT_Disagreements.csv
✅ Saved domain-wise disagreement counts to XLMR_Disagreement_By_Domain.csv

📊 Top domains with most disagreement:
                     Domain  Disagreement_Count
0     Politics & Leadership                   5
1                Healthcare                   5
2            Arts & Culture                   5
3             Law & Justice                   5
4    Caregiving & Parenting                   5
5  Activism & Social Change                   5
6        Journalism & Media                   5
7   Religion & Spirituality                   5
8      Science & Technology                   4
9                 Education                   4


In [9]:
import pandas as pd
from collections import Counter

# Load merged results file for mBERT
df = pd.read_csv("XLM-R_Merged_Multilingual.csv")

# Check full agreement
df["Full_Agreement"] = (df["XLMR_EN"] == df["XLMR_FA"]) & (df["XLMR_EN"] == df["XLMR_IT"])

# Classify type of disagreement
def classify_disagreement(row):
    choices = {row["XLMR_EN"], row["XLMR_FA"], row["XLMR_IT"]}
    if len(choices) == 1:
        return "All same"
    elif len(choices) == 3:
        return "All different"
    elif row["XLMR_EN"] != row["XLMR_FA"] and row["XLMR_EN"] == row["XLMR_IT"]:
        return "FA diverged"
    elif row["XLMR_EN"] != row["XLMR_IT"] and row["XLMR_EN"] == row["XLMR_FA"]:
        return "IT diverged"
    elif row["XLMR_EN"] != row["XLMR_FA"] and row["XLMR_FA"] == row["XLMR_IT"]:
        return "EN diverged"
    else:
        return "Other"

df["Disagreement_Type"] = df.apply(classify_disagreement, axis=1)

# Count full agreement
agreement_count = df["Full_Agreement"].sum()
total = len(df)
agreement_pct = agreement_count / total * 100

# Disagreement breakdown
disagreement_summary = df["Disagreement_Type"].value_counts().to_frame("count")

# Frequency of choices in Persian
fa_freq = df["XLMR_FA"].value_counts(normalize=True).rename("proportion").to_frame()

# Domain-wise disagreement
domain_disagreements = df[df["Disagreement_Type"] != "All same"]["Domain_EN"].value_counts().to_frame("count")

# Display everything
print("🔎 Full Agreement Rate:")
print(f"{agreement_count}/{total} prompts ({agreement_pct:.2f}%)\n")

print("🧩 Disagreement Types:")
print(disagreement_summary, "\n")

print("🗳️ Option Frequency in Persian (XLMR_FA):")
print(fa_freq, "\n")

print("🌍 Domain-wise Disagreements:")
print(domain_disagreements)


🔎 Full Agreement Rate:
5/60 prompts (8.33%)

🧩 Disagreement Types:
                   count
Disagreement_Type       
All different         21
IT diverged           12
EN diverged           11
FA diverged           11
All same               5 

🗳️ Option Frequency in Persian (XLMR_FA):
         proportion
XLMR_FA            
B          0.516667
D          0.233333
A          0.133333
C          0.116667 

🌍 Domain-wise Disagreements:
                          count
Domain_EN                      
Politics & Leadership         5
Healthcare                    5
Arts & Culture                5
Law & Justice                 5
Caregiving & Parenting        5
Activism & Social Change      5
Journalism & Media            5
Religion & Spirituality       5
Science & Technology          4
Education                     4
Hospitality & Service         4
Business & Economics          3


In [1]:
import pandas as pd
import numpy as np

def bootstrap_xlmr_agreement_CI(filepath, prefix="XLMR", num_bootstrap=1000, confidence=0.95):
    df = pd.read_csv(filepath)
    en, fa, it = f"{prefix}_EN", f"{prefix}_FA", f"{prefix}_IT"
    df["Full_Agreement"] = (df[en] == df[fa]) & (df[en] == df[it])

    bootstrapped_agreements = []
    for _ in range(num_bootstrap):
        sample = df.sample(frac=1.0, replace=True)
        agreement_pct = sample["Full_Agreement"].mean() * 100
        bootstrapped_agreements.append(agreement_pct)

    lower_bound = np.percentile(bootstrapped_agreements, (1 - confidence) / 2 * 100)
    upper_bound = np.percentile(bootstrapped_agreements, (1 + confidence) / 2 * 100)
    mean_agreement = np.mean(bootstrapped_agreements)

    print(f"Bootstrapped {int(confidence*100)}% CI for XLM-R agreement:")
    print(f"Mean agreement: {mean_agreement:.2f}%")
    print(f"{int(confidence*100)}% Confidence Interval: [{lower_bound:.2f}%, {upper_bound:.2f}%]")

# Example usage:
bootstrap_xlmr_agreement_CI("XLM-R_Merged_Multilingual.csv")


Bootstrapped 95% CI for XLM-R agreement:
Mean agreement: 8.37%
95% Confidence Interval: [1.67%, 15.00%]


In [1]:
import pandas as pd
import numpy as np

def bootstrap_xlmr_disagreement_CI(filepath, prefix="XLMR", num_bootstrap=1000, confidence=0.95):
    df = pd.read_csv(filepath)
    en, fa, it = f"{prefix}_EN", f"{prefix}_FA", f"{prefix}_IT"
    # Disagreement = NOT all three agree
    df["Disagreement"] = ~((df[en] == df[fa]) & (df[en] == df[it]))

    bootstrapped_disagreements = []
    for _ in range(num_bootstrap):
        sample = df.sample(frac=1.0, replace=True)
        disagreement_pct = sample["Disagreement"].mean() * 100
        bootstrapped_disagreements.append(disagreement_pct)

    lower_bound = np.percentile(bootstrapped_disagreements, (1 - confidence) / 2 * 100)
    upper_bound = np.percentile(bootstrapped_disagreements, (1 + confidence) / 2 * 100)
    mean_disagreement = np.mean(bootstrapped_disagreements)

    print(f"Bootstrapped {int(confidence*100)}% CI for {prefix} disagreement:")
    print(f"Mean disagreement: {mean_disagreement:.2f}%")
    print(f"{int(confidence*100)}% Confidence Interval: [{lower_bound:.2f}%, {upper_bound:.2f}%]")

# Example usage:
bootstrap_xlmr_disagreement_CI("XLM-R_Merged_Multilingual.csv")


Bootstrapped 95% CI for XLMR disagreement:
Mean disagreement: 91.75%
95% Confidence Interval: [83.33%, 98.33%]
