In [1]:
import pandas as pd

# Load the merged LLaMA 3.1 shuffled output
df = pd.read_csv("LLaMA3.1_Shuffled_Merged_Multilingual.csv")

# Check if all three predictions agree across EN, FA, IT
df["All_Agree"] = (
    (df["LLaMA3.1_EN"] == df["LLaMA3.1_FA"]) &
    (df["LLaMA3.1_EN"] == df["LLaMA3.1_IT"])
)

# Calculate agreement rate
agreement_rate = df["All_Agree"].mean() * 100
print(f"✅ Full agreement across EN, FA, IT (Shuffled): {agreement_rate:.2f}%")


✅ Full agreement across EN, FA, IT (Shuffled): 43.33%


In [2]:
df["Disagree"] = ~df["All_Agree"]
disagreements_by_domain = df[df["Disagree"]].groupby("Domain_EN").size()
print(disagreements_by_domain)


Domain_EN
Activism & Social Change    2
Arts & Culture              3
Business & Economics        3
Caregiving & Parenting      3
Education                   4
Healthcare                  1
Hospitality & Service       3
Journalism & Media          4
Law & Justice               4
Politics & Leadership       3
Religion & Spirituality     2
Science & Technology        2
dtype: int64


In [3]:
import pandas as pd

# Load the merged LLaMA 3.1 shuffled output
df = pd.read_csv("LLaMA3.1_Shuffled_Merged_Multilingual.csv")

# Add Disagreement_Type column
df["Disagreement_Type"] = df.apply(lambda row: 
    "FA diverged" if row["LLaMA3.1_EN"] == row["LLaMA3.1_IT"] != row["LLaMA3.1_FA"] else
    "IT diverged" if row["LLaMA3.1_EN"] == row["LLaMA3.1_FA"] != row["LLaMA3.1_IT"] else
    "EN diverged" if row["LLaMA3.1_FA"] == row["LLaMA3.1_IT"] != row["LLaMA3.1_EN"] else
    "All different", axis=1
)

# Print breakdown of disagreement types
print(df["Disagreement_Type"].value_counts())


Disagreement_Type
All different    32
IT diverged      15
FA diverged       7
EN diverged       6
Name: count, dtype: int64


In [5]:
print("Option frequency in FA:")
print(df["LLaMA3.1_FA"].value_counts(normalize=True).round(2) * 100)

Option frequency in FA:
LLaMA3.1_FA
B    38.0
C    33.0
A    18.0
D    10.0
Name: proportion, dtype: float64


In [6]:
import pandas as pd

# Load your aligned LLaMA 3.1 shuffled results
df = pd.read_csv("LLaMA3.1_Shuffled_Merged_Multilingual.csv")

# Identify disagreement rows (not all 3 agree)
df["Disagreement"] = (df["LLaMA3.1_EN"] != df["LLaMA3.1_FA"]) | \
                     (df["LLaMA3.1_EN"] != df["LLaMA3.1_IT"]) | \
                     (df["LLaMA3.1_FA"] != df["LLaMA3.1_IT"])

# Export only rows with disagreement
df_disagreement = df[df["Disagreement"] == True]
df_disagreement.to_csv("LLaMA3.1_Shuffled_Disagreements.csv", index=False)
print(f"✅ Exported {len(df_disagreement)} disagreement cases to LLaMA3.1_Shuffled_Disagreements.csv")

# Count disagreement cases per domain
disagreement_by_domain = df_disagreement["Domain_EN"].value_counts().reset_index()
disagreement_by_domain.columns = ["Domain", "Disagreement_Count"]
disagreement_by_domain.to_csv("LLaMA3.1_Shuffled_Disagreement_By_Domain.csv", index=False)
print("✅ Saved domain-wise disagreement counts to LLaMA3.1_Shuffled_Disagreement_By_Domain.csv")

# Show top domains with most disagreement
print("\n📊 Top domains with most disagreement:")
print(disagreement_by_domain.head(10))


✅ Exported 34 disagreement cases to LLaMA3.1_Shuffled_Disagreements.csv
✅ Saved domain-wise disagreement counts to LLaMA3.1_Shuffled_Disagreement_By_Domain.csv

📊 Top domains with most disagreement:
                     Domain  Disagreement_Count
0                 Education                   4
1             Law & Justice                   4
2        Journalism & Media                   4
3     Politics & Leadership                   3
4    Caregiving & Parenting                   3
5     Hospitality & Service                   3
6      Business & Economics                   3
7            Arts & Culture                   3
8  Activism & Social Change                   2
9      Science & Technology                   2


In [7]:
import pandas as pd
from collections import Counter

# Load merged results file for LLaMA 3.1 shuffled
df = pd.read_csv("LLaMA3.1_Shuffled_Merged_Multilingual.csv")

# Check full agreement
df["Full_Agreement"] = (
    (df["LLaMA3.1_EN"] == df["LLaMA3.1_FA"]) &
    (df["LLaMA3.1_EN"] == df["LLaMA3.1_IT"])
)

# Classify type of disagreement
def classify_disagreement(row):
    choices = {row["LLaMA3.1_EN"], row["LLaMA3.1_FA"], row["LLaMA3.1_IT"]}
    if len(choices) == 1:
        return "All same"
    elif len(choices) == 3:
        return "All different"
    elif row["LLaMA3.1_EN"] != row["LLaMA3.1_FA"] and row["LLaMA3.1_EN"] == row["LLaMA3.1_IT"]:
        return "FA diverged"
    elif row["LLaMA3.1_EN"] != row["LLaMA3.1_IT"] and row["LLaMA3.1_EN"] == row["LLaMA3.1_FA"]:
        return "IT diverged"
    elif row["LLaMA3.1_EN"] != row["LLaMA3.1_FA"] and row["LLaMA3.1_FA"] == row["LLaMA3.1_IT"]:
        return "EN diverged"
    else:
        return "Other"

df["Disagreement_Type"] = df.apply(classify_disagreement, axis=1)

# Count full agreement
agreement_count = df["Full_Agreement"].sum()
total = len(df)
agreement_pct = agreement_count / total * 100

# Disagreement breakdown
disagreement_summary = df["Disagreement_Type"].value_counts().to_frame("count")

# Frequency of choices in Persian
fa_freq = df["LLaMA3.1_FA"].value_counts(normalize=True).rename("proportion").to_frame()

# Domain-wise disagreement
domain_disagreements = df[df["Disagreement_Type"] != "All same"]["Domain_EN"].value_counts().to_frame("count")

# Display results
print("🔎 Full Agreement Rate:")
print(f"{agreement_count}/{total} prompts ({agreement_pct:.2f}%)\n")

print("🧩 Disagreement Types:")
print(disagreement_summary, "\n")

print("🗳️ Option Frequency in Persian (LLaMA3.1_FA):")
print(fa_freq, "\n")

print("🌍 Domain-wise Disagreements:")
print(domain_disagreements)


🔎 Full Agreement Rate:
26/60 prompts (43.33%)

🧩 Disagreement Types:
                   count
Disagreement_Type       
All same              26
IT diverged           15
FA diverged            7
EN diverged            6
All different          6 

🗳️ Option Frequency in Persian (LLaMA3.1_FA):
             proportion
LLaMA3.1_FA            
B              0.383333
C              0.333333
A              0.183333
D              0.100000 

🌍 Domain-wise Disagreements:
                          count
Domain_EN                      
Education                     4
Law & Justice                 4
Journalism & Media            4
Politics & Leadership         3
Caregiving & Parenting        3
Hospitality & Service         3
Business & Economics          3
Arts & Culture                3
Activism & Social Change      2
Science & Technology          2
Religion & Spirituality       2
Healthcare                    1
