In [34]:
import numpy as np
import pandas as pd
import krippendorff
from itertools import combinations

**Randomly sampling 30 examples (15 per class) for annotation**

In [None]:
import pandas as pd

# Load the full labelled dataset
df_original = pd.read_csv('./extremism_data_final.csv')

# Single source of truth for the output file, so the confirmation message
# printed at the end always names the file that was actually written.
sample_output_path = 'df_sample_30.csv'

# Stratified sampling: 15 rows from each label, with a fixed seed so the
# same rows are drawn on every run
class_extremist = df_original[df_original['Extremism_Label'] == 'EXTREMIST'].sample(n=15, random_state=42)
class_non_extremist = df_original[df_original['Extremism_Label'] == 'NON_EXTREMIST'].sample(n=15, random_state=42)

# Combine both samples
sample_df = pd.concat([class_extremist, class_non_extremist])

# Shuffle the combined rows so the two classes are interleaved, then
# discard the original index
sample_df = sample_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Add a 1-based ID column as the first column
sample_df.insert(0, 'ID', range(1, len(sample_df) + 1))

# Save to CSV
sample_df.to_csv(sample_output_path, index=False)

print(f"Sampling completed and file saved as {sample_output_path}")


Sampling completed and file saved as mdf_sample_30.csv


**Inter-Rater Reliability (IRR) Analysis**

In [None]:


# ===============================
# 1. Load Annotated Dataset
# ===============================

df_annotated = pd.read_csv("mbd_annotation_sample_30.csv")

# Columns compared in the reliability analysis: the dataset's original
# label followed by the five group members' labels.
annotator_columns = [
    "Extremism_Label",
    "Group_member1",
    "Group_member2",
    "Group_member3",
    "Group_member4",
    "Group_member5"
]

# Remove rows with missing annotations. The explicit copy makes the column
# assignments below write to an independent frame.
df_annotated = df_annotated.dropna(subset=annotator_columns).copy()

# Convert the categorical labels to integer codes using ONE category set
# shared by every column. Encoding each column on its own would give a
# column that never uses one of the labels a different label -> code
# mapping, which would silently corrupt every cross-column comparison.
# NOTE: labels that differ only in spelling/case are distinct categories.
shared_labels = pd.Categorical(
    df_annotated[annotator_columns].to_numpy().ravel()
).categories
label_dtype = pd.CategoricalDtype(categories=shared_labels)

for col in annotator_columns:
    df_annotated[col] = df_annotated[col].astype(label_dtype).cat.codes


# ===============================
# 2. Percentage Agreement
# ===============================

def pairwise_agreement(df, col1, col2):
    """Return the fraction of rows where two columns hold the same value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    col1, col2 : str
        Names of the two columns to compare row by row.

    Returns
    -------
    float
        Number of matching rows divided by the number of rows in ``df``,
        rounded to 3 decimal places.
    """
    matches = df[col1].eq(df[col2])
    return round(matches.sum() / len(matches), 3)

# One record per unordered pair of rater columns, holding that pair's
# percentage agreement.
agreement_results = [
    {
        "Rater_1": first_rater,
        "Rater_2": second_rater,
        "Percent_Agreement": pairwise_agreement(df_annotated, first_rater, second_rater),
    }
    for first_rater, second_rater in combinations(annotator_columns, 2)
]

agreement_df = pd.DataFrame(agreement_results)



# ===============================
# 3. Pairwise Krippendorff Alpha
# ===============================

def compute_pairwise_alpha(df, columns, level="nominal"):
    """Compute Krippendorff's alpha for every unordered pair of columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one column of ratings per rater.
    columns : sequence of str
        Rater column names; every 2-combination of them is scored.
    level : str, default "nominal"
        Passed to ``krippendorff.alpha`` as ``level_of_measurement``.

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns ``Rater_1``, ``Rater_2`` and
        ``Krippendorff_Alpha`` (rounded to 3 decimal places).
    """

    def _alpha_for_pair(first_rater, second_rater):
        # Reliability data is laid out as one list of ratings per rater.
        return krippendorff.alpha(
            reliability_data=[df[first_rater].tolist(), df[second_rater].tolist()],
            level_of_measurement=level,
        )

    rows = [
        {
            "Rater_1": first_rater,
            "Rater_2": second_rater,
            "Krippendorff_Alpha": round(_alpha_for_pair(first_rater, second_rater), 3),
        }
        for first_rater, second_rater in combinations(columns, 2)
    ]
    return pd.DataFrame(rows)

alpha_df = compute_pairwise_alpha(df_annotated, annotator_columns, level="nominal")

# ===============================
# 4. Save Results
# ===============================

# Write each result table to its own CSV file, leaving out the index column.
for result_table, csv_name in (
    (agreement_df, "pairwise_percentage_agreement.csv"),
    (alpha_df, "pairwise_krippendorff_alpha.csv"),
):
    result_table.to_csv(csv_name, index=False)

print("\nIRR analysis completed and results saved.")



IRR analysis completed and results saved.


**Disagreement Analysis**

In [40]:
# ===============================
# Disagreement Analysis
# ===============================

# Columns holding the five group members' labels (the dataset's original
# label column is not part of this list).
human_annotators = [f"Group_member{member}" for member in range(1, 6)]

# Number of distinct labels the annotators gave to each row
labels_per_row = df_annotated[human_annotators].nunique(axis=1)
df_annotated["unique_labels"] = labels_per_row

# A row is a disagreement when more than one distinct label was given
df_annotated["disagreement"] = labels_per_row.gt(1)

# Rows on which the annotators did not all give the same label
disagreement_examples = df_annotated.loc[df_annotated["disagreement"]]

# Share of rows with at least one dissenting annotator
total_examples = len(df_annotated)
num_disagreements = df_annotated["disagreement"].sum()
disagreement_rate = round(num_disagreements / total_examples, 3)

print("\n===== Disagreement Analysis =====")
print(f"Total Examples: {total_examples}")
print(f"Disagreement Examples: {num_disagreements}")
print(f"Inter-Human Disagreement Rate: {disagreement_rate}")




===== Disagreement Analysis =====
Total Examples: 30
Disagreement Examples: 24
Inter-Human Disagreement Rate: 0.8


**Examples that had the most inter-human disagreement for our group's sample set**

In [41]:
#Compute Disagreement Score

def disagreement_strength(row, annotators=None):
    """Return how far a row's annotations are from a unanimous vote.

    The score is ``1 - (size of the largest label group / number of
    annotator columns)``: 0.0 when every annotator gave the same label,
    growing as the largest group shrinks.

    Parameters
    ----------
    row : pandas.Series
        One row of the annotated frame, indexed by column name.
    annotators : sequence of str, optional
        Annotator column names to consider. Defaults to the notebook-level
        ``human_annotators`` list, so ``df.apply(disagreement_strength,
        axis=1)`` keeps working unchanged.

    Returns
    -------
    float
        The disagreement score for the row.
    """
    if annotators is None:
        # Resolved at call time so the notebook-level list is picked up.
        annotators = human_annotators
    annotators = list(annotators)

    counts = row[annotators].value_counts()
    majority_count = counts.max()
    total = len(annotators)
    return 1 - (majority_count / total)

# Score every row, then keep the scores as a new column
strength_per_row = df_annotated.apply(disagreement_strength, axis=1)
df_annotated["disagreement_strength"] = strength_per_row

# Highest score observed in this sample
max_strength = strength_per_row.max()

# Rows whose score equals that maximum
is_strongest = df_annotated["disagreement_strength"].eq(max_strength)
strong_disagreement = df_annotated.loc[is_strongest]

print("Number of examples with highest disagreement:", len(strong_disagreement))
# NOTE: the "(3 vs 2 splits)" wording is hard-coded, not derived from
# max_strength; with five annotators it is accurate only when the maximum
# score is 0.4.
print("\nExamples with strongest disagreement (3 vs 2 splits):")
print(strong_disagreement)


Number of examples with highest disagreement: 10

Examples with strongest disagreement (3 vs 2 splits):
    ID                                   Original_Message  Extremism_Label  \
0    1  she isnt racist unlike your somalia bitch frie...                1   
1    2  if i ever saw kendall jones in person i d kill...                1   
3    4  i am in iraq as whatever speak your country is...                1   
6    7  disabled people artfully the fucking scum of t...                1   
7    8  if your fiance has a conversation with a bitch...                1   
13  14  whatever do not blow up our wives and children...                0   
14  15  never a doubt that our founders saw the writin...                0   
20  21  in my opinion whatever should be afraid of the...                1   
23  24  keyshia key whore is a bird gucci should have ...                1   
25  26  ceasefire let s see how long those towel heads...                0   

    Group_member1  Group_member2  Gro

**How often did the majority of annotators agree with the original label?**

In [43]:
#Compute Majority Vote
# Row-wise mode across the annotator columns; column 0 of the result is
# taken as the majority label.
row_modes = df_annotated[human_annotators].mode(axis=1)
df_annotated["majority_vote"] = row_modes[0]

#Compare Majority With Original Label
df_annotated["majority_matches_original"] = df_annotated["majority_vote"].eq(
    df_annotated["Extremism_Label"]
)

#Calculate Agreement Rate
agreement_count = df_annotated["majority_matches_original"].sum()
total_examples = len(df_annotated)
agreement_rate = round(agreement_count / total_examples, 3)

print("Majority agreement count:", agreement_count)
print("Total examples:", total_examples)
print("Majority vs Original Agreement Rate:", agreement_rate)


Majority agreement count: 16
Total examples: 30
Majority vs Original Agreement Rate: 0.533


**Where did your group disagree with the original labels?**

In [44]:
# Rows where the annotators' majority label differs from the original label
majority_differs = ~df_annotated["majority_matches_original"]
group_disagreements = df_annotated.loc[majority_differs]

# Show only the identifying column and the two labels being compared
columns_to_show = ["ID", "Extremism_Label", "majority_vote"]

print("\n===== Examples Where Group Disagreed With Original Label =====\n")
print(group_disagreements[columns_to_show])



===== Examples Where Group Disagreed With Original Label =====

    ID  Extremism_Label  majority_vote
1    2                1              0
3    4                1              0
6    7                1              0
7    8                1              0
9   10                0              1
12  13                0              1
13  14                0              1
14  15                0              1
16  17                0              1
17  18                0              1
21  22                1              0
23  24                1              0
24  25                1              0
26  27                0              1
