In [None]:
import pandas as pd
import numpy as np

In [None]:
# --- 1. Define File Paths ---
# IMPORTANT: Make sure these file names match your CSV files exactly.
# The two variables must point at the outputs of *different* models: if both
# reference the same CSV, every comparison further down trivially reports
# 100% agreement because the file is compared with itself.
# NOTE(review): the TFLite path used to be a duplicate of the PyTorch path
# (copy-paste slip). The TFLite file name below is inferred from the PyTorch
# naming pattern -- confirm it matches the actual TFLite export.
tflite_predictions_file = '/home/kronbii/repos/content-violence-detection/output/text/text_tflite_jigsaw.csv'
pytorch_predictions_file = '/home/kronbii/repos/content-violence-detection/output/text/text_pytorch_jigsaw.csv'

In [None]:
# --- 2. Load the Prediction CSVs ---
# Reads both prediction files into DataFrames (`tflite_df`, `pytorch_df`).
# A missing file is reported with a hint and then re-raised so execution
# stops with a visible traceback.
print(f"Loading TFLite predictions from: {tflite_predictions_file}")
print(f"Loading PyTorch predictions from: {pytorch_predictions_file}")

try:
    tflite_df = pd.read_csv(tflite_predictions_file)
    pytorch_df = pd.read_csv(pytorch_predictions_file)
except FileNotFoundError as e:
    print(f"\n File Not Found Error: {e}")
    # The paths are absolute, so point the reader at the configured paths
    # rather than at "the same directory as this script".
    print("Please ensure the CSV files exist at the paths configured in the file-path cell.")
    # Re-raise instead of calling exit(): exit() is an interactive-shell helper.
    # In a notebook kernel it requests a kernel shutdown rather than raising at
    # the call site, and in a plain script it terminates with a success status
    # even though loading failed.
    raise

In [None]:
# --- 3. Binarize Predictions and Prepare for Merge ---
# Label columns whose scores are thresholded below.
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Each binarized frame starts as an independent copy of just the 'id' column.
tflite_binary_df = tflite_df[['id']].copy()
pytorch_binary_df = pytorch_df[['id']].copy()

print("\nBinarizing predictions with a 0.5 threshold...")
# For every label, scores at or above 0.5 become 1 and all others become 0.
# The TFLite frame is processed before the PyTorch frame for each label.
frame_pairs = (
    (tflite_df, tflite_binary_df),
    (pytorch_df, pytorch_binary_df),
)
for col in label_cols:
    for scores_df, binary_df in frame_pairs:
        binary_df[col] = scores_df[col].ge(0.5).astype(int)

In [None]:
# --- 4. Merge DataFrames for Comparison ---
# Merge on the 'id' column to align predictions for the same text.
# Suffixes are added to distinguish columns (e.g., 'toxic_tflite', 'toxic_pytorch').
# pd.merge defaults to an inner join, so only ids present in both frames survive.
merged_df = pd.merge(
    tflite_binary_df,
    pytorch_binary_df,
    on='id',
    suffixes=('_tflite', '_pytorch')
)

if merged_df.empty:
    # Raise rather than call exit(): in a notebook kernel exit() does not raise
    # at the call site, so the success message below would still be printed
    # and later cells would divide by a sample count of zero.
    raise ValueError(
        "No matching IDs found between the two CSV files. "
        "Please ensure both files were generated from the exact same dataset."
    )

print(f"\nSuccessfully merged {len(merged_df)} corresponding predictions.")

In [None]:
# --- 5. Calculate Agreement Rate Per Label ---
divider = "=" * 45
print("\n" + divider)
print("Model Agreement Rates (Per Label)")
print(divider)

# Number of merged rows; used as the denominator for every rate.
total_samples = len(merged_df)

# Each label is scored on its own: the share of rows where both models
# produced the same binary value.
for label in label_cols:
    tflite_col, pytorch_col = f"{label}_tflite", f"{label}_pytorch"

    # Element-wise equality; summing the boolean mask counts the matching rows.
    same_prediction = merged_df[tflite_col].eq(merged_df[pytorch_col])
    agreement_count = same_prediction.sum()

    # Express the match count as a percentage of all merged rows.
    agreement_rate = (agreement_count / total_samples) * 100

    # Label name is left-aligned in a 16-character field so the arrows line up.
    print(f"- {label.capitalize():<16} -> {agreement_rate:.2f}% agreement")

In [None]:
# --- 6. Calculate Stricter Overall Agreement Rate ---
# A row counts as agreeing only when the two models match on every label.
agreement_conditions = [
    merged_df[f"{name}_tflite"].eq(merged_df[f"{name}_pytorch"])
    for name in label_cols
]
# Combine the per-label boolean masks with a logical AND across labels,
# leaving one boolean per merged row.
all_labels_agree = np.all(agreement_conditions, axis=0)

overall_agreement_count = all_labels_agree.sum()
overall_agreement_rate = (overall_agreement_count / total_samples) * 100

rule = "=" * 45
print("\n" + rule)
print("Stricter Overall Agreement Rate")
print(rule)
print("This measures how often the models' full prediction vectors were identical.")
print(f"\nOverall Agreement: {overall_agreement_rate:.2f}%")
print(f"({overall_agreement_count} out of {total_samples} predictions were identical across all labels)")