In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

def calculate_kappa_and_confusion_matrix(csv_file, annotator1_column, annotator2_column, annotator3_column):
    """Save pairwise confusion-matrix heatmaps for three annotators.

    Reads ``csv_file``, builds one confusion matrix per annotator pair
    (1 vs 2, 1 vs 3, 2 vs 3) over the union of labels used by all three
    annotators, and writes each matrix as a heatmap to
    ``confusion_matrix_12.png``, ``confusion_matrix_13.png`` and
    ``confusion_matrix_23.png`` (paths are relative, so they land in the
    current working directory).

    Note: despite its name, this version computes no kappa score.

    Args:
        csv_file: Path of the CSV file holding the annotations.
        annotator1_column: Column name with annotator 1's labels.
        annotator2_column: Column name with annotator 2's labels.
        annotator3_column: Column name with annotator 3's labels.

    Returns:
        None. The only results are the three PNG files.

    Raises:
        KeyError: If one of the column names is not present in the CSV.
    """
    df = pd.read_csv(csv_file)

    # Pass every value through str() so that a missing cell (NaN) becomes the
    # ordinary label "nan". Without this, a column mixing NaN floats with
    # string labels makes sorted() below fail with a TypeError.
    annotations = {
        1: [str(value) for value in df[annotator1_column]],
        2: [str(value) for value in df[annotator2_column]],
        3: [str(value) for value in df[annotator3_column]],
    }

    # Shared, sorted label list so all three matrices use the same axes.
    categories = sorted(set().union(*annotations.values()))

    # Rows are the lower-numbered annotator, columns the higher-numbered one.
    for row_id, col_id in ((1, 2), (1, 3), (2, 3)):
        conf_matrix = confusion_matrix(annotations[row_id], annotations[col_id], labels=categories)

        fig, ax = plt.subplots(figsize=(8, 6))
        sns.heatmap(
            conf_matrix,
            annot=True,
            fmt="d",
            cmap="Blues",
            cbar=False,
            xticklabels=categories,
            yticklabels=categories,
            ax=ax,
        )
        ax.set_xlabel(f'Annotator {col_id}')
        ax.set_ylabel(f'Annotator {row_id}')
        ax.set_title(f'Confusion Matrix (Annotator {row_id} vs Annotator {col_id})')
        fig.savefig(f'confusion_matrix_{row_id}{col_id}.png')
        # Close explicitly so figures do not accumulate in the kernel.
        plt.close(fig)

# Example usage: write the three pairwise heatmaps for the annotation CSV below.
csv_file_path = 'TeamMultDogoAnnotations.csv'
annotator1_column_name, annotator2_column_name, annotator3_column_name = (
    'annotations_sujit',
    'annotations_oj',
    'annotations_nikhil',
)

calculate_kappa_and_confusion_matrix(
    csv_file=csv_file_path,
    annotator1_column=annotator1_column_name,
    annotator2_column=annotator2_column_name,
    annotator3_column=annotator3_column_name,
)


In [None]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

def calculate_kappa_and_confusion_matrix(csv_file, annotator1_column, annotator2_column, annotator3_column):
    """Compute pairwise Cohen's kappa and confusion matrices for three annotators.

    Args:
        csv_file: Path of the CSV file holding the annotations.
        annotator1_column: Column name with annotator 1's labels.
        annotator2_column: Column name with annotator 2's labels.
        annotator3_column: Column name with annotator 3's labels.

    Returns:
        A 3-tuple ``(kappa_scores, confusion_matrices, categories)`` where
        ``kappa_scores`` and ``confusion_matrices`` are 3-tuples ordered as
        (1 vs 2, 1 vs 3, 2 vs 3) and ``categories`` is the sorted list of all
        labels used by any annotator (the row/column order of the matrices).
    """
    frame = pd.read_csv(csv_file)

    # Cast each annotation column to str (the original intent: cope with
    # potential NaN values) and write it back before reading it out again.
    for column in (annotator1_column, annotator2_column, annotator3_column):
        frame[column] = frame[column].astype(str)

    first = frame[annotator1_column]
    second = frame[annotator2_column]
    third = frame[annotator3_column]

    # Union of every label seen, sorted to give the matrices a stable axis order.
    label_pool = set(first)
    label_pool.update(second, third)
    categories = sorted(label_pool)

    # Same pair order for both outputs: (1, 2), (1, 3), (2, 3).
    pairs = ((first, second), (first, third), (second, third))

    kappa_scores = tuple(cohen_kappa_score(left, right) for left, right in pairs)
    conf_matrices = tuple(
        confusion_matrix(left, right, labels=categories) for left, right in pairs
    )

    return kappa_scores, conf_matrices, categories

# Example usage: pairwise agreement for the annotation CSV below.
csv_file_path = 'TeamUbuntuAnnotations.csv'
annotator1_column_name, annotator2_column_name, annotator3_column_name = (
    'annotations_sujit',
    'annotations_oj',
    'annotations_nikhil',
)

kappa_scores, confusion_matrices, categories = calculate_kappa_and_confusion_matrix(
    csv_file=csv_file_path,
    annotator1_column=annotator1_column_name,
    annotator2_column=annotator2_column_name,
    annotator3_column=annotator3_column_name,
)

# Display results
# The per-pair names are kept at notebook level in case other cells use them.
kappa_score_12, kappa_score_13, kappa_score_23 = kappa_scores
conf_matrix_12, conf_matrix_13, conf_matrix_23 = confusion_matrices

# Annotator pairs in the same order as kappa_scores / confusion_matrices.
# Rows of each matrix belong to the first annotator, columns to the second.
annotator_pairs = ((1, 2), (1, 3), (2, 3))

# Print the kappa score and the labelled confusion matrix for every pair.
for (row_id, col_id), kappa, matrix in zip(annotator_pairs, kappa_scores, confusion_matrices):
    print(f"Kappa Score (Annotator {row_id} vs Annotator {col_id}): {kappa}")
    print(f"\nConfusion Matrix (Annotator {row_id} vs Annotator {col_id}):")
    print(pd.DataFrame(matrix, index=categories, columns=categories))

# Save confusion matrices as PNG files
for (row_id, col_id), matrix in zip(annotator_pairs, confusion_matrices):
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        cbar=False,
        xticklabels=categories,
        yticklabels=categories,
        ax=ax,
    )
    ax.set_xlabel(f'Annotator {col_id}')
    ax.set_ylabel(f'Annotator {row_id}')
    ax.set_title(f'Confusion Matrix (Annotator {row_id} vs Annotator {col_id})')
    fig.savefig(f'ubuntu_confusion_matrix_{row_id}{col_id}.png')
    # Close explicitly so figures do not accumulate in the kernel.
    plt.close(fig)


Kappa Score (Annotator 1 vs Annotator 2): 0.8479852006756213

Confusion Matrix (Annotator 1 vs Annotator 2):
      %  aa  ad  bc  bh  bk  br  fa  fc  ft  ...  no  ny  ny^e  qo  qw  qw^d  \
%     3   0   0   0   0   0   0   0   0   0  ...   0   0     0   0   0     0   
aa    0   0   0   0   0   6   0   0   0   1  ...   0   0     0   0   0     0   
ad    0   0   9   0   0   0   0   0   0   0  ...   0   0     0   0   0     0   
bc    0   0   0   3   0   0   0   0   0   0  ...   0   0     0   0   0     0   
bh    0   0   0   0   1   0   0   0   0   0  ...   0   0     0   0   0     0   
bk    0   0   0   0   0   4   0   0   0   0  ...   0   0     0   0   0     0   
br    0   0   0   0   0   0   0   0   0   0  ...   0   0     0   0   0     0   
fa    0   0   0   0   0   0   0   1   0   0  ...   0   0     0   0   0     0   
fc    0   0   0   0   0   0   0   0   1   0  ...   0   0     0   0   0     0   
ft    0   0   0   0   0   0   0   0   0   6  ...   0   0     0   0   0     0   
fw    0   0