In [1]:
pip install krippendorff

Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import krippendorff
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [1]:
# Load each coder's Excel export.
# NOTE(review): these absolute paths are machine-specific; consider a single
# configurable data directory so the notebook runs elsewhere.
coder1_data = pd.read_excel('/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Coder1.xlsx')
coder2_data = pd.read_excel('/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Coder2.xlsx')

# Preview the first few rows of each dataframe to understand their structure
print(coder1_data.head())
print(coder2_data.head())

# Columns holding the coded answers (Q1 to Q9)
questions = ['Q1 - AI Act 4', 'Q2 - AI Act 3', 'Q3 - AI Act 2', 'Q4 - AI Act 1',
             'Q5 - Risk Presence', 'Q6 - Risk Level 4', 'Q7 - Risk Level 3',
             'Q8 - Risk Level 2', 'Q9 - Risk Level 1']

# Row 0 of each export holds column label text rather than a response (see the
# preview above), so it is excluded explicitly. Previously it was only removed
# as a side effect of numeric coercion; had its question cells been blank,
# fillna(0) would have turned it into a spurious (0, 0) agreement.
df_coder1_questions = coder1_data[questions].iloc[1:]
df_coder2_questions = coder2_data[questions].iloc[1:]

# Missing and blank cells are treated as code 0
df_coder1_questions = df_coder1_questions.fillna(0).replace("", 0)
df_coder2_questions = df_coder2_questions.fillna(0).replace("", 0)

# Convert the responses to numeric values; anything non-numeric becomes NaN
df_coder1_questions = df_coder1_questions.apply(pd.to_numeric, errors='coerce')
df_coder2_questions = df_coder2_questions.apply(pd.to_numeric, errors='coerce')

# One row of metrics per question: [question, accuracy, precision, recall, f1, n]
metrics = []

for column in questions:
    # Align the two coders on the row index and keep only rows where BOTH
    # have a numeric value (dropna removes a row if either side is NaN)
    common_data = pd.concat([df_coder1_questions[column], df_coder2_questions[column]], axis=1).dropna()
    n = len(common_data)
    if n > 0:
        # Coder 1 is passed as the reference labels, coder 2 as the predictions
        y_true = common_data.iloc[:, 0]
        y_pred = common_data.iloc[:, 1]

        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
        recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

        metrics.append([column, accuracy, precision, recall, f1, n])
    else:
        # No usable pairs for this question: record empty metrics with N = 0
        metrics.append([column, None, None, None, None, 0])

# Collect the metrics in a DataFrame; the bare expression on the last line
# renders it once (the extra print of the same table was redundant)
metrics_df = pd.DataFrame(metrics, columns=["Question", "Accuracy", "Precision", "Recall", "F1 Score", "N"])

metrics_df

             StartDate              EndDate         Status     IPAddress   
0           Start Date             End Date  Response Type    IP Address  \
1  2024-05-22 09:32:01  2024-05-22 09:34:32              0  146.50.72.92   
2  2024-05-22 09:36:27  2024-05-22 09:38:46              0  146.50.72.92   
3  2024-05-22 09:38:51  2024-05-22 09:49:37              0  146.50.72.92   
4  2024-05-22 09:49:40  2024-05-22 09:54:01              0  146.50.72.92   

   Progress  Duration (in seconds)  Finished                RecordedDate   
0  Progress  Duration (in seconds)  Finished               Recorded Date  \
1       100                    151         1  2024-05-22 09:34:33.281000   
2       100                    138         1  2024-05-22 09:38:46.914000   
3       100                    646         1  2024-05-22 09:49:37.805000   
4       100                    261         1  2024-05-22 09:54:02.233000   

          ResponseId    RecipientLastName  ...   UserLanguage   
0        Response ID 

Unnamed: 0,Question,Accuracy,Precision,Recall,F1 Score,N
0,Q1 - AI Act 4,0.95,0.927566,0.894661,0.91,120
1,Q2 - AI Act 3,0.916667,0.90625,0.90625,0.90625,120
2,Q3 - AI Act 2,0.883333,0.811744,0.811744,0.811744,120
3,Q4 - AI Act 1,0.916667,0.896662,0.90873,0.902312,120
4,Q5 - Risk Presence,0.941667,0.940559,0.941919,0.941172,120
5,Q6 - Risk Level 4,0.916667,0.87,0.843228,0.8557,120
6,Q7 - Risk Level 3,0.891667,0.828421,0.838542,0.833316,120
7,Q8 - Risk Level 2,0.966667,0.913731,0.913731,0.913731,120
8,Q9 - Risk Level 1,0.983333,0.924147,0.924147,0.924147,120


In [15]:
# Read each coder's Excel export into its own dataframe
coder1_data = pd.read_excel('/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Coder1.xlsx')
coder2_data = pd.read_excel('/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Coder2.xlsx')

# Names of the nine coded question columns (Q1 to Q9)
questions = [
    'Q1 - AI Act 4', 'Q2 - AI Act 3', 'Q3 - AI Act 2', 'Q4 - AI Act 1',
    'Q5 - Risk Presence', 'Q6 - Risk Level 4', 'Q7 - Risk Level 3',
    'Q8 - Risk Level 2', 'Q9 - Risk Level 1'
]

# Per coder: pick the question columns, turn NaN / empty strings into 0,
# coerce every column to numeric (non-numeric cells become NaN) and tag the
# column names with the coder they belong to
df_coder1_questions = (
    coder1_data[questions]
    .fillna(0)
    .replace("", 0)
    .apply(pd.to_numeric, errors='coerce')
    .add_suffix("_Coder1")
)
df_coder2_questions = (
    coder2_data[questions]
    .fillna(0)
    .replace("", 0)
    .apply(pd.to_numeric, errors='coerce')
    .add_suffix("_Coder2")
)

# Put both coders side by side, aligned on the row index
side_by_side = pd.concat([df_coder1_questions, df_coder2_questions], axis=1)

# Remove the first row
first_label = side_by_side.index[0]
without_first_row = side_by_side.drop(first_label)


def _as_int_unless_null(cell):
    """Cast a non-null cell to int (0.0 -> 0); pass null cells through unchanged."""
    return cell if pd.isnull(cell) else int(cell)


combined_data_cleaned = without_first_row.applymap(_as_int_unless_null)

combined_data_cleaned

Unnamed: 0,Q1 - AI Act 4_Coder1,Q2 - AI Act 3_Coder1,Q3 - AI Act 2_Coder1,Q4 - AI Act 1_Coder1,Q5 - Risk Presence_Coder1,Q6 - Risk Level 4_Coder1,Q7 - Risk Level 3_Coder1,Q8 - Risk Level 2_Coder1,Q9 - Risk Level 1_Coder1,Q1 - AI Act 4_Coder2,Q2 - AI Act 3_Coder2,Q3 - AI Act 2_Coder2,Q4 - AI Act 1_Coder2,Q5 - Risk Presence_Coder2,Q6 - Risk Level 4_Coder2,Q7 - Risk Level 3_Coder2,Q8 - Risk Level 2_Coder2,Q9 - Risk Level 1_Coder2
1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2,0,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,0,0
3,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,0,0,1
5,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0
117,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
118,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,0,0
119,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,1,0,0


In [16]:
# Write the side-by-side coder table to a new Excel workbook, without the row index
combined_output_path = '/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Combined.xlsx'
combined_data_cleaned.to_excel(combined_output_path, index=False)

print("Combined data saved successfully.")

Combined data saved successfully.


In [18]:
# NOTE: this cell uses coder1_data / coder2_data without loading them, so a
# cell that reads the two Excel exports must have been run first.

# Columns holding the coded answers (Q1 to Q9)
questions = [
    'Q1 - AI Act 4', 'Q2 - AI Act 3', 'Q3 - AI Act 2', 'Q4 - AI Act 1',
    'Q5 - Risk Presence', 'Q6 - Risk Level 4', 'Q7 - Risk Level 3',
    'Q8 - Risk Level 2', 'Q9 - Risk Level 1'
]

df_coder1_questions = coder1_data[questions]
df_coder2_questions = coder2_data[questions]

# Missing and blank cells are treated as code 0
df_coder1_questions = df_coder1_questions.fillna(0).replace("", 0)
df_coder2_questions = df_coder2_questions.fillna(0).replace("", 0)

# Convert the responses to numeric values; anything non-numeric becomes NaN
df_coder1_questions = df_coder1_questions.apply(pd.to_numeric, errors='coerce')
df_coder2_questions = df_coder2_questions.apply(pd.to_numeric, errors='coerce')

# Rename columns to distinguish between the two coders
df_coder1_questions = df_coder1_questions.rename(columns=lambda x: f"{x}_Coder1")
df_coder2_questions = df_coder2_questions.rename(columns=lambda x: f"{x}_Coder2")

# Place the two coders side by side, aligned on the row index
combined_data_cleaned = pd.concat([df_coder1_questions, df_coder2_questions], axis=1)

# Drop the first row
combined_data_cleaned = combined_data_cleaned.drop(combined_data_cleaned.index[0])

# Cast every non-null cell to int (0.0 -> 0); null cells are left as they are.
# NOTE(review): DataFrame.applymap is deprecated from pandas 2.1 in favour of
# DataFrame.map; switch once the minimum pandas version allows it.
combined_data_cleaned = combined_data_cleaned.applymap(lambda x: int(x) if pd.notnull(x) else x)

# Build the long-format table for Krippendorff's alpha: one block of rows per
# question with columns Question / Coder1 / Coder2. The per-question frames
# are collected in a list and concatenated once, instead of growing a
# DataFrame inside the loop (which re-copies all rows on every iteration and
# starts from an empty frame).
long_format_frames = [
    pd.DataFrame({
        'Question': question,
        'Coder1': combined_data_cleaned[f"{question}_Coder1"],
        'Coder2': combined_data_cleaned[f"{question}_Coder2"],
    })
    for question in questions
]
long_format_data = pd.concat(long_format_frames, ignore_index=True)

long_format_data

Unnamed: 0,Question,Coder1,Coder2
0,Q1 - AI Act 4,0,0
1,Q1 - AI Act 4,0,1
2,Q1 - AI Act 4,0,0
3,Q1 - AI Act 4,0,0
4,Q1 - AI Act 4,0,0
...,...,...,...
1075,Q9 - Risk Level 1,0,0
1076,Q9 - Risk Level 1,0,0
1077,Q9 - Risk Level 1,0,0
1078,Q9 - Risk Level 1,0,0


In [14]:
# Write the long-format table to its own Excel workbook, without the row index
long_format_output_path = '/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Combined_SPSS.xlsx'
long_format_data.to_excel(long_format_output_path, index=False)

print("Combined data saved successfully.")

Combined data saved successfully.


In [17]:
# Read the long-format table back from its Excel workbook
long_format_path = '/Users/helgegeurtjacobusmoes/Desktop/thesis data/Thesis_Coding_Combined_SPSS.xlsx'
long_format_data = pd.read_excel(long_format_path)

# Krippendorff's alpha between the two coder columns of a table
def calculate_alpha(data):
    """Return Krippendorff's alpha (nominal level) for 'Coder1' vs 'Coder2'.

    `data` must provide 'Coder1' and 'Coder2' columns. Each column becomes
    one row of the reliability matrix passed to `krippendorff.alpha`.
    """
    ratings_by_coder = np.array([data[coder].values for coder in ('Coder1', 'Coder2')])
    return krippendorff.alpha(
        reliability_data=ratings_by_coder,
        level_of_measurement='nominal',
    )

# Krippendorff's alpha on the full, un-resampled table.
# NOTE(review): calculate_alpha only reads the Coder1 / Coder2 columns, so if
# long_format_data stacks several questions this is one pooled alpha across
# all of them - confirm that is the intended statistic.
original_alpha = calculate_alpha(long_format_data)

# Bootstrap: resample the rows with replacement (same size as the original)
# and recompute alpha for every resample.
n_bootstrap = 10000
# A fixed seed makes the bootstrap mean and confidence interval reproducible
# across kernel restarts. The single RandomState is shared by all iterations,
# so each resample still differs from the previous one.
bootstrap_seed = 42
bootstrap_rng = np.random.RandomState(bootstrap_seed)
bootstrap_alphas = np.zeros(n_bootstrap)

for i in range(n_bootstrap):
    bootstrap_sample = long_format_data.sample(frac=1, replace=True, random_state=bootstrap_rng)
    bootstrap_alphas[i] = calculate_alpha(bootstrap_sample)

# Bootstrap mean and 95% percentile interval (2.5th and 97.5th percentiles)
alpha_mean = np.mean(bootstrap_alphas)
alpha_ci_lower, alpha_ci_upper = np.percentile(bootstrap_alphas, [2.5, 97.5])

# Results: (alpha on original data, bootstrap mean, CI lower, CI upper)
original_alpha, alpha_mean, alpha_ci_lower, alpha_ci_upper


(0.8028585296930493, 0.802335832826237, 0.7579208379792715, 0.8438544019111965)