In [None]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Directory holding the relabelled training spreadsheets. It defaults to the
# original JupyterHub location and can be redirected by setting the
# CLINICAL_HISTORY_INPUT_DIR environment variable before the kernel starts,
# so the notebook also runs on machines with a different home directory.
input_dir = os.environ.get(
    'CLINICAL_HISTORY_INPUT_DIR',
    '/home/jupyter-n.mekkes@gmail.com-f6d87/clinical_history/input_data',
)

# One workbook per annotator
path_minke = os.path.join(input_dir, 'training_data_relabel_minke.xlsx')
path_megan = os.path.join(input_dir, 'training_data_relabel_megan.xlsx')

In [None]:
# Load both annotators' relabelled workbooks. The row index is taken from the
# first spreadsheet column for Minke and from the second one for Megan.
read_options = {'engine': 'openpyxl'}
minke = pd.read_excel(path_minke, index_col=[0], **read_options)
megan = pd.read_excel(path_megan, index_col=[1], **read_options)

In [None]:
def _suffix_label_columns(frame, suffix, n_leading=2):
    """Return frame's column names with `suffix` appended to all but the first `n_leading`."""
    names = frame.columns.tolist()
    return names[:n_leading] + [name + suffix for name in names[n_leading:]]


## Minke: missing cells become 0 and 'X' marks become 1, giving a 0-1 table;
## only the first 1000 rows are kept for now
minke = minke.fillna(0).replace('X', 1).head(n=1000)

## Megan: drop the helper columns (when present) so only the relevant ones remain,
## then keep the first 1000 rows as well
megan = megan.drop(columns=['Unnamed: 0', 'sum_true', 'sentence_length'], errors='ignore').head(n=1000)

## per-column totals over everything after the first two columns
## (these are sums, i.e. how often each attribute was labelled)
counts_minke = minke.iloc[:, 2:].sum()
counts_megan = megan.iloc[:, 2:].sum()

## remember the plain attribute names, then tag every label column with the
## annotator it belongs to
clinical_attributes = minke.columns[2:].tolist()
minke.columns = _suffix_label_columns(minke, '_minke')
megan.columns = _suffix_label_columns(megan, '_megan')

print(minke.shape)
print(megan.shape)
print(clinical_attributes)
display(minke.head())

In [None]:
# Stack both annotators' per-attribute counts into one Series, then average
# the entries that share the same attribute label.
stacked_counts = pd.concat([counts_minke, counts_megan])
mean_values = stacked_counts.groupby(level=0).mean()

print(mean_values)

In [None]:
# Place both annotators' tables side by side (Megan's columns first);
# pandas matches the rows on the index.
combined_df = pd.concat([megan, minke], axis='columns')

In [None]:
from sklearn.metrics import jaccard_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import pairwise_distances
import warnings
from sklearn.exceptions import DataConversionWarning
# Suppress scikit-learn's DataConversionWarning for the rest of the session
# (presumably to keep the per-attribute output below readable).
warnings.filterwarnings("ignore", category=DataConversionWarning)

def krippendorff_alpha(labels_minke, labels_megan):
    """Krippendorff's alpha for two coders on nominal data.

    alpha = 1 - D_o / D_e, where D_o is the observed disagreement (the share
    of units the two coders labelled differently) and D_e is the disagreement
    expected by chance, derived from how often each value occurs in the
    pooled labels of both coders.

    The result is deterministic: the expected disagreement is computed in
    closed form rather than estimated from randomly drawn labels.

    Parameters
    ----------
    labels_minke, labels_megan : array-like of equal length
        One label per unit (e.g. 0/1 per sentence) from each coder. Both
        sequences are expected to be complete (no missing values).

    Returns
    -------
    float
        1.0 for perfect agreement, around 0 for chance-level agreement and
        negative for systematic disagreement. NaN when alpha is undefined:
        no units at all, or every pooled label has the same value (so there
        is no variation against which to measure agreement).

    Raises
    ------
    ValueError
        If the two label sequences differ in length.
    """
    coder_a = np.asarray(labels_minke).ravel()
    coder_b = np.asarray(labels_megan).ravel()
    if coder_a.shape != coder_b.shape:
        raise ValueError("both coders must label the same number of units")
    if coder_a.size == 0:
        return float('nan')

    # Observed disagreement: fraction of units on which the coders differ.
    observed_disagreement = float(np.mean(coder_a != coder_b))

    # Expected disagreement: probability that two labels drawn without
    # replacement from the pooled 2*N labels differ. With n_c the pooled
    # frequency of value c, the number of ordered unequal pairs is
    # n**2 - sum(n_c**2), out of n * (n - 1) ordered pairs in total.
    pooled = np.concatenate((coder_a, coder_b))
    _, value_counts = np.unique(pooled, return_counts=True)
    n_values = int(pooled.size)
    unequal_pairs = n_values ** 2 - int(np.sum(value_counts.astype(np.int64) ** 2))
    expected_disagreement = unequal_pairs / (n_values * (n_values - 1))

    if expected_disagreement == 0:
        # Only one distinct value was ever used: alpha is undefined.
        return float('nan')

    return float(1 - observed_disagreement / expected_disagreement)

# Per-attribute agreement scores, keyed by clinical attribute name
jaccard_scores, cohen_score, krip = {}, {}, {}

for symptom in clinical_attributes:
    labels_minke = combined_df[symptom + '_minke']
    labels_megan = combined_df[symptom + '_megan']

    ## an attribute that neither annotator ever assigned cannot be scored in a
    ## meaningful way, so report it and move on
    if labels_minke.eq(0).all() and labels_megan.eq(0).all():
        print(f"{symptom} is NOT labeled in the 1000 sentence by either Minke or Megan")
        continue

    ## Jaccard similarity: ranges from 0 (no overlap) to 1 (identical positives).
    ## It is |both present| / |present for at least one annotator|.
    ## Example: 10 sentences for 'tremor'; 6 marked by both annotators, 2 only
    ## by A and 2 only by B --> 6 / (6 + 2 + 2) = 0.6
    jaccard_similarity = jaccard_score(labels_minke, labels_megan, average='binary')
    jaccard_scores[symptom] = jaccard_similarity

    ## Cohen's kappa: ranges from -1 to 1, corrects raw agreement for chance.
    ## Same example: observed agreement Po = (6 both present + 0 both absent) / 10 = 0.6
    ## Each annotator marks the symptom present in 8/10 and absent in 2/10 sentences, so
    ## Pe = (0.8 * 0.8) + (0.2 * 0.2) = 0.68
    ## kappa = (Po - Pe) / (1 - Pe) = (0.6 - 0.68) / (1 - 0.68) = -0.25
    kappa = cohen_kappa_score(labels_minke, labels_megan)
    cohen_score[symptom] = kappa

    ## third agreement measure, from the krippendorff_alpha helper
    alpha = krippendorff_alpha(labels_minke, labels_megan)
    krip[symptom] = alpha
    print(f"for {symptom}, the jaccard similarity is {jaccard_similarity} and cohens kappa is {kappa} and krip is {alpha}")
    

def _mean_score(scores):
    """Return the arithmetic mean of a score dict's values, or NaN when the dict is empty.

    Guarding the empty case avoids a ZeroDivisionError when no clinical
    attribute was scored at all (every attribute was skipped in the loop).
    """
    if not scores:
        return float('nan')
    return sum(scores.values()) / len(scores)


# Average each agreement metric over all attributes that were scored
mean_jaccard_score = _mean_score(jaccard_scores)
mean_cohen_score = _mean_score(cohen_score)
mean_krip = _mean_score(krip)

# Per-attribute Jaccard scores, followed by the mean kappa, mean Jaccard and mean alpha
print(jaccard_scores)
print(mean_cohen_score)
print(mean_jaccard_score)
print(mean_krip)

In [None]:
# One single-column frame per metric, indexed by clinical attribute
data_jaccard = pd.DataFrame.from_dict(jaccard_scores, orient='index', columns=['Jaccard'])
data_krip = pd.DataFrame.from_dict(krip, orient='index', columns=['Krippendorf Alpha'])
data_cohen = pd.DataFrame.from_dict(cohen_score, orient='index', columns=["Cohen's Kappa"])

# Side-by-side table of all metrics, ordered by descending Cohen's kappa
data = pd.concat([data_jaccard, data_krip, data_cohen], axis=1).sort_values(
    by=["Cohen's Kappa"], ascending=False
)

# Tall figure: attributes run along the y-axis, scores along the x-axis
fig, ax = plt.subplots(figsize=(8, 20))

# Overlay one semi-transparent set of points per metric, in column order
for metric in data.columns:
    sns.scatterplot(data=data, y=data.index, x=metric, label=metric, alpha=0.5, ax=ax)

ax.legend()
ax.set_xlabel('Performance')

# Write each scored attribute's mean label count to the right of its row
for symptom, mean_value in mean_values.items():
    if symptom not in jaccard_scores:
        continue
    ax.text(1.20, data.index.get_loc(symptom), f'{mean_value:.1f}', ha='center', va='center')

# Save the figure to disk instead of only showing it
fig.savefig('inter_operator.pdf', dpi=300, bbox_inches='tight')



In [None]:
# Convert the data to long format: one row per (attribute, metric) pair
data_long = data.melt(var_name='Metric', value_name='Value', ignore_index=False).reset_index()

# Grid layout: three panels per row, with enough rows for every attribute.
# The row count is rounded UP so the last attributes still get a panel when
# the attribute count is not a multiple of three; max(1, ...) keeps the
# plt.subplots call valid even when there is nothing to plot.
num_cols = 3
num_symptoms = len(data.index)
num_rows = max(1, (num_symptoms + num_cols - 1) // num_cols)

# Figure size grows with the number of rows
fig_height = 3 * num_rows
fig_width = 12  # wide enough for 3 columns

# squeeze=False always yields a 2-D axes array, so flatten() works for any grid size
fig, axes = plt.subplots(num_rows, num_cols, figsize=(fig_width, fig_height), sharex=True, squeeze=False)
axes = axes.flatten()

# One bar plot per attribute, comparing the three agreement metrics
for i, symptom in enumerate(data.index):
    symptom_data = data_long[data_long['index'] == symptom]

    axes[i].set_title(symptom)
    sns.barplot(data=symptom_data, x='Metric', y='Value', hue='Metric', palette='pastel', ax=axes[i])

    # get_legend() returns None when no legend was drawn (seaborn may omit it
    # when hue repeats the x variable), so only remove a legend that exists.
    legend = axes[i].get_legend()
    if legend is not None:
        legend.remove()

    # Only the first panel of each row carries the y-axis label
    axes[i].set_ylabel('Value' if i % num_cols == 0 else '')

    # The lowest panel of each column carries the x-axis label; its tick labels
    # are switched back on because sharex hides them outside the bottom grid row.
    if i + num_cols >= num_symptoms:
        axes[i].set_xlabel('Metric')
        axes[i].tick_params(labelbottom=True)

# Hide grid cells that have no attribute to show
for unused_ax in axes[num_symptoms:]:
    unused_ax.set_visible(False)

# Adjust the spacing between subplots
fig.tight_layout()

# Show the plot
plt.show()