In [1]:
from sklearn import metrics
from scipy import stats
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide display defaults: show wide/long frames without truncation.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 100

# Apply the seaborn default theme first; it rewrites matplotlib's rcParams,
# so the figure-size override has to come afterwards.
sns.set()
plt.rcParams.update({"figure.figsize": (10, 10)})

In [2]:
def _replace_scale_anchors(series, anchor_pairs):
    """
    Replace verbal scale end points with the numbers 10 and 1.

    anchor_pairs is a sequence of (top_text, bottom_text) tuples. The pairs are applied
    one after another; within a pair the top text is checked first. A value whose string
    form contains neither text is passed through unchanged.
    """
    for top_text, bottom_text in anchor_pairs:
        def to_number(value, top_text=top_text, bottom_text=bottom_text):
            text = str(value)
            if top_text in text:
                return 10
            if bottom_text in text:
                return 1
            return value

        series = series.apply(to_number)
    return series


def process_metadata(data_path, participant_id_column, dropna_subset_col,
                     project_data_dir="/home/kti01/Documents/My Files/Projects/Overlap/data"):
    """
    Build one long-format dataframe from the per-group survey exports in data_path.

    Every submitted row of every ``*csv`` file in data_path is expanded into 15 output
    rows (one per image mask of the group). Columns 6..80 of the export are read as
    15 consecutive blocks of 5 answers: prediction, confidence, trust, issues and an
    additional free-text issue. A participant id that occurs on several submitted rows
    contributes one set of 15 rows per submission.

    Parameters
    ----------
    data_path : str
        Directory with the group exports. The group number is taken from the file
        name: the last ``_``-separated token of the stem, minus its first 5 characters.
    participant_id_column : str
        Column of the export that identifies the participant (an e-mail address,
        judging by the mapping against the 'E-Mail' column of participants.csv).
    dropna_subset_col : str or list of str
        Column(s) that must be non-null for a row to count as submitted.
    project_data_dir : str, optional
        Directory holding ``Data Upload/Ham10k_test_masked/<group>/``,
        ``metadata_testset.csv``, ``result_test.csv`` and ``participants.csv``.
        The default reproduces the previously hard-coded location.

    Returns
    -------
    pandas.DataFrame
        Columns: participant, group, mask, prediction, confidence, trust, issues,
        image_id, benign_malignant, AI_prediction, language.

    Raises
    ------
    ValueError
        If data_path contains no submitted responses.
    """
    mask_path = os.path.join(project_data_dir, "Data Upload", "Ham10k_test_masked")

    # Create a dictionary with groups as keys and image masks as values.
    # Masks are sorted by file name; the entry at sorted position 14 is then moved to
    # position 12.
    # NOTE(review): presumably this reproduces the order in which the survey showed the
    # images - confirm against the survey definition.
    masks_dict = {}
    for i in range(1, 15):
        masks = sorted(os.listdir(os.path.join(mask_path, str(i))))
        masks = [mask.split('.')[0] for mask in masks]
        masks.insert(12, masks.pop(14))
        masks_dict[str(i)] = masks

    # Accept a single column label as well as a list of labels.
    if isinstance(dropna_subset_col, str):
        dropna_subset = [dropna_subset_col]
    else:
        dropna_subset = dropna_subset_col

    participant_dfs = []

    # Sorted so that the row order of the result does not depend on directory order.
    for file in sorted(os.listdir(data_path)):

        if not file.endswith('csv'):
            continue

        data = pd.read_csv(os.path.join(data_path, file)).dropna(subset=dropna_subset)
        # Get the group number from the file name
        group = file.split('.')[0].split('_')[-1][5:]

        # Walk the submitted rows directly instead of filtering the frame by id:
        # this is linear and also copes with an id that appears on more than one row.
        for _, row in data.iterrows():
            participant = row[participant_id_column]
            answers = row.iloc[6:81]

            participant_dict = {
                'participant': [participant] * 15,
                'group': [group] * 15,
                'mask': masks_dict[group],
                'prediction': [],
                'confidence': [],
                'trust': [],
                'issues': []
            }
            # Loop through each question group (5 questions) at a time
            for start in range(0, len(answers), 5):
                participant_dict['prediction'].append(answers.iloc[start])
                participant_dict['confidence'].append(answers.iloc[start + 1])
                participant_dict['trust'].append(answers.iloc[start + 2])

                issues = str(answers.iloc[start + 3])
                other_issue = answers.iloc[start + 4]
                if not pd.isna(other_issue):
                    # str(): the free-text column may have been parsed as numeric.
                    issues += ', ' + str(other_issue)

                participant_dict['issues'].append(issues)

            participant_dfs.append(pd.DataFrame(participant_dict))

    if not participant_dfs:
        raise ValueError(f"No submitted responses found in {data_path!r}")

    # Create metadata dataframe from all participants' frames.
    metadata = pd.concat(participant_dfs, ignore_index=True)

    # Read testset metadata and results files to map masks to image_ids and also
    #  get the ground truth and AI predictions.
    metadata_testset = pd.read_csv(os.path.join(project_data_dir, "metadata_testset.csv"))
    result_test = pd.read_csv(os.path.join(project_data_dir, "result_test.csv")).rename(
        {'prediction': 'AI_prediction'}, axis=1)

    # Map masks to image_ids by creating a dict of mappings.
    # The mask named '9898' is not looked up and keeps its own name as image_id.
    mask_to_image_id = dict(zip(metadata_testset['mask'], metadata_testset['image_id']))
    metadata['image_id'] = metadata['mask'].apply(
        lambda mask: mask if mask == '9898' else mask_to_image_id[int(mask)])

    # Add ground truth and AI prediction columns to metadata by merging with the result_test dataframe
    metadata = pd.merge(metadata, result_test[['image_id', 'benign_malignant', 'AI_prediction']],
                        on='image_id', how='left')

    # Encode the diagnosis: melanoma -> 1, nevus to be excised -> 0.5, anything else -> 0.
    prediction_codes = {
        'Melanoma': 1, 'Melanom': 1,
        'Nevus (excise)': 0.5, 'Nävus (exzidieren)': 0.5,
    }
    metadata['prediction'] = metadata['prediction'].apply(lambda x: prediction_codes.get(x, 0))

    # Turn the German and English scale end points into 10 / 1.
    metadata['confidence'] = _replace_scale_anchors(metadata['confidence'], [
        ('absolut sicher', 'gar nicht sicher'),
        ('Completely', 'Not at all'),
    ])
    metadata['trust'] = _replace_scale_anchors(metadata['trust'], [
        ('vollkommen', 'gar nicht'),
        ('Completely', 'Not at all'),
        ('absolut sicher', 'gar nicht sicher'),
    ])

    # Set datatypes
    metadata['confidence'] = metadata['confidence'].astype(int)
    metadata['trust'] = metadata['trust'].astype(int)

    # Map participant Emails to Codes defined in participants.csv
    participants_df = pd.read_csv(os.path.join(project_data_dir, "participants.csv"))
    participants_df['E-Mail'] = participants_df['E-Mail'].apply(lambda x: x.strip() if pd.notna(x) else x)
    participants_code_dict = dict(zip(participants_df['E-Mail'], participants_df['Code']))

    # Ids without a (non-null) code are kept as they are.
    metadata['participant'] = metadata['participant'].apply(
        lambda participant: participants_code_dict[participant]
        if participant in participants_code_dict and pd.notna(participants_code_dict[participant])
        else participant)

    # NOTE(review): presumably codes of English-language participants start with 'e';
    # ids that were not mapped to a code are classified by their first letter as well.
    metadata['language'] = metadata['participant'].apply(lambda x: 'en' if x.startswith('e') else 'de')

    return metadata

In [3]:
# Phase 2: build the metadata frame from the group exports and save it as CSV.
phase2_path = "/home/kti01/Documents/My Files/Projects/Overlap/data/phase2/groups"

metadata_phase2 = process_metadata(
    data_path=phase2_path,
    participant_id_column='email. Email address',
    dropna_subset_col='submitdate. Date submitted',
)
metadata_phase2.to_csv(
    "/home/kti01/Documents/My Files/Projects/Overlap/data/phase2/metadata_phase2.csv",
    index=False,
)

In [4]:
# Phase 3: build the metadata frame from the group exports and save it as CSV.
# The phase-3 exports use the short column names 'email' / 'submitdate'.
phase3_path = "/home/kti01/Documents/My Files/Projects/Overlap/data/phase3/groups"

metadata_phase3 = process_metadata(
    data_path=phase3_path,
    participant_id_column='email',
    dropna_subset_col='submitdate',
)
metadata_phase3.to_csv(
    "/home/kti01/Documents/My Files/Projects/Overlap/data/phase3/metadata_phase3.csv",
    index=False,
)

In [9]:
def _phase_scores(frame, rounding):
    """
    Return (balanced accuracy, sensitivity, specificity), each rounded to 3 decimals.

    `rounding` (np.floor or np.ceil) turns the 0 / 0.5 / 1 predictions into binary
    labels: floor counts a 0.5 as 0, ceil counts it as 1.
    Sensitivity is the recall of label 1, specificity the recall of label 0.
    """
    y_true = frame.benign_malignant
    y_pred = rounding(frame.prediction)
    # Built-in round() works for NumPy scalars and plain Python floats alike.
    return (
        round(metrics.balanced_accuracy_score(y_true, y_pred), 3),
        round(metrics.recall_score(y_true, y_pred), 3),
        round(metrics.recall_score(y_true, y_pred, pos_label=0), 3),
    )


# Per-participant performance in phase 2 and phase 3, for every phase-3 participant.
accuracy_dict = {}
for participant in metadata_phase3.participant.unique():
    # dropna() removes rows with any missing value, e.g. images without ground truth.
    p2 = metadata_phase2[metadata_phase2.participant == participant].dropna()
    p3 = metadata_phase3[metadata_phase3.participant == participant].dropna()

    p2_acc_floor, p2_sen_floor, p2_spec_floor = _phase_scores(p2, np.floor)
    p2_acc_ceil, p2_sen_ceil, p2_spec_ceil = _phase_scores(p2, np.ceil)

    p3_acc_floor, p3_sen_floor, p3_spec_floor = _phase_scores(p3, np.floor)
    # Sensitivity here is the recall of label 1, same as for every other sensitivity column.
    p3_acc_ceil, p3_sen_ceil, p3_spec_ceil = _phase_scores(p3, np.ceil)

    # Balanced accuracy of the AI on the images this participant rated in phase 3.
    p3_AI_acc = round(metrics.balanced_accuracy_score(p3.benign_malignant, p3.AI_prediction), 3)

    group = p3.group.iloc[0]

    accuracy_dict[participant] = [group, p3_AI_acc,
                                  p2_acc_floor, p3_acc_floor,
                                  p2_sen_floor, p3_sen_floor,
                                  p2_spec_floor, p3_spec_floor,
                                  p2_acc_ceil, p3_acc_ceil,
                                  p2_sen_ceil, p3_sen_ceil,
                                  p2_spec_ceil, p3_spec_ceil,
                                  ]
df = pd.DataFrame.from_dict(accuracy_dict, orient='index').reset_index()
df.columns = ['Participant', 'Group', 'AI Accuracy',
              'Accuracy floor (Phase 2)', 'Accuracy floor (Phase 3)',
              'Sensitivity floor (Phase 2)', 'Sensitivity floor (Phase 3)',
              'Specificity floor (Phase 2)', 'Specificity floor (Phase 3)',

              'Accuracy ceil (Phase 2)', 'Accuracy ceil (Phase 3)',
              'Sensitivity ceil (Phase 2)', 'Sensitivity ceil (Phase 3)',
              'Specificity ceil (Phase 2)', 'Specificity ceil (Phase 3)',]
df.to_csv('doctor_accuracy_phase2+3.csv', index=False)

In [11]:
# Phase 1 results are read from a previously written CSV rather than rebuilt here.
metadata_phase1 = pd.read_csv("/home/kti01/Documents/My Files/Projects/Overlap/data/phase1/metadata_phase1.csv")

# Per-participant balanced accuracy, sensitivity and specificity in phase 1.
accuracy_dict = {}
for participant in metadata_phase1.participant.unique():
    # dropna() removes rows with a missing value in any column.
    p1 = metadata_phase1[metadata_phase1.participant == participant].dropna()
    group = p1.group.iloc[0]

    # np.floor / np.ceil make fractional predictions binary (floor -> 0, ceil -> 1).
    # NOTE(review): assumes phase-1 predictions use the same 0 / 0.5 / 1 coding as the
    # later phases - confirm against the code that wrote metadata_phase1.csv.
    scores = []
    for rounding in (np.floor, np.ceil):
        y_pred = rounding(p1.prediction)
        # Built-in round() works for NumPy scalars and plain Python floats alike.
        scores.extend([
            round(metrics.balanced_accuracy_score(p1.benign_malignant, y_pred), 3),
            round(metrics.recall_score(p1.benign_malignant, y_pred), 3),               # sensitivity
            round(metrics.recall_score(p1.benign_malignant, y_pred, pos_label=0), 3),  # specificity
        ])

    # Order: group, then accuracy / sensitivity / specificity for floor, then for ceil.
    accuracy_dict[participant] = [group] + scores

df = pd.DataFrame.from_dict(accuracy_dict, orient='index').reset_index()
df.columns = ['Participant', 'Group',
              'Accuracy floor (Phase 1)',
              'Sensitivity floor (Phase 1)',
              'Specificity floor (Phase 1)',

              'Accuracy ceil (Phase 1)',
              'Sensitivity ceil (Phase 1)',
              'Specificity ceil (Phase 1)']
df.to_csv('doctor_accuracy_phase1.csv', index=False)