In [None]:
import numpy as np
import pandas as pd
from termcolor import colored

In [None]:
# Load both raters' annotation workbooks and give them matching column names
projectfolder = '/Users/Symptoms_Documentation/'

# Per-workbook header fixes so both frames expose 'Study ID' and 'Note ID'.
# 'Unnamed: 1' is the name pandas assigns to a column with no header cell.
# NOTE(review): rater 1's ID column is headed by the integer 77 -- presumably a
# data value sitting in the header row; confirm against Rater1.xlsx.
rater1_header_fixes = {77: 'Study ID', 'Unnamed: 1': 'Note ID'}
rater2_header_fixes = {
    'M.Soreness': 'Muscle Soreness',
    'Study ID ': 'Study ID',  # header carries a trailing space
    'Unnamed: 1': 'Note ID',
}

df_rev1 = pd.read_excel(projectfolder + 'data/Rater1.xlsx').rename(columns=rater1_header_fixes)
df_rev2 = pd.read_excel(projectfolder + 'data/Rater2.xlsx').rename(columns=rater2_header_fixes)

# Row counts of the two sheets side by side, then a preview of each frame
print(len(df_rev1), len(df_rev2))
display(df_rev1.head(), df_rev2.head())

In [None]:
# Patient IDs
# Distinct Study IDs that appear in rater 1's sheet.
sample_ids = df_rev1['Study ID'].unique()

# Hand-entered Study ID lists, one per symptom.
# NOTE(review): presumably the patients known/expected to have each symptom --
# confirm where these lists come from.
cramps = [3, 9, 16, 19, 20, 38, 54, 57, 60, 65, 69, 76, 81, 100]
dryskin = []  # empty; not included in the report below
fatigue = [61, 93]
itching = [2, 3, 6, 9, 10, 11, 16, 31, 37, 38, 60, 62, 75, 77, 83]
musclesore = [66, 86, 88, 94, 95]

# For each symptom, show which of its listed IDs are present in the sample
# (np.intersect1d returns the sorted, de-duplicated common values).
for label, listed_ids in (
    ('cramps: ', cramps),
    ('fatigue: ', fatigue),
    ('itching: ', itching),
    ('muscle soreness: ', musclesore),
):
    print(label, np.intersect1d(sample_ids, listed_ids))

In [None]:
# Symptom columns compared between the two raters
symptom_labels = ['Fatigue', 'Cramps', 'Dry skin', 'itching', 'Muscle Soreness']

# Select relevant columns
df_symp1 = df_rev1[['Study ID', 'Note ID'] + symptom_labels]
df_symp2 = df_rev2[['Study ID', 'Note ID'] + symptom_labels]

# Per-rater "symptom present" masks (value > 0; missing values compare as False).
# They do not depend on the patient, so compute them once up front.
present1 = {symp: df_symp1[symp] > 0 for symp in symptom_labels}
present2 = {symp: df_symp2[symp] > 0 for symp in symptom_labels}

match_list = []
# Patients are enumerated from rater 1's sheet; an ID that appears only in
# rater 2's sheet produces no rows in the result.
uniq_pts = df_symp1['Study ID'].unique()

# One record per (patient, symptom): whether each rater flagged the symptom in
# any of that patient's notes, and in which notes.
for pt in uniq_pts:
    # Build the patient mask separately for each rater. A mask derived from
    # rater 1's rows is only valid for rater 2's frame if both sheets have the
    # same rows in the same order; otherwise it selects the wrong notes or makes
    # .loc fail on an unalignable boolean indexer.
    pt_filt1 = df_symp1['Study ID'] == pt
    pt_filt2 = df_symp2['Study ID'] == pt

    for symp in symptom_labels:
        # Notes of this patient in which the symptom is marked, per rater
        hits1 = pt_filt1 & present1[symp]
        hits2 = pt_filt2 & present2[symp]

        match_list.append({
            'Study ID': pt,
            'Symptom': symp,
            'Rater1': hits1.any(),
            'Rater2': hits2.any(),
            'Rater1 Note IDs': df_symp1.loc[hits1, 'Note ID'].to_list(),
            'Rater2 Note IDs': df_symp2.loc[hits2, 'Note ID'].to_list(),
        })

# turn match_list into a dataframe
df_match = pd.DataFrame(match_list)
display(df_match)

In [None]:
# Save reviewer match
# Written with to_csv defaults, so the DataFrame index is emitted as the first
# (unnamed) column of the file.
reviewer_match_path = projectfolder + 'data/reviewer_match.csv'
df_match.to_csv(reviewer_match_path)