In [1]:
# Libraries
import pandas as pd
import PySimpleGUI as sg
import re

In [2]:
# Paths
# CSV file that is read into merged_df further down the notebook.
path = "MergedData.csv"

# Test Your Knowledge

This notebook is to be run by the medical student/doctor who wishes to test his/her knowledge.

The file of a random patient will be pulled from the dataset, and you will be presented with the patient's vital signs, some contextual information and a doctor's note. You will then be asked to select the patient's diagnoses from the list below. If the name of a diagnosis appears in the doctor's note, it will be replaced with "(DIAGNOSIS)". A single blank does not mean that there is only one diagnosis; it is up to you to determine the right number of diagnoses.

The possible diagnoses are:
* asthma
* iron deficiency
* pneumonia
* substance dependence
* fibrosis (any kind)
* malnutrition

You can carry on testing yourself as long as you like. Once you choose to finish testing, you will be presented with two scores:
1. Macro score: This will be the percentage of patients you managed to guess completely correctly. If you guessed only one out of two diagnoses for a patient, you will not get any marks here.
2. Micro score: This will take into consideration the number of diagnoses correctly guessed out of the total number of diagnoses.

To run this notebook, navigate to **Kernel > Restart Kernel and Run All Cells**

merged_df.head(4)

# Load data set and name it merged_df

In [3]:
merged_df = pd.read_csv(path)

# Choosing a random patient & removing keywords from the doctor's note

In [4]:
# First draft Erika
# Quiz loop: show a random patient, collect the ticked diagnoses, give instant
# feedback, and report cumulative scores once the student exits.

# Initialize variables to store cumulative scores
total_correct_patients = 0
total_correct_diagnoses = 0
total_diagnoses = 0
tests_taken = 0

# Define the list of n-grams to replace
ngrams_to_replace = ['asthma', 'iron def', 'pneum', 'substance', 'fibro', 'malnutr']

# One case-insensitive pattern for all n-grams: an n-gram starting at a word
# boundary plus any word characters that follow it, so the whole word
# (e.g. "pneumonia") is blanked out rather than just the stem.
diagnosis_pattern = re.compile(
    r'\b(?:{})\w*'.format('|'.join(re.escape(ngram) for ngram in ngrams_to_replace)),
    flags=re.IGNORECASE,
)

# Diagnoses the student can tick: merged_df column name -> checkbox label.
# Scoring below only ever looks at these columns.
diagnosis_labels = {
    "asthma": "Asthma",
    "irondef": "Iron Deficiency",
    "pneum": "Pneumonia",
    "substancedependence": "Substance Abuse",
    "fibrosisandother": "Fibrosis",
    "malnutrition": "Malnutrition",
}

# Patient details shown above the note: merged_df column name -> display name.
patient_info_keys = {
    "rcount": "Number of Readmissions in the last 180 days",
    "gender": "Gender",
    "hematocrit": "Hematocrit (g/dL)",
    "neutrophils": "Neutrophils (cells/microL)",
    "sodium": "Sodium (mmol/L)",
    "glucose": "Glucose (mmol/L)",
    "bloodureanitro": "Blood Urea Nitrogen (mg/dL)",
    "creatinine": "Creatinine (mg/dL)",
    "bmi": "BMI (kg/m2)",
    "pulse": "Pulse (beats/min)",
    "respiration": "Respiration (breaths/min)",
}

# Setting up the UI (theme and font do not change between patients)
sg.theme('Default1')
standardfont = ("Segoe UI", 12)

while True:
    # Pull up a random entry
    random_patient = merged_df.sample(n=1, replace=False)

    # Blank out diagnosis keywords in a copy of the note; the original
    # 'Doc Note' value in the data is left untouched.
    modified_note = diagnosis_pattern.sub('(DIAGNOSIS)', str(random_patient['Doc Note'].iloc[0]))

    # Build the layout for this patient
    layout = [
        [sg.Text('Test Your Knowledge', font=("Segoe UI Black", 16))],
        [sg.Text('Patient Information:', font=standardfont)],
    ]

    layout += [
        [sg.Text(f'{display_name}: {random_patient[key].iloc[0]}', font=standardfont)]
        for key, display_name in patient_info_keys.items()
    ]

    layout += [
        [sg.Text('Doctor\'s Note:', font=standardfont)],
        [sg.Text(modified_note, size=(60, 8), key='note', font=standardfont)],
        [sg.Text('Select Diagnosis(es):', font=standardfont)],
        # Checkbox keys are the data column names, so the ticked keys can be
        # looked up in the patient record directly.
        [sg.Checkbox(label, key=column, font=standardfont) for column, label in diagnosis_labels.items()],
        [sg.Button('Submit', font=standardfont), sg.Button('Exit', font=standardfont)],
    ]

    # Create the window
    window = sg.Window('Medical Knowledge Test', layout)

    event, values = window.read()

    if event == sg.WINDOW_CLOSED or event == 'Exit':
        # Close the quiz window before leaving so it is not still on screen
        # behind the overall results popup.
        window.close()
        break

    if event == 'Submit':
        # Add one patient to the scoring system
        tests_taken += 1

        # Only keep the diagnoses whose checkbox is ticked
        selected_diagnoses = [column for column in diagnosis_labels if values[column]]

        # Check which diagnoses were selected correctly, incorrectly and those which weren't selected but should have been
        correct_diagnoses = [diag for diag in selected_diagnoses if random_patient[diag].iloc[0] == 1]
        incorrect_diagnoses = [diag for diag in selected_diagnoses if random_patient[diag].iloc[0] == 0]
        # Only the selectable diagnosis columns can be "missed". Scanning every
        # column would count unrelated fields that happen to equal 1 (for
        # example a readmission count of 1) as missed diagnoses.
        missed_diagnoses = [
            diag for diag in diagnosis_labels
            if random_patient[diag].iloc[0] == 1 and diag not in selected_diagnoses
        ]

        # Keep track of the metrics that will be used for the final scoring
        total_correct_patients += 1 if not missed_diagnoses and not incorrect_diagnoses else 0
        total_correct_diagnoses += len(correct_diagnoses)
        total_diagnoses += len(correct_diagnoses) + len(missed_diagnoses)

        # Give instant feedback to the student
        feedback = f"Selected Correctly: {', '.join(correct_diagnoses) or 'None'}\n"
        feedback += f"Selected Incorrectly: {', '.join(incorrect_diagnoses) or 'None'}\n"
        feedback += f"Missed Diagnoses: {', '.join(missed_diagnoses) or 'None'}"

        sg.popup("Feedback", feedback)

    # The next patient gets a fresh window
    window.close()

# Calculate overall scores after the user exits
overall_macro_score = total_correct_patients / tests_taken if tests_taken > 0 else 0
overall_micro_score = total_correct_diagnoses / total_diagnoses if total_diagnoses > 0 else 0

# Display overall scores
overall_feedback = f"Overall Macro Score (Patients guessed completely correctly): {overall_macro_score * 100:.2f}%\n"
overall_feedback += f"Overall Micro Score (Percentage of diagnoses guessed correctly): {overall_micro_score * 100:.2f}%\n"

sg.popup("Overall Results", overall_feedback)

#The below lines of code were used to experiment and navigate through PySimpleGUI to understand what was customizable.

import PySimpleGUI as sg

# Exploration aid only: the theme previewer opens its own window, so it is
# off by default to keep "Restart Kernel and Run All Cells" from popping it
# up after the quiz. Set this to True to browse the available themes.
SHOW_THEME_PREVIEWER = False

if SHOW_THEME_PREVIEWER:
    sg.theme_previewer()

#for font in sg.Text.fonts_installed_list():
    #print(font)