In [None]:
import ast

import pandas as pd

from src.preprocessing.chief_complaint_mapping import add_system_and_chief_complaints_to_notes
from src.preprocessing.disease_mapping import add_primary_diagnosis_column, add_wikidoc_columns
from src.preprocessing.note_ranking import add_rank_columns
from src.utils import init_notebook

# Project-specific notebook setup from src.utils.
# NOTE(review): presumably configures the working directory / environment so the
# relative data paths used later resolve — confirm against src.utils.init_notebook.
init_notebook()

# Reload imported modules automatically before each cell runs, so edits to the
# src.* modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from src.utils import load_diseases_for_chief_complaint
from src.preprocessing.chief_complaint_mapping import extract_chief_complaint
from src.preprocessing.utils import load_mimic_notes, filter_mimic_by_meta_data
from src.exp_args import ExpArgs

# Load the MIMIC notes described by the experiment arguments.
exp_args = ExpArgs()
notes = load_mimic_notes(exp_args)

# Build lookup tables keyed by chief complaint from the scheme CSV. The
# 'synonyms' column stores Python literals as text, so each entry is parsed
# with ast.literal_eval.
cc_df = pd.read_csv('data/medical_schemes/cc_synonyms_systems.csv')
cc_with_synonyms = {
    complaint: ast.literal_eval(raw_synonyms)
    for complaint, raw_synonyms in zip(cc_df['chief_complaint'], cc_df['synonyms'])
}
cc_system_map = {
    complaint: system
    for complaint, system in zip(cc_df['chief_complaint'], cc_df['systems'])
}

# Extract the chief-complaint text from every discharge note, then annotate the
# notes with system and chief-complaint information.
notes['cc_string'] = notes['discharge_note'].apply(extract_chief_complaint)
annotated_df = add_system_and_chief_complaints_to_notes(
    df=notes, cc_with_synonyms=cc_with_synonyms, cc_system_map=cc_system_map,
)

# Add the primary_diagnosis column derived from the discharge note.
# The return value is not used, so this presumably modifies annotated_df in place.
add_primary_diagnosis_column(annotated_df)

# Restrict to the subset of MIMIC notes used in Merlin 1.0.
chief_complaint = 'abdominal pain'
abdominal_pain_notes = filter_mimic_by_meta_data(
    df=annotated_df, icd_version=10, data_source='hosp', chief_complaint=chief_complaint,
)

# Load the wikidoc diseases for this chief complaint and attach the wikidoc
# disease / disease_vector columns, based on the primary diagnoses.
wikidoc_df = load_diseases_for_chief_complaint(exp_args, chief_complaint)
add_wikidoc_columns(abdominal_pain_notes, wikidoc_df)

In [None]:
# Add rank columns to the abdominal-pain notes using the wikidoc disease table.
# The return value is not used, so this presumably modifies the DataFrame in place.
# NOTE(review): the meaning of the positional `True` flag is not visible here —
# check add_rank_columns in src.preprocessing.note_ranking and consider passing
# it by keyword.
add_rank_columns(abdominal_pain_notes, wikidoc_df, True)