## Health event timelines

Contains one function:
- plot_initial_and_generalised_view: plots health event timelines of the person with given person_id

Usage:
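- uncomment the `plot_initial_and_generalised_view(person_id)` call
- set the value of the `person_id` parameter
- run all cells (the cells that define the functions must be run before the call)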

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# DataFrames restored via %store; each holds the corresponding OMOP-format database table
%store -r df_person # person
%store -r df_condition_occurrence # condition_occurrence
%store -r df_drug_exposure # drug_exposure
%store -r df_concept # concept
%store -r df_procedure # procedure_occurrence

# Received from fim_impl.ipynb
%store -r generalized_timeline

In [None]:
# ID of the person whose timelines are plotted
person_id = 5

# Uncomment the call below to draw both timelines for person_id.
# NOTE: this cell appears before the cells that define the plotting
# functions, so those cells must have been run first.
# plot_initial_and_generalised_view(person_id)

In [None]:
def get_person_dataset(person_id):
    """Collect one person's drug, procedure and condition events in one frame.

    Reads the module-level frames ``df_drug_exposure``, ``df_procedure``,
    ``df_condition_occurrence`` and ``df_person``.

    Args:
        person_id: Value matched against the ``person_id`` column of each frame.

    Returns:
        Tuple ``(df_medical_data, person_df)``. ``df_medical_data`` has the
        columns ``person_id``, ``concept_id``, ``start_datetime`` and
        ``source_value`` (the label returned by
        ``get_medical_label_by_concept_id``), with drug rows first, then
        procedure rows, then condition rows, re-indexed from 0.
        ``person_df`` holds the rows of ``df_person`` for ``person_id``.
    """
    # (source frame, concept-id column, date column), listed in output row order.
    event_sources = [
        (df_drug_exposure, 'drug_concept_id', 'drug_exposure_start_date'),
        (df_procedure, 'procedure_concept_id', 'procedure_date'),
        (df_condition_occurrence, 'condition_concept_id', 'condition_start_date'),
    ]

    person_events = []
    for source_frame, concept_column, date_column in event_sources:
        rows = source_frame[source_frame['person_id'] == person_id]
        rows = rows.rename(columns={
            concept_column: 'concept_id',
            date_column: 'start_datetime',
        })
        person_events.append(rows[['person_id', 'concept_id', 'start_datetime']])

    df_medical_data = pd.concat(person_events, ignore_index=True)

    # Map over the column instead of apply(axis=1): a row-wise apply on an
    # empty frame yields a DataFrame rather than a Series, which cannot be
    # assigned to a single column when the person has no events.
    df_medical_data['source_value'] = df_medical_data['concept_id'].map(
        get_medical_label_by_concept_id
    )

    person_df = df_person[df_person['person_id'] == person_id]
    return df_medical_data, person_df

In [None]:
def plot_person_timeline(person, event_data):
    """Scatter-plot a person's events against age, save the figure and show it.

    One scatter series is drawn per distinct ``source_value``. The figure is
    written to
    ``../output_files/person_timeline_plots/person_timeline_id_<person_id>.png``
    (the folder is created when missing) and then displayed.

    Args:
        person: DataFrame whose first row supplies ``birth_datetime`` and
            ``person_id``.
        event_data: DataFrame with ``start_datetime`` and ``source_value``
            columns. It is left unmodified.
    """
    person_birth_datetime = person.iloc[0]['birth_datetime']
    person_id = person.iloc[0]['person_id']

    # Work on a copy so the caller's frame does not silently gain an 'age'
    # column or have 'start_datetime' overwritten.
    events = event_data.copy()
    events['start_datetime'] = pd.to_datetime(events['start_datetime'])
    # Age at each event in years (days since birth / 365.25).
    events['age'] = (events['start_datetime'] - person_birth_datetime).dt.days / 365.25

    plt.figure(figsize=(35, 20))

    for event_type in events['source_value'].unique():
        events_of_type = events[events['source_value'] == event_type]
        plt.scatter(
            events_of_type['age'],
            [event_type] * len(events_of_type),
            label=event_type,
            s=100,
        )

    plt.xlabel("Age (years)")
    plt.ylabel("Concepts")
    plt.grid(False)
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.17), ncol=7)

    # One unlabelled tick per whole year of age. Skipped when there is no
    # usable age (no events), where int(NaN) would otherwise raise.
    first_age = events['age'].min()
    last_age = events['age'].max()
    if pd.notna(first_age):
        year_ticks = list(range(int(first_age), int(last_age) + 1))
        plt.xticks(ticks=year_ticks, labels=[''] * len(year_ticks))
    plt.yticks(ticks=[])

    folder_path = '../output_files/person_timeline_plots'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    plot_title = f"person_timeline_id_{person_id}.png"
    save_path = os.path.join(folder_path, plot_title)
    plt.savefig(save_path)

    plt.show()

In [None]:
def plot_person_timeline_generalized(person, event_data):
    """Scatter-plot generalized events against age, save the figure and show it.

    One scatter series is drawn per distinct ``event_type``. The figure is
    written to
    ``../output_files/person_timeline_plots/generalized_person_timeline_id_<person_id>.png``
    (the folder is created when missing) and then displayed.

    Args:
        person: DataFrame whose first row supplies ``birth_datetime`` and
            ``person_id``.
        event_data: DataFrame with ``start_datetime`` and ``event_type``
            columns. It is left unmodified.
    """
    person_birth_datetime = person.iloc[0]['birth_datetime']
    person_id = person.iloc[0]['person_id']

    # Work on a copy so the caller's frame does not silently gain an 'age'
    # column or have 'start_datetime' overwritten.
    events = event_data.copy()
    events['start_datetime'] = pd.to_datetime(events['start_datetime'])
    # Age at each event in years (days since birth / 365.25).
    events['age'] = (events['start_datetime'] - person_birth_datetime).dt.days / 365.25

    plt.figure(figsize=(20, 10))

    for event_type in events['event_type'].unique():
        events_of_type = events[events['event_type'] == event_type]
        plt.scatter(
            events_of_type['age'],
            [event_type] * len(events_of_type),
            label=event_type,
            s=100,
        )

    plt.xlabel("Age (years)")
    plt.ylabel("Concepts")

    plt.grid(True, linestyle='--', linewidth=0.5)

    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.19), ncol=5)

    # One unlabelled tick per whole year of age. Skipped when there is no
    # usable age (no events), where int(NaN) would otherwise raise.
    first_age = events['age'].min()
    last_age = events['age'].max()
    if pd.notna(first_age):
        year_ticks = list(range(int(first_age), int(last_age) + 1))
        plt.xticks(ticks=year_ticks, labels=[''] * len(year_ticks))

    folder_path = '../output_files/person_timeline_plots'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder_path, exist_ok=True)

    plot_title = f"generalized_person_timeline_id_{person_id}.png"
    save_path = os.path.join(folder_path, plot_title)
    plt.savefig(save_path)

    plt.show()

In [None]:
def get_medical_label_by_concept_id(concept_id):
    """Return the display label for a concept, looked up in ``df_concept``.

    Args:
        concept_id: Concept identifier; converted with ``int()`` before the
            lookup.

    Returns:
        The first matching ``concept_name`` truncated to 50 characters, or
        an empty string when ``df_concept`` has no row for the id.
    """
    wanted_id = int(concept_id)
    is_wanted = df_concept['concept_id'] == wanted_id
    matching_names = df_concept.loc[is_wanted, 'concept_name'].values

    if len(matching_names) == 0:
        return ""
    # Labels are cut to 50 characters.
    return matching_names[0][:50]

In [None]:
def plot_initial_and_generalised_view(person_id):
    """Plot the raw and the generalized health event timelines of one person.

    Args:
        person_id: Identifier passed to ``get_person_dataset`` to select the
            person's events and person record.
    """
    events, person = get_person_dataset(person_id)

    # NOTE(review): this discards the row labelled 0 (the first event in the
    # combined frame) and raises KeyError when there is no such row; the
    # reason for dropping it is not visible here -- confirm it is intended.
    events = events.drop(0)
    plot_person_timeline(person, events)

    # NOTE(review): generalized_timeline is a module-level value and is not
    # filtered by person_id here -- presumably it was produced for the same
    # person; verify against the notebook that stores it.
    generalized_events = pd.DataFrame({
        'start_datetime': generalized_timeline[0],
        'event_type': generalized_timeline[1],
    })
    plot_person_timeline_generalized(person, generalized_events)