In [3]:
import numpy as np
import pandas as pd
import os
from datetime import datetime
import seaborn as sns
import pylab as pl
import matplotlib.ticker as ticker
from pandas.plotting import register_matplotlib_converters
from matplotlib import pyplot as plt
from matplotlib import colors
from pathlib import Path
from matplotlib.pyplot import MultipleLocator

import warnings
warnings.filterwarnings("ignore")

Load data and select participants

In [4]:
# Root folder holding the per-patient input CSV sub-folders.
DATA_DIR = "raw_data"
# Folder the summary figure is written to ("plots/paper" with the platform separator).
PLOT_DIR = os.sep.join(("plots", "paper"))

In [5]:
def select_patient():
    """Return the ids of patients that appear in every weekly entropy summary.

    Reads the ``patient_id`` column of the six ``All_activity_*`` summary CSVs
    (entropy, entropy_mk and entropy_production_rate, each for daytime and
    night) and keeps only the ids present in all of them. The file names are
    relative, so they are resolved against the current working directory.

    Returns
    -------
    pandas.Series
        Series named ``patient_id`` with the shared ids in ascending order and
        a fresh ``0..n-1`` index. Empty when no id is common to all six files.

    Raises
    ------
    FileNotFoundError
        If one of the summary files is missing (raised by ``pd.read_csv``).
    """
    summary_files = (
        'All_activity_daytime_per_week_entropy.csv',
        'All_activity_daytime_per_week_entropy_mk.csv',
        'All_activity_daytime_per_week_entropy_production_rate.csv',
        'All_activity_night_per_week_entropy.csv',
        'All_activity_night_per_week_entropy_mk.csv',
        'All_activity_night_per_week_entropy_production_rate.csv',
    )

    common_ids = None
    for file_name in summary_files:
        file_ids = set(
            pd.read_csv(file_name, usecols=['patient_id'])['patient_id'].unique().tolist()
        )
        common_ids = file_ids if common_ids is None else common_ids & file_ids

    shared = list(common_ids)
    # An explicit dtype for the empty case avoids pandas' "default dtype for
    # empty Series" warning; otherwise let pandas infer it from the ids.
    all_patients = pd.Series(shared, name='patient_id', dtype=None if shared else object)

    return all_patients.sort_values().reset_index(drop=True)

Define Entropy Measures

In [6]:
# Plotting window applied to every entropy track (both bounds exclusive).
_ENTROPY_WINDOW_START = '2021-07-01'
_ENTROPY_WINDOW_END = '2022-01-02'

# (lower, upper, colour) bins for the per-patient z-score, light to dark blue.
# Bounds are inclusive and the bins are applied in order, so a value sitting
# exactly on a shared boundary ends up with the later (darker) colour.
# NOTE(review): +/-0.67 is presumably the quartile boundary of a standard
# normal distribution -- confirm with the analysis authors.
_ENTROPY_ZSCORE_COLORS = (
    (-9999, -0.67, '#87CEFA'),
    (-0.67, 0, '#1E90FF'),
    (0, 0.67, '#0000FF'),
    (0.67, 9999, '#000080'),
)


def _entropy_track(my_patient_id, folder, date_column, value_column, track_position):
    """Load one weekly entropy measure for a patient and turn it into a plot track.

    Parameters
    ----------
    my_patient_id
        Patient identifier; the data is read from
        ``DATA_DIR/<folder>/<my_patient_id>.csv``.
    folder : str
        Sub-folder of ``DATA_DIR`` holding the per-patient CSV files.
    date_column : str
        Name of the column with the week date.
    value_column : str
        Name of the column with the entropy measure.
    track_position : int
        Constant coordinate at which the whole track is drawn.

    Returns
    -------
    tuple
        ``(dates, positions, colours)``: a ``datetime64[D]`` array of the weeks
        inside the plotting window, a list repeating ``track_position`` once
        per week, and a Series of hex colours encoding the z-score bin.
    """
    csv_path = os.path.join(DATA_DIR, folder, f"{my_patient_id}.csv")
    # reindex keeps the original tolerance for a missing column (it becomes NaN).
    weekly = pd.read_csv(csv_path).reindex(columns=[date_column, value_column])
    weekly = weekly[weekly[value_column].notna()].copy()
    weekly[date_column] = pd.to_datetime(weekly[date_column]).dt.normalize()

    # Standardise against the patient's whole history (population std, ddof=0),
    # i.e. before the plotting window is applied.
    values = weekly[value_column]
    z_scores = (values - values.mean()) / values.std(ddof=0)

    # A single .loc[rows, column] write: the chained form
    # ``frame['color'].loc[mask] = ...`` does not update the frame under
    # pandas Copy-on-Write.
    # NOTE: rows whose z-score is NaN (e.g. zero variance) keep colour None.
    weekly['color'] = None
    for lower, upper, colour in _ENTROPY_ZSCORE_COLORS:
        weekly.loc[z_scores.between(lower, upper), 'color'] = colour

    in_window = (weekly[date_column] > _ENTROPY_WINDOW_START) & (
        weekly[date_column] < _ENTROPY_WINDOW_END)
    weekly = weekly[in_window]

    x = weekly[date_column].values.astype('datetime64[D]')
    y = [track_position] * len(weekly)

    return x, y, weekly['color']


def entropy_daytime(my_patient_id):
    """Daytime weekly entropy track (position -14); see ``_entropy_track``."""
    return _entropy_track(my_patient_id, "activity_daytime_per_week_entropy",
                          'start_date', 'entropy_week', -14)


def entropy_night(my_patient_id):
    """Night weekly entropy track (position -12); see ``_entropy_track``."""
    return _entropy_track(my_patient_id, "activity_night_per_week_entropy",
                          'start_date', 'entropy_week', -12)


def entropy_mk_daytime(my_patient_id):
    """Daytime weekly ``entropy_mk`` track (position -10); see ``_entropy_track``."""
    return _entropy_track(my_patient_id, "activity_daytime_per_week_entropy_mk",
                          'start_date', 'entropy_week', -10)


def entropy_mk_night(my_patient_id):
    """Night weekly ``entropy_mk`` track (position -8); see ``_entropy_track``."""
    return _entropy_track(my_patient_id, "activity_night_per_week_entropy_mk",
                          'start_date', 'entropy_week', -8)


def entropy_production_rate_daytime(my_patient_id):
    """Daytime weekly entropy-production track (position -6); see ``_entropy_track``."""
    return _entropy_track(my_patient_id,
                          "activity_daytime_per_week_entropy_production_rate_day",
                          'week', 'entropy_production', -6)


def entropy_production_rate_night(my_patient_id):
    """Night weekly entropy-production track (position -4); see ``_entropy_track``."""
    return _entropy_track(my_patient_id,
                          "activity_night_per_week_entropy_production_rate_day",
                          'week', 'entropy_production', -4)


Define Healthcare-related Events

In [7]:
# Plotting window applied to every label track (both bounds exclusive).
_LABEL_WINDOW_START = '2021-07-01'
_LABEL_WINDOW_END = '2022-01-02'


def _label_track(my_patient_id, label_column, track_position):
    """Load one weekly healthcare-event label for a patient as a plot track.

    Parameters
    ----------
    my_patient_id
        Patient identifier; the data is read from
        ``DATA_DIR/patients_labels/<my_patient_id>.csv``.
    label_column : str
        Name of the label column to extract.
    track_position : int
        Coordinate at which events of this label are drawn.

    Returns
    -------
    tuple
        ``(dates, positions)``: a ``datetime64[D]`` array with every week
        inside the plotting window, and an equally long Series holding
        ``track_position * flag`` for weeks with a non-zero flag and NaN for
        every other week.
    """
    csv_path = os.path.join(DATA_DIR, "patients_labels", f"{my_patient_id}.csv")
    # reindex keeps the original tolerance for a missing column (it becomes NaN).
    labels = pd.read_csv(csv_path).reindex(columns=['start_date', label_column])
    labels['week'] = pd.to_datetime(labels['start_date'], format='%Y/%m/%d')

    in_window = (labels['week'] > _LABEL_WINDOW_START) & (
        labels['week'] < _LABEL_WINDOW_END)
    labels = labels[in_window]

    x = labels['week'].values.astype('datetime64[D]')

    # A plain ``track_position * flag`` maps every zero flag to 0, which is the
    # "Disturbed sleep pattern" track, so non-events of any label would be
    # drawn there. Masking them to NaN leaves them out of the track.
    # NOTE(review): a no-op if the label files already encode "no event" as a
    # missing value -- confirm the encoding of patients_labels.
    flags = labels[label_column]
    y = (track_position * flags).where(flags.notna() & (flags != 0))

    return x, y


def label_depressed_anxiety(my_patient_id):
    """``depressed_anxiety`` events at position -2; see ``_label_track``."""
    return _label_track(my_patient_id, 'depressed_anxiety', -2)


def label_disturbed_sleep_pattern(my_patient_id):
    """``disturbed_sleep_pattern`` events at position 0; see ``_label_track``."""
    return _label_track(my_patient_id, 'disturbed_sleep_pattern', 0)


def label_agitation_irritability_aggression(my_patient_id):
    """``agitation_irritability_aggression`` events at position 2; see ``_label_track``."""
    return _label_track(my_patient_id, 'agitation_irritability_aggression', 2)


def label_accidental_fall(my_patient_id):
    """``accidental_fall`` events at position 4; see ``_label_track``."""
    return _label_track(my_patient_id, 'accidental_fall', 4)


def label_motor_function_behavior(my_patient_id):
    """``motor_function_behavior`` events at position 6; see ``_label_track``."""
    return _label_track(my_patient_id, 'motor_function_behavior', 6)


def label_period_of_confusion(my_patient_id):
    """``period_of_confusion`` events at position 8; see ``_label_track``."""
    return _label_track(my_patient_id, 'period_of_confusion', 8)


def label_uti(my_patient_id):
    """``uti`` events at position 10; see ``_label_track``."""
    return _label_track(my_patient_id, 'uti', 10)


def label_hospital(my_patient_id):
    """``hospital`` events at position 12; see ``_label_track``."""
    return _label_track(my_patient_id, 'hospital', 12)

Define the main plotting function

In [8]:
def creat_dataset():
    """Draw the entropy and healthcare-event overview for all selected patients.

    For every patient returned by ``select_patient`` one panel is drawn: six
    entropy tracks (blue squares coloured by z-score bin) and eight
    healthcare-event tracks (red markers), with the track on the horizontal
    axis and the week on the vertical axis. All panels sit side by side in a
    single figure that is saved to ``PLOT_DIR/paper_all_patients.png``
    (``PLOT_DIR`` is created if needed). The function name is kept as-is for
    existing callers.

    Returns
    -------
    None
        The figure is written to disk; nothing is drawn or saved when no
        patient is selected.
    """
    all_patients = np.array(select_patient()).tolist()
    plot_path = os.path.join(PLOT_DIR, "paper_all_patients.png")
    n_patients = len(all_patients)

    print(f"{n_patients} patients.")

    if n_patients == 0:
        # plt.subplots cannot build a grid with zero columns.
        return

    # Loader for each entropy track, in drawing order.
    entropy_loaders = (
        entropy_daytime,
        entropy_night,
        entropy_mk_daytime,
        entropy_mk_night,
        entropy_production_rate_daytime,
        entropy_production_rate_night,
    )
    # Loader and marker for each healthcare-event track, in drawing order.
    label_loaders = (
        (label_depressed_anxiety, '^'),
        (label_disturbed_sleep_pattern, 'o'),
        (label_agitation_irritability_aggression, 'v'),
        (label_accidental_fall, '<'),
        (label_motor_function_behavior, 'X'),
        (label_period_of_confusion, '+'),
        (label_uti, 'D'),
        (label_hospital, 'x'),
    )

    x_labels = ['Entropy for daytime', 'Entropy for night', 'ERFMK for daytime', 'ERFMK for night', 'EP for daytime', 'EP for night', 'Anxiety and depression', 'Disturbed sleep pattern', 'Agitation', 'Accidental fall', 'Motor function behaviour', 'Confusion', 'UTI', 'Hospital service']
    x_names = (-14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12)
    # White separator lines half-way between neighbouring tracks.
    separators = [-13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11]

    row_height = 3
    row_width = 10
    # NOTE(review): the figure is row_width * n_patients inches wide and saved
    # at dpi=200 -- confirm this stays within the backend's image-size limit
    # for large cohorts.
    # squeeze=False keeps ``axs`` two-dimensional even for a single patient.
    fig, axs = plt.subplots(nrows=1, ncols=n_patients, figsize=(
        row_width * n_patients, row_height), sharex=False, sharey=False, squeeze=False)
    fig.subplots_adjust(hspace=0, wspace=1)

    for i, patient_id in enumerate(all_patients):
        print(i + 1, '/', n_patients, patient_id)

        # Load everything for this patient before touching the axes.
        entropy_tracks = [load(patient_id) for load in entropy_loaders]
        label_tracks = [(load(patient_id), marker) for load, marker in label_loaders]

        # Figure parameters
        ax = axs[0, i]

        # The separators span the date range of the daytime entropy track;
        # skip them when that track is empty (min()/max() would fail).
        daytime_dates = entropy_tracks[0][0]
        if len(daytime_dates) > 0:
            ax.vlines(x=separators, ymin=min(daytime_dates), ymax=max(daytime_dates), color='white', lw=1)
        ax.set_xticks(x_names, x_labels, fontsize='x-small')

        for xtick in ax.get_xticklabels():
            xtick.set_rotation(90)
            xtick.set_fontsize(12)
        for ytick in ax.get_yticklabels():
            ytick.set_fontsize(12)

        ax.set_xlim(-16, 14)
        ax.tick_params(bottom=False, left=False, right=False)
        ax.patch.set_facecolor('#F0FFFF')  # background color
        ax.patch.set_alpha(0.4)  # Transparency
        ax.spines['right'].set_color('#FFFFFF')
        ax.spines['left'].set_color('#FFFFFF')
        ax.spines['right'].set_linewidth(1)
        ax.spines['left'].set_linewidth(1)

        if i > 0:
            ax.spines['left'].set_visible(False)

        # Draw figures of features: track position on x, week on y.
        for dates, positions, colours in entropy_tracks:
            ax.scatter(positions, dates, c=colours, s=25, marker='s')

        for (dates, positions), marker in label_tracks:
            ax.scatter(positions, dates, s=20, marker=marker, c='r')

    # savefig does not create missing directories.
    os.makedirs(PLOT_DIR, exist_ok=True)
    fig.savefig(plot_path, bbox_inches="tight", dpi=200)

In [None]:
# Build the all-patients overview figure and save it under PLOT_DIR.
creat_dataset()