In [None]:
%reset

In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
# Enable automatic reloading of imported modules so that edits to the project
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch the working directory before importing the project package.
# NOTE(review): this is an absolute path on one specific machine; the notebook
# will not run elsewhere unless it is adjusted.
os.chdir('/home/adam/adam/causal_inference')

from causal_inference.make_data.make_data import UseCaseLoader

# NOTE(review): the wildcard import hides which names come from this module.
# Cells further down call `load_measurements_to_split_supine_sessions` and
# `aggfunc_last` before this notebook defines its own versions, so on a fresh
# top-to-bottom run those names presumably come from here — confirm.
from causal_inference.make_data.make_proning_sessions import *

In [None]:
# Loader object used by the data-access calls in the cells below.
dl = UseCaseLoader()

In [None]:
# Locations of the position-measurements file and of the unique-sessions file
# that is read back with `pd.read_csv` further down.
# The data root can be overridden through the CAUSAL_INFERENCE_DATA_DIR
# environment variable; when it is not set, the original hard-coded location
# is used, so existing behaviour is unchanged.
data_dir = os.environ.get('CAUSAL_INFERENCE_DATA_DIR',
                          '/home/adam/adam/data/causal_inference/data')
raw_data_path = os.path.join(data_dir, 'raw', 'position_measurements.csv')
data_sessions_path = os.path.join(data_dir, 'interim', 'unique_sessions_test.csv')

In [None]:
# Position measurements for `raw_data_path`. What the loader does with the path
# (read from it or write to it) is defined in `UseCaseLoader` and not visible here.
dl.get_position_measurements(path=raw_data_path)

In [None]:
# Derive the unique sessions. Judging by the argument names this reads
# `raw_data_path` and writes `data_sessions_path` (which the next cell reads),
# working in 10 batches — confirm in `UseCaseLoader.make_unique_sessions`.
dl.make_unique_sessions(load_path=raw_data_path,
                        save_path=data_sessions_path,
                        n_of_batches=10)

In [None]:
# Load the sessions table from disk.
df = pd.read_csv(data_sessions_path)

In [None]:
# Take the second session as the worked example for the cells below.
row = df.iloc[1]

In [None]:
# Show which columns a session row has.
df.columns.to_list()

In [None]:
    ############
    ### LOAD ###
    ############

    # Load the inclusion-criteria measurements (FiO2, PEEP and the pO2 variants) for the example session.
    # TODO: don't load measurements close to the session end — the call below still passes the full
    # start/end window of the session.
df_measurements = load_measurements_to_split_supine_sessions(dl,
                                                                 hash_patient_id=row.hash_patient_id,
                                                                 parameters=['fio2', 'peep', 'po2', 'po2_arterial', 'po2_unspecified'],
                                                                 start_timestamp=row.start_timestamp,
                                                                 end_timestamp=row.end_timestamp)

In [None]:
# Inspect the loaded measurements.
df_measurements

In [None]:

# Assign each measurement to an hourly bin by flooring its timestamp to 60 minutes.
df_measurements['timestamp_to_split'] = pd.to_datetime(df_measurements['effective_timestamp']).dt.floor('60min')

In [None]:
# For every hourly bin (rows) and parameter (columns), keep the timestamp of the
# last measurement in that group.
# NOTE(review): "last" means last in row order within the group — presumably
# chronological; confirm the ordering returned by the loader.
df_effective_timestamp = pd.pivot_table(df_measurements,
                                            values='effective_timestamp',
                                            index=['timestamp_to_split'], # hourly bin the measurements are grouped on
                                            columns='pacmed_name',
                                            aggfunc=aggfunc_last # last measurement per group (TODO: why not the first?)
                                            ).reset_index()

In [None]:
# Inspect the per-bin, per-parameter timestamps.
df_effective_timestamp

In [None]:
# Latest timestamp in each row, stored as that bin's 'start_timestamp'.
# The row-wise max runs over every column, including 'timestamp_to_split'.
df_effective_timestamp['start_timestamp'] = df_effective_timestamp.max(axis=1)

In [None]:
    # Group measurements on 'timestamp_to_split': one row per hourly bin, one column per parameter.
    # This rebinds `df_measurements` from the long table to the pivoted one, so the
    # earlier cells that expect 'effective_timestamp' cannot be re-run after this one.
df_measurements = pd.pivot_table(df_measurements,
                                     values='numerical_value', # stores the value of each measurement
                                     index=['timestamp_to_split'],
                                     columns='pacmed_name',
                                     aggfunc=aggfunc_last # takes the last value of each group
                                     ).reset_index()

In [None]:
# Inspect the pivoted measurements.
df_measurements

In [None]:
# Attach each hourly bin's start timestamp by matching on 'timestamp_to_split'
# instead of relying on row position. The value pivot and the timestamp pivot
# are built independently, so they are not guaranteed to contain the same rows
# (e.g. a bin whose values are all missing may be absent from one of them);
# after reset_index() equal positions would then refer to different bins.
df_measurements['start_timestamp'] = df_measurements['timestamp_to_split'].map(
    df_effective_timestamp.set_index('timestamp_to_split')['start_timestamp'])

In [None]:
# Keep only the bins in which every column has a value. reset_index(drop=False)
# renumbers the rows and keeps the previous row labels in an 'index' column.
df_measurements = df_measurements.dropna(axis=0, how="any").reset_index(drop=False)

In [None]:
# Inspect the complete bins.
df_measurements

In [None]:
# Check the Python type of the session id (a string suffix is appended to it further down).
type(row.hash_session_id)

In [None]:
# 'index' plus whichever of the requested parameters are present as columns.
# The set intersection does not preserve the order of the parameter list.
['index'] + list(set(df_measurements.columns.to_list()) & set(['fio2', 'peep', 'po2', 'po2_arterial', 'po2_unspecified']))

In [None]:
    ### CONVERT ###
    # Populate the artificial supine sessions (one per remaining row of
    # `df_measurements`) with the session-level values of the original session.
    #
    # This cell was lifted from a function body: it used names that are never
    # defined in this notebook and ended in a bare `return`, which is a
    # SyntaxError at cell level. The session-level values are now taken from the
    # example `row`, and the result is shown as the cell's last expression.
    # The whole cell keeps one uniform leading indent, which IPython strips.
    patient_id = row.hash_patient_id
    session_id = row.hash_session_id
    # Parsed explicitly: `row` comes from `pd.read_csv` without date parsing, so
    # the value may be a plain string.
    end_timestamp = pd.to_datetime(row.end_timestamp)
    # NOTE(review): assumes the sessions file carries a 'pacmed_origin_hospital'
    # column; `.get` yields None when it does not — confirm against df.columns.
    pacmed_origin_hospital = row.get('pacmed_origin_hospital')

    if len(df_measurements.index) == 0:
        df_measurements = pd.DataFrame([])
    else:
        # Plain column assignment is used throughout: it replaces the column
        # (and its dtype) instead of writing into an existing one in place.
        df_measurements['hash_patient_id'] = patient_id
        df_measurements['treated'] = False
        df_measurements['pacmed_origin_hospital'] = pacmed_origin_hospital
        df_measurements['end_timestamp'] = end_timestamp

        # Whole hours between the start of each artificial session and the end
        # of the original session. Computed from total seconds because casting
        # with `.astype('timedelta64[h]')` is rejected by pandas 2.x.
        duration = df_measurements['end_timestamp'] - pd.to_datetime(df_measurements['start_timestamp'])
        df_measurements['duration_hours'] = (duration.dt.total_seconds() // 3600).astype('int')

        # Make the session id unique per artificial session: "<session id>_<row number>".
        df_measurements['index'] = df_measurements.index
        df_measurements['hash_session_id'] = str(session_id) + '_' + df_measurements['index'].astype('str')

    df_measurements

In [None]:
def aggfunc_last(x):
    """Return the last element of a group, for use as a pandas ``aggfunc``.

    Parameters
    ----------
    x : pandas.Series
        The values of one group, in their original row order.

    Returns
    -------
    The last element of ``x`` by position. An empty ``x`` is returned
    unchanged.

    Notes
    -----
    A single-element group yields its element rather than the one-element
    Series itself, so the return type does not depend on the group size.
    """
    if len(x) == 0:
        # Nothing to pick from; hand the empty input back as before.
        return x
    return x.iloc[-1]

In [None]:
def load_measurements_to_split_supine_sessions(dl,
                                               hash_patient_id:object,
                                               parameters,
                                               start_timestamp,
                                               end_timestamp):
    """Fetch one patient's measurements and file the pO2 variants under 'po2'.

    Parameters
    ----------
    dl
        Loader object exposing ``get_single_timestamp``.
    hash_patient_id
        Patient identifier; passed to the loader as a one-element list.
    parameters
        Names of the parameters to request from the loader.
    start_timestamp, end_timestamp
        Passed to the loader as ``from_timestamp`` and ``to_timestamp``.

    Returns
    -------
    The frame returned by the loader (columns requested: 'pacmed_name',
    'pacmed_subname', 'numerical_value', 'effective_timestamp'). Rows named
    'po2_arterial' or 'po2_unspecified' are renamed to 'po2', each only when
    that name was part of ``parameters``. The frame is modified in place.
    """
    # TODO: move the timedelta here so measurements close to the session end
    # are not loaded.
    requested_columns = ['pacmed_name',
                         'pacmed_subname',
                         'numerical_value',
                         'effective_timestamp']
    measurements = dl.get_single_timestamp(patients=[hash_patient_id],
                                           parameters=parameters,
                                           columns=requested_columns,
                                           from_timestamp=start_timestamp,
                                           to_timestamp=end_timestamp)

    # Group 'po2_arterial' and 'po2_unspecified' together under 'po2'.
    for po2_variant in ('po2_arterial', 'po2_unspecified'):
        if po2_variant not in set(parameters):
            continue
        is_variant = measurements.pacmed_name == po2_variant
        if is_variant.any():
            measurements.loc[is_variant, 'pacmed_name'] = 'po2'

    return measurements

