In [None]:
%reset

In [None]:
import pandas as pd
import numpy as np

import sys, os

import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from scipy.stats import wasserstein_distance
from scipy import stats


# Analysis

In [None]:
# Work from the data directory and read the 8h-inclusion export.
os.chdir('/home/adam/adam/data/19012021/')
df = pd.read_csv('observational_data_8h_inclusion_all_outputs_1.csv')

# Both session boundaries are converted to datetime64[ns] in one pass.
df = df.astype({
    'start_timestamp': 'datetime64[ns]',
    'end_timestamp': 'datetime64[ns]',
})
#df.info(max_cols=200)


In [None]:
# Summary statistics of the four inclusion measurements. Printed explicitly:
# a bare expression that is not the last statement of a cell shows nothing.
print(df[['fio2_inclusion_8h', 'peep_inclusion_8h', 'po2_inclusion_8h', 'pf_ratio_inclusion_8h']].describe())

# Row-wise inclusion mask: 0 < P/F ratio < 150, PEEP >= 5 and FiO2 >= 60.
INCLUDED = (
    (df.pf_ratio_inclusion_8h > 0)
    & (df.pf_ratio_inclusion_8h < 150)
    & (df.peep_inclusion_8h >= 5)
    & (df.fio2_inclusion_8h >= 60)
)

# Row count before and after filtering.
print(len(df.index))
# .copy() detaches the result from the unfiltered frame, so the later
# df['error'] = ... assignment does not raise SettingWithCopyWarning.
df = df[INCLUDED].copy()
print(len(df.index))
df.info(max_cols=200)

#### 1. Bed rotation

In [None]:
# Distinct patient ids left after the inclusion filter, as a plain list that
# is passed to the data-loader queries below.
patients = df['hash_patient_id'].unique().tolist()

In [None]:
# Data-warehouse access helper used for the measurement queries below.
from data_warehouse_utils.dataloader import DataLoader

# NOTE(review): constructed without arguments; connection settings are
# presumably resolved by the loader itself — confirm.
dl = DataLoader()

In [None]:
# Range measurements of parameter 'position', restricted to sub-parameter
# 'position_body', for the included patients.
df_position = dl.get_range_measurements(patients=patients,
                                        parameters=['position'],
                                        sub_parameters=['position_body'])

In [None]:
# Frequency of each recorded value in the 'position_body' measurements.
print(df_position['effective_value'].value_counts())

In [None]:
# Same query as for df_position, but for sub-parameter 'position_bed'.
df_position_new = dl.get_range_measurements(patients=patients,
                                            parameters=['position'],
                                            sub_parameters=['position_bed'])

In [None]:
# Keep only the bed positions of interest. All columns are retained: the
# following cells read 'pacmed_subname', 'effective_value' and 'end_timestamp',
# so narrowing to ['start_timestamp', 'hash_patient_id'] here made those
# lookups fail with a KeyError.
df_position_new = df_position_new.loc[
    df_position_new['effective_value'].isin(['30_degrees', '45_degrees', 'bed_chair'])
]

In [None]:
# Preview the first rows of the filtered bed-position records.
df_position_new.head()

In [None]:
# Frequency of each sub-parameter name and of each value in the bed-position
# records. Both columns must be present in df_position_new for this to run.
print(df_position_new['pacmed_subname'].value_counts())
print(df_position_new['effective_value'].value_counts())

In [None]:
# Value counts per sub-parameter name.
# NOTE(review): df_position_new was queried with sub_parameters=['position_bed']
# only, so the 'position_body' selection is presumably always empty here —
# confirm whether df_position was meant instead.
print('Position body:', df_position_new.loc[df_position_new.pacmed_subname == 'position_body', 'effective_value'].value_counts())
#print(df_position_new.loc[df_position_new.pacmed_subname == 'position_head', 'effective_value'].value_counts())
#print(df_position_new.loc[df_position_new.pacmed_subname == 'position_body_rotation', 'effective_value'].value_counts())
print('Position bed:', df_position_new.loc[df_position_new.pacmed_subname == 'position_bed', 'effective_value'].value_counts())
#print(df_position_new.loc[df_position_new.pacmed_subname == 'position_mobilization', 'effective_value'].value_counts())

#### 2. Number of short proning sessions

In [None]:
# Two-column view used for the session counts below: duration and the
# 'treated' indicator.
df_2 = df[['duration_hours', 'treated']]

In [None]:
# Total sessions, then sessions shorter than 2 hours: untreated first, treated second.
print(len(df_2))
print(len(df_2.loc[~df_2['treated'] & (df_2['duration_hours'] < 2)]))
print(len(df_2.loc[df_2['treated'] & (df_2['duration_hours'] < 2)]))

In [None]:
# Total sessions, then sessions shorter than 12 hours: untreated first, treated second.
print(len(df_2))
print(len(df_2.loc[~df_2['treated'] & (df_2['duration_hours'] < 12)]))
print(len(df_2.loc[df_2['treated'] & (df_2['duration_hours'] < 12)]))

#### 3. Do single patients generate many sessions?

In [None]:
# Number of rows (sessions) per patient, printed and shown as a histogram.
print(df['hash_patient_id'].value_counts())
df['hash_patient_id'].value_counts().plot.hist()

In [None]:
# Summary of the per-patient row counts, including the 95th and 99th percentiles.
df['hash_patient_id'].value_counts().describe(percentiles=[0.95, 0.99])


In [None]:
# Summary of duration_hours with extra detail in the upper tail (95th-99th percentiles).
df['duration_hours'].describe(percentiles=[0.05, 0.95, 0.96, 0.97, 0.98, 0.99])

In [None]:
# NOTE(review): ad-hoc ratio of two hard-coded numbers; their origin is not
# recorded in this notebook (presumably copied from earlier outputs) — confirm.
135 / 6371

In [None]:
# NOTE(review): presumably converts a hard-coded 2230 hours into days; the
# source of the value is not recorded in this notebook — confirm.
2230 / 24


In [None]:
# Column overview of the rows belonging to the patient with the most rows,
# ordered by session start.
(
    df.loc[df['hash_patient_id'] == df['hash_patient_id'].value_counts().index[0]]
    .sort_values(by='start_timestamp')
    .info(max_cols=200)
)

#### 4. Number of patients in prone / supine

In [None]:
# Distinct patients overall, then among treated rows, then among untreated rows.
print(len(df['hash_patient_id'].drop_duplicates()))
print(len(df.loc[df['treated'], 'hash_patient_id'].drop_duplicates()))
print(len(df.loc[~df['treated'], 'hash_patient_id'].drop_duplicates()))

#### 5. Extracting session data

In [None]:
# Change the working directory, then import the project helper.
# NOTE(review): the import presumably resolves only because the working
# directory is on sys.path — confirm.
os.chdir('/home/adam/adam/causal_inference')
from causal_inference.create_experiment.create_treatment import get_proning_data

In [None]:
# Session data returned by the project helper, queried through the existing
# DataLoader instance (helper implementation not shown in this notebook).
df_proning = get_proning_data(dl)

In [None]:
# Frequency of each effective_value across all returned sessions.
df_proning.effective_value.value_counts()

In [None]:
# Restrict the sessions to the included patients and show the value frequencies.
df_proning_old = df_proning.loc[df_proning['hash_patient_id'].isin(patients)]
print(df_proning_old['effective_value'].value_counts())

In [None]:
# Keep only the rows whose effective_value is 'prone'.
df_proning_old = df_proning_old[df_proning_old.effective_value == 'prone']

In [None]:
# Preview the first rows of the prone sessions.
df_proning_old.head()

In [None]:
# Summary statistics of duration_hours for the prone sessions.
df_proning_old.duration_hours.describe()

In [None]:
# Keep only sessions strictly longer than 2 hours.
df_proning_old = df_proning_old[df_proning_old.duration_hours > 2]

In [None]:
# Bed-position records with value '30_degrees', reduced to the timestamps and
# the patient id.
rotation = df_position_new.loc[
    df_position_new['effective_value'].eq('30_degrees'),
    ['start_timestamp', 'end_timestamp', 'hash_patient_id'],
]

In [None]:
# Preview the first rows of the '30_degrees' records.
rotation.head()

In [None]:
# Column dtypes and non-null counts of the '30_degrees' records.
rotation.info()

In [None]:
def was_rotated(x, y, z, df):
    """Return the whole hours from ``y`` until the first matching record in ``df``.

    Despite its name the function returns an hour count, not a boolean.

    Parameters
    ----------
    x
        Patient id, compared against ``df.hash_patient_id``.
    y
        Start of the window (inclusive), compared against ``df.start_timestamp``.
    z
        End of the window (exclusive).
    df
        DataFrame with ``hash_patient_id`` and ``start_timestamp`` columns.

    Returns
    -------
    int
        ``0`` when no record of patient ``x`` starts within ``[y, z)``.
        Otherwise the number of hours, rounded down, between ``y`` and the
        earliest such record. A record starting less than one hour after
        ``y`` therefore also yields ``0``.
    """
    in_window = (y <= df.start_timestamp) & (df.start_timestamp < z) & (df.hash_patient_id == x)
    if not in_window.any():
        return 0

    # Only the earliest start matters, so take the minimum instead of sorting
    # the whole selection.
    first_start = df.loc[in_window, 'start_timestamp'].min()

    # Floor division replaces math.floor: `math` was only imported in a later
    # cell, so a fresh-kernel run raised NameError on the first match.
    return int((first_start - y).total_seconds() // (60 * 60))

In [None]:
# Hours until the first '30_degrees' record for every prone session.
rotation_list = [
    was_rotated(patient_id, session_start, session_end, rotation)
    for patient_id, session_start, session_end in zip(
        df_proning_old['hash_patient_id'],
        df_proning_old['start_timestamp'],
        df_proning_old['end_timestamp'],
    )
]

In [None]:
# Same computation for every row of df; this replaces the previous rotation_list.
rotation_list = [
    was_rotated(patient_id, session_start, session_end, rotation)
    for patient_id, session_start, session_end in zip(
        df['hash_patient_id'],
        df['start_timestamp'],
        df['end_timestamp'],
    )
]

In [None]:
import math

# Hours until first rotation for each row of df, stored as a Series aligned on
# df.index so it supports the boolean filtering and the df['error'] assignment
# in the following cells (a plain list supports neither comparison nor masks).
# was_rotated already returns whole hours as ints; calling .total_seconds() on
# those raised AttributeError. Timedelta-like entries are still converted to
# floored hours, as the previous loop did.
error = pd.Series(
    [
        int(math.floor(value.total_seconds() / (60 * 60))) if hasattr(value, 'total_seconds') else int(value)
        for value in rotation_list
    ],
    index=df.index,
    name='error',
)


In [None]:
# Compare the number of computed entries with the number of rows in df.
print(len(error))
print(len(df.index))


In [None]:
# Summary of the non-zero values below 8 hours. `error` is wrapped in a Series
# because element-wise comparison is not defined for a plain list.
pd.Series(error).loc[lambda hours: (hours < 8) & (hours > 0)].describe()

In [None]:
# Summary of the non-zero values below 24 hours. `error` is wrapped in a Series
# because element-wise comparison is not defined for a plain list.
pd.Series(error).loc[lambda hours: (hours < 24) & (hours > 0)].describe()

In [None]:
# NOTE(review): ad-hoc ratio of two hard-coded numbers; their origin is not
# recorded in this notebook (presumably copied from earlier outputs) — confirm.
245 / 1260

In [None]:
# Store the per-row values as a new 'error' column; requires one entry per row of df.
df['error'] = error

In [None]:
# Summary of 'error' for treated rows with a positive value.
df.loc[df['treated'] & df['error'].gt(0), 'error'].describe()

In [None]:
# Summary of 'error' for treated rows with a value strictly between 0 and 24.
df.loc[df['treated'] & df['error'].gt(0) & df['error'].lt(24), 'error'].describe()

In [None]:
# Summary of 'error' for treated rows where it is positive and equals duration_hours.
df.loc[df['treated'] & df['error'].gt(0) & df['error'].eq(df['duration_hours']), 'error'].describe()

In [None]:
# duration_hours minus error for treated rows with 0 < error < 24; the row
# selection is evaluated once and both columns are taken from it.
(
    df.loc[df['treated'] & df['error'].gt(0) & df['error'].lt(24), ['duration_hours', 'error']]
    .pipe(lambda part: part['duration_hours'] - part['error'])
)

In [None]:
# Outcome column for treated rows whose 'error' value is zero.
df.loc[df['treated'] & df['error'].eq(0), 'pf_ratio_12h_24h_manual_outcome']

In [None]:

# Column overview of df, now including the added 'error' column.
df.info(max_cols=200)

In [None]:
# Per-row difference between end_timestamp and start_timestamp.
diff = df.end_timestamp - df.start_timestamp

In [None]:
# Display the computed differences.
diff

In [None]:
import math

# Whole hours (rounded down) of the third entry of diff, as a spot check.
int(math.floor(diff.iloc[2].total_seconds() / 3600))