# Data exploration

## Libraries

In [1]:
# activate line execution
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# general
import numpy as np
import pandas as pd
import math

# plotly
import plotly.express as px  # (version 4.7.0 or higher)
import plotly.graph_objects as go
from plotly.subplots import make_subplots

PATH_DATA = './resources/data1/'

## reading csv files

In [2]:
dfs=dict()

dfs['admissions'] = pd.read_csv(PATH_DATA+'core/admissions.csv')
dfs['admissions'].columns

dfs['patients'] = pd.read_csv(PATH_DATA+'core/patients.csv')
dfs['patients'].columns

dfs['transfers'] = pd.read_csv(PATH_DATA+'core/transfers.csv')
dfs['transfers'].columns

Index(['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime',
       'admission_type', 'admission_location', 'discharge_location',
       'insurance', 'language', 'marital_status', 'ethnicity', 'edregtime',
       'edouttime', 'hospital_expire_flag'],
      dtype='object')

Index(['subject_id', 'gender', 'anchor_age', 'anchor_year',
       'anchor_year_group', 'dod'],
      dtype='object')

Index(['subject_id', 'hadm_id', 'transfer_id', 'eventtype', 'careunit',
       'intime', 'outtime'],
      dtype='object')

## Variables

In [3]:
list_var_cat = {'admissions':['insurance','ethnicity','hospital_expire_flag'],
                'patients':['gender']
}
list_var_cont = {'patients':['anchor_age']}

# single-variable

In [24]:
px.pie(dfs['admissions'],names='ethnicity')

In [61]:

bars = []
for col in list_var_cat["admissions"]:
    bars.append(go.Bar(
    name=col,
    x=dfs['admissions'][col].value_counts().index,
    y=dfs['admissions'][col].value_counts().values
))
    
bars.append(go.Bar(
name="Gender",
x=dfs['patients']["gender"].value_counts().index,
y=dfs['patients']["gender"].value_counts().values
))
plot = go.Figure(data=bars)

plot.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=list([
                dict(label="All",
                     method="update",
                     args=[{"visible": [True, True, True, False]},
                           {"title": "All",
                            }]),
                dict(label="Insurance",
                     method="update",
                     args=[{"visible": [True, False, False, False]},
                           {"title": "Insurance",
                            }]),
                dict(label="Ethnicity",
                     method="update",
                     args=[{"visible": [False, True, False, False]},
                           {"title": "Ethnicity",
                            }]),
                dict(label="Alive",
                     method="update",
                     args=[{"visible": [False, False, True, False]},
                           {"title": "Alive",
                            }]),
                dict(label="Gender",
                     method="update",
                     args=[{"visible": [False, False, False, True]},
                           {"title": "Gender",
                            }]),
            ]),
        )
    ])

In [58]:
px.violin(dfs['patients']['anchor_age'], y="anchor_age", box=True)

# Double-variable

In [72]:
go.Figure(go.Scatter(
name="Gender",
x=dfs['admissions']["insurance"],
y=dfs['admissions']["ethnicity"],
mode='markers',
marker_size=[40, 60, 80, 100]    
))

# multi-variable

In [73]:
go.Heatmap(
    x = dfs['admissions'].columns,
    y = dfs['admissions'].index,
    z = np.array(dfs['admissions'])
    colorscale=px.colors.diverging.RdBu,
    zmin=-1,
    zmax=1
)

NameError: name 'corr' is not defined