In [1]:
%config Completer.use_jedi=False

In [2]:
import numpy as np
import pandas as pd

##### Helper functions

In [4]:
def same_col_type(df):
    for c in df.columns.tolist():
        df[c] = df[c].astype(float)
        
    return df

In [5]:
def compare_probs(df_f, df_p):
    pd.testing.assert_frame_equal(df_f, df_p)

# Choice Probabilities

In [6]:
def clean_csv_cp(df):
    """
    Remove any actions which are not viable.
    """
    #actions
    df = df[df.d_admit + df.d_reroute <= 15]
    
    return df

### Felix's Choice Probabilities

In [7]:
df_felix = pd.read_csv('../../choice_probabilities/CCP_felix.csv')
df_felix.tail()

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,CCP
159995,15,24,24,11,0.0
159996,15,24,24,12,0.0
159997,15,24,24,13,0.0
159998,15,24,24,14,0.0
159999,15,24,24,15,0.0


In [8]:
df_felix = same_col_type(df_felix)
df_felix = clean_csv_cp(df_felix)

In [9]:
df_felix.tail()

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,CCP
159809,15.0,24.0,13.0,1.0,0.0
159810,15.0,24.0,13.0,2.0,0.0
159824,15.0,24.0,14.0,0.0,0.0
159825,15.0,24.0,14.0,1.0,0.0
159840,15.0,24.0,15.0,0.0,0.0


### Paul's Choice Probabilities

In [10]:
df_paul = pd.read_csv('../../choice_probabilities/cp.txt', delimiter=',', header=None)
df_paul.columns = ['s_ED', 's_ICU', 'd_admit', 'd_reroute', 'CCP']
df_paul = same_col_type(df_paul)

In [11]:
df_paul.tail()

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,CCP
54395,15.0,24.0,13.0,1.0,0.0
54396,15.0,24.0,13.0,2.0,0.0
54397,15.0,24.0,14.0,0.0,0.0
54398,15.0,24.0,14.0,1.0,0.0
54399,15.0,24.0,15.0,0.0,0.0


In [42]:
df_paul.to_csv('../../choice_probabilities/CPP_paul.csv', index=False)

### Choice Probability Comparison
>* Values need to be sorted the same way first.

In [12]:
df_fsorted = df_felix.sort_values(['s_ED', 's_ICU', 'd_admit', 'd_reroute'], ascending=[True, True, True, True])
df_fsorted.reset_index(inplace=True, drop=True)

In [13]:
df_psorted = df_paul.sort_values(['s_ED', 's_ICU', 'd_admit', 'd_reroute'], ascending=[True, True, True, True])
df_psorted.reset_index(inplace=True, drop=True)

In [14]:
compare_probs(df_fsorted, df_psorted)

AssertionError: DataFrame.iloc[:, 4] are different

DataFrame.iloc[:, 4] values are different (6.13971 %)
[left]:  [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...]
[right]: [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...]

In [15]:
df_psorted[-136:-120]

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,CCP
54264,15.0,24.0,0.0,0.0,1.35214e-22
54265,15.0,24.0,0.0,1.0,6.6895099999999996e-21
54266,15.0,24.0,0.0,2.0,1.9502799999999998e-19
54267,15.0,24.0,0.0,3.0,5.31937e-18
54268,15.0,24.0,0.0,4.0,1.44222e-16
54269,15.0,24.0,0.0,5.0,3.91024e-15
54270,15.0,24.0,0.0,6.0,1.06017e-13
54271,15.0,24.0,0.0,7.0,2.8744e-12
54272,15.0,24.0,0.0,8.0,7.79326e-11
54273,15.0,24.0,0.0,9.0,2.11296e-09


In [16]:
df_fsorted[-136:-120]

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,CCP
54264,15.0,24.0,0.0,0.0,5.1399999999999995e-42
54265,15.0,24.0,0.0,1.0,5.1100000000000006e-39
54266,15.0,24.0,0.0,2.0,2.9899999999999998e-36
54267,15.0,24.0,0.0,3.0,1.64e-33
54268,15.0,24.0,0.0,4.0,8.92e-31
54269,15.0,24.0,0.0,5.0,4.8600000000000005e-28
54270,15.0,24.0,0.0,6.0,2.6500000000000003e-25
54271,15.0,24.0,0.0,7.0,1.4400000000000002e-22
54272,15.0,24.0,0.0,8.0,7.849999999999999e-20
54273,15.0,24.0,0.0,9.0,4.27e-17


# Transition Probability

In [39]:
def clean_csv_tp(df):
    """
    Remove any state-action pairs which are not viable.
    """
    df = df[(df.d_admit <= 24 - df.s_ICU) & (df.d_admit + df.d_reroute <= df.s_ED)]
    
    return df

### Felix's Transition Probabilities

In [18]:
df_felix_t = pd.read_csv('../../transition_probabilities/transition_prob_felix.csv')
df_felix_t.tail()

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,s_new_ED,s_new_ICU,transition_prob
63999995,15.0,24.0,24.0,15.0,11.0,24.0,3.0096799999999997e-26
63999996,15.0,24.0,24.0,15.0,12.0,24.0,3.0096799999999997e-26
63999997,15.0,24.0,24.0,15.0,13.0,24.0,3.0096799999999997e-26
63999998,15.0,24.0,24.0,15.0,14.0,24.0,3.0096799999999997e-26
63999999,15.0,24.0,24.0,15.0,15.0,24.0,3.0096799999999997e-26


In [19]:
df_felix_t = same_col_type(df_felix_t)
df_felix_t = clean_csv_tp(df_felix_t)

In [20]:
df_felix_t.tail()

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,s_new_ED,s_new_ICU,transition_prob
63846395,15.0,24.0,0.0,15.0,11.0,24.0,0.000824
63846396,15.0,24.0,0.0,15.0,12.0,24.0,0.000824
63846397,15.0,24.0,0.0,15.0,13.0,24.0,0.000824
63846398,15.0,24.0,0.0,15.0,14.0,24.0,0.000824
63846399,15.0,24.0,0.0,15.0,15.0,24.0,0.000824


### Paul's Transition Probabilities

In [21]:
df_paul_t = pd.read_csv('../../transition_probabilities/tp.txt', delimiter=',', header=None)
df_paul_t.columns = ['s_ED', 's_ICU', 'd_admit', 'd_reroute', 's_new_ED', 's_new_ICU', 'transition_prob']
df_paul_t = same_col_type(df_paul_t)

In [43]:
df_paul_t.head()

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,s_new_ED,s_new_ICU,transition_prob
0,0.0,0.0,0.0,0.0,0.0,0.0,0.777245
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,2.0,0.0
3,0.0,0.0,0.0,0.0,0.0,3.0,0.0
4,0.0,0.0,0.0,0.0,0.0,4.0,0.0


In [44]:
df_paul_t.to_csv('../../transition_probabilities/transition_prob_paul.csv', index=False)

### Transition Probability Comparison
>* Values need to be sorted the same way first.

In [23]:
df_fsorted_t = df_felix_t.sort_values(['s_ED', 's_ICU', 'd_admit', 'd_reroute', 's_new_ED', 's_new_ICU'], 
                                  ascending=[True, True, True, True, True, True])
df_fsorted_t.reset_index(inplace=True, drop=True)

In [24]:
df_psorted_t = df_paul_t.sort_values(['s_ED', 's_ICU', 'd_admit', 'd_reroute', 's_new_ED', 's_new_ICU'], 
                                  ascending=[True, True, True, True, True, True])
df_psorted_t.reset_index(inplace=True, drop=True)

In [25]:
compare_probs(df_felix_t, df_paul_t)

AssertionError: DataFrame.index are different

DataFrame.index values are different (99.99423 %)
[left]:  Int64Index([       0,        1,        2,        3,        4,        5,
                   6,        7,        8,        9,
            ...
            63846390, 63846391, 63846392, 63846393, 63846394, 63846395,
            63846396, 63846397, 63846398, 63846399],
           dtype='int64', length=6936000)
[right]: RangeIndex(start=0, stop=6936000, step=1)

In [37]:
df_fsorted_t[:10]

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,s_new_ED,s_new_ICU,transition_prob
0,0.0,0.0,0.0,0.0,0.0,0.0,0.001639
1,0.0,0.0,0.0,0.0,0.0,1.0,0.023011
2,0.0,0.0,0.0,0.0,0.0,2.0,0.0
3,0.0,0.0,0.0,0.0,0.0,3.0,0.0
4,0.0,0.0,0.0,0.0,0.0,4.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0
6,0.0,0.0,0.0,0.0,0.0,6.0,0.0
7,0.0,0.0,0.0,0.0,0.0,7.0,0.0
8,0.0,0.0,0.0,0.0,0.0,8.0,0.0
9,0.0,0.0,0.0,0.0,0.0,9.0,0.0


In [38]:
df_psorted_t[:10]

Unnamed: 0,s_ED,s_ICU,d_admit,d_reroute,s_new_ED,s_new_ICU,transition_prob
0,0.0,0.0,0.0,0.0,0.0,0.0,0.777245
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,2.0,0.0
3,0.0,0.0,0.0,0.0,0.0,3.0,0.0
4,0.0,0.0,0.0,0.0,0.0,4.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0
6,0.0,0.0,0.0,0.0,0.0,6.0,0.0
7,0.0,0.0,0.0,0.0,0.0,7.0,0.0
8,0.0,0.0,0.0,0.0,0.0,8.0,0.0
9,0.0,0.0,0.0,0.0,0.0,9.0,0.0
