In [1]:
import numpy as np
import pandas as pd
from equiflow import TableFlows, TableCharacteristics, TableDrifts

In [2]:
# Synthetic cohort used to exercise the equiflow tables.
# The legacy global NumPy RNG is seeded once; every draw below consumes it in
# order, so the statement order determines the generated values.
np.random.seed(42)
n = 100000

data = {}
# Integer ages drawn from 18..79 (randint's upper bound is exclusive).
data['age'] = np.random.randint(18, 80, size=n)
# Score levels, with NaN included as one of the choices to create missing values.
data['sofa'] = np.random.choice(
    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, np.nan], size=n
)
# None is one of the choices, so some rows have no race recorded.
data['race'] = np.random.choice(
    ['White', 'Black', 'Asian', 'Hispanic', None], size=n
)
data['sex'] = np.random.choice(['Male', 'Female'], size=n)
# Both np.nan and None appear as choices, giving two kinds of missing marker.
data['english'] = np.random.choice(
    ['Fluent', 'Limited', np.nan, None], size=n
)

# Ten extra standard-normal columns: var1 .. var10.
data.update({f'var{i}': np.random.randn(n) for i in range(1, 11)})

df = pd.DataFrame(data)

In [3]:
# Successive exclusion steps: each cohort is filtered from the previous one.
# Cohort 0: an independent copy of the full synthetic frame.
data_0 = df.copy()
# Cohort 1: rows whose `english` value is not missing.
data_1 = data_0[data_0['english'].notna()]
# Cohort 2: of those, rows whose `sofa` value is not missing.
data_2 = data_1[data_1['sofa'].notna()]

# NOTE(review): an `EquiFlow(dfs=[data_0, data_1, data_2])` call was previously
# left commented out here; `EquiFlow` is not among the names imported in the
# visible import cell.



In [4]:
# Cohort-flow table across the three successive cohorts.
# NOTE(review): the recorded output of this cell is
# "TypeError: __init__() should return None, not 'DataFrame'". Python raises
# that when a class's __init__ returns a non-None value, so the failure
# presumably originates inside TableFlows itself -- confirm against the
# installed equiflow version before relying on this cell.
flow_cohorts = [data_0, data_1, data_2]
TableFlows(dfs=flow_cohorts, label_suffix=True, thousands_sep=False)

TypeError: __init__() should return None, not 'DataFrame'

In [None]:
# Characteristics table for the three cohorts.
# Display labels for the raw column names.
characteristics_labels = {
    'race': 'Race and Ethnicity',
    'english': 'English Proficiency',
    'sex': 'Sex',
    'sofa': 'SOFA',
    'age': 'Age',
}

# An earlier note in this cell says passing `categorical=[]` is currently
# not supported.
# NOTE(review): the recorded output shows percentages with three decimal
# places, which does not match `decimals=1` below -- the output may be stale;
# re-run to confirm.
TableCharacteristics(
    dfs=[data_0, data_1, data_2],
    categorical=['race', 'sex', 'english'],
    nonnormal=['sofa'],
    normal=['age'],
    format_cat='N (%)',
    format_normal='Mean',
    format_nonnormal='Median [IQR]',
    missingness=True,
    decimals=1,
    label_suffix=True,
    thousands_sep=True,
    rename=characteristics_labels,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Cohort,Cohort,Cohort
Unnamed: 0_level_1,Unnamed: 1_level_1,0,1,2
Variable,Value,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,,100000,50022,46148
"Race and Ethnicity, N (%)",Hispanic,"20,024 (20.024)","10,011 (20.013)","9,197 (19.929)"
"Race and Ethnicity, N (%)",Asian,"19,952 (19.952)","9,918 (19.827)","9,145 (19.817)"
"Race and Ethnicity, N (%)",Black,"20,066 (20.066)","10,088 (20.167)","9,323 (20.202)"
"Race and Ethnicity, N (%)",White,"19,931 (19.931)","9,978 (19.947)","9,218 (19.975)"
"Race and Ethnicity, N (%)",Missing,"20,027 (20.027)","10,027 (20.045)","9,265 (20.077)"
"Sex, N (%)",Male,"50,052 (50.052)","25,049 (50.076)","23,089 (50.033)"
"Sex, N (%)",Female,"49,948 (49.948)","24,973 (49.924)","23,059 (49.967)"
"Sex, N (%)",Missing,0 (0.0),0 (0.0),0 (0.0)
"English Proficiency, N (%)",Fluent,"25,134 (25.134)","25,134 (50.246)","23,223 (50.323)"


In [None]:
# Drift table between consecutive cohorts (0 to 1, 1 to 2), restricted to the
# categorical variables: both continuous lists are passed empty.
# The label mapping still carries 'sofa' and 'age'; presumably those entries
# are simply ignored when the variables are not requested -- confirm.
drift_labels = {
    'race': 'Race and Ethnicity',
    'english': 'English Proficiency',
    'sex': 'Sex',
    'sofa': 'SOFA',
    'age': 'Age',
}

TableDrifts(
    dfs=[data_0, data_1, data_2],
    categorical=['race', 'sex', 'english'],
    nonnormal=[],
    normal=[],
    decimals=3,
    rename=drift_labels,
)

Unnamed: 0_level_0,Cohort Flow,0 to 1,1 to 2
Variable,Value,Unnamed: 2_level_1,Unnamed: 3_level_1
Overall,,,
Race and Ethnicity,Hispanic,0.0,0.002
Race and Ethnicity,Asian,0.003,0.0
Race and Ethnicity,Black,0.003,0.001
Race and Ethnicity,White,0.0,0.001
Sex,Male,0.0,0.001
Sex,Female,0.0,0.001
English Proficiency,Fluent,0.537,0.002
English Proficiency,Limited,0.532,0.002


In [None]:
# Scratch check: slicing a one-element list from index 1 yields an empty list
# rather than raising, as the recorded output `[]` shows.
prop1 = [0.501]
prop1[1:]  # everything after the first element

[]