# INCREMENT-SOT CAZ-AVI

Análisis estadístico de la cohorte **INCREMENT-SOT CAZ-AVI** para el estudio retrospectivo del tratamiento con CAZ-AVI frente al resto de terapias disponibles en diferentes centros.

## Control por Propensity-score

### Tratamiento BAT vs Tratamiento CAZ-AVI

In [3]:
import pandas as pd

from pystats_utils.test.multivariant import LogisticRegression

# Propensity-score step for the "BAT vs CAZ-AVI" comparison: a logistic model
# of Cohort_Cazavi is fitted on the rows with Cohort_Treatment == "yes" and its
# prediction is stored next to the raw data as column "ps_cvsb".
dataframe = pd.read_excel("./test/database/database.xlsx")
workDataframe = dataframe[dataframe["Cohort_Treatment"] == "yes"]

result = LogisticRegression(dataframe = workDataframe,
                            classVariable = "Cohort_Cazavi",
                            targetVariables = ["Age", "Gender",
                                               "Biliary_stenosis", "CMV_disease",
                                               "Kidney_disease", "Post_transplant_dialysis",
                                               "Score_Charlson_AMN", "Acquisition",
                                               "SIRS_SevereORSchock", "Pitt_Score",
                                               "Source_infection_Urinary_tract",
                                               "Source_infection_Pneumonia",
                                               "SourceControl_before30days", "Surgical_debridement",
                                               "Carbapenemase", "Removal_Replacement_vascular_line",
                                               "Myocardial_Infarct", "Diabetes_Mellitus",
                                               "Chronic_Pulmonary_Disease"]).run()

# The join aligns on the row index, so rows outside workDataframe get NaN.
# NOTE(review): assumes result.prediction keeps workDataframe's index - confirm.
result.prediction.name = "ps_cvsb"
dataframe = dataframe.join(result.prediction)

# index = False: the index is only the default RangeIndex from read_excel.
# Writing it would come back as an extra "Unnamed: 0" column every time the
# workbook is re-read by the cells below.
dataframe.to_excel("./test/database/database_ps.xlsx", index = False)

Optimization terminated successfully.
         Current function value: 0.550326
         Iterations 6


### Tratamiento CAZ-AVI mono vs Tratamiento CAZ-AVI combi

In [4]:
import pandas as pd

from pystats_utils.test.multivariant import LogisticRegression

# Propensity-score step for the "CAZ-AVI mono vs CAZ-AVI combi" comparison:
# a logistic model of Cohort_Combi is fitted on the rows with
# Cohort_Cazavi == "yes" and its prediction is stored as column "ps_cvsm".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")

# This cell overwrites the very file it reads. Drop a score left behind by a
# previous run so the join below does not fail on an overlapping column name.
dataframe = dataframe.drop(columns = ["ps_cvsm"], errors = "ignore")
workDataframe = dataframe[dataframe["Cohort_Cazavi"] == "yes"]

# NOTE(review): the saved output of this cell stops at 35 iterations without
# an "Optimization terminated successfully" message - confirm the fit converged.
result = LogisticRegression(dataframe = workDataframe,
                            classVariable = "Cohort_Combi",
                            targetVariables = ["Age", "Gender",
                                               "Biliary_stenosis", "CMV_disease",
                                               "Kidney_disease", "Post_transplant_dialysis",
                                               "Score_Charlson_AMN", "Acquisition",
                                               "SIRS_SevereORSchock", "Pitt_Score",
                                               "Source_infection_Urinary_tract",
                                               "Source_infection_Pneumonia",
                                               "SourceControl_before30days", "Surgical_debridement",
                                               "Carbapenemase", "Removal_Replacement_vascular_line",
                                               "Myocardial_Infarct", "Diabetes_Mellitus",
                                               "Chronic_Pulmonary_Disease"]).run()

# The join aligns on the row index, so rows outside workDataframe get NaN.
result.prediction.name = "ps_cvsm"
dataframe = dataframe.join(result.prediction)

# index = False keeps the redundant RangeIndex out of the workbook; otherwise
# each rewrite adds another "Unnamed: N" column on the next read.
dataframe.to_excel("./test/database/database_ps.xlsx", index = False)

         Current function value: 0.357314
         Iterations: 35


  result = func(self.values, **kwargs)


## Tablas

### Tabla 1

In [35]:
import pandas as pd
from pystats_utils.pipeline import BivariantTable

# Table 1: BivariantTable over the rows with Cohort_Treatment == "yes",
# using Cohort_Cazavi as the class variable.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
workDataframe = dataframe[treatedRows]

result = BivariantTable(
    dataframe = workDataframe,
    classVariable = "Cohort_Cazavi",
    excludedVariables = ["Pt", "ps_cvsm", "Caso_REC_3digitos"],
).run()

result.to_excel("tabla_1_raw.xlsx")

### Tabla 2.A

In [32]:
import pandas as pd
from pystats_utils.test.multivariant import LogisticRegression

# Table 2.A: logistic regression of Clinical_success_14d on the rows with
# Cohort_Treatment == "yes"; the ps_cvsb score is one of the covariates.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
workDataframe = dataframe.loc[dataframe["Cohort_Treatment"] == "yes"]

targetVariables = ["Gender", "ICS_SOT_score", "Cohort_Cazavi", "ps_cvsb"]

result = LogisticRegression(
    dataframe = workDataframe,
    classVariable = "Clinical_success_14d",
    targetVariables = targetVariables,
).run()

# The params and summary attributes are exported to separate workbooks.
result.params.to_excel("tabla_2a_params_raw.xlsx")
result.summary.to_excel("tabla_2a_metrics_raw.xlsx")

### Tabla 2.B

In [33]:
import pandas as pd
from pystats_utils.test.multivariant import LogisticRegression

# Table 2.B: same model as Table 2.A but restricted to rows with
# ICS_SOT_score >= 8, so the score itself is no longer a covariate.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
scoreAtLeast8 = dataframe["ICS_SOT_score"] >= 8
workDataframe = dataframe[treatedRows & scoreAtLeast8]

targetVariables = ["Gender", "Cohort_Cazavi", "ps_cvsb"]

result = LogisticRegression(
    dataframe = workDataframe,
    classVariable = "Clinical_success_14d",
    targetVariables = targetVariables,
).run()

# The params and summary attributes are exported to separate workbooks.
result.params.to_excel("tabla_2b_params_raw.xlsx")
result.summary.to_excel("tabla_2b_metrics_raw.xlsx")

### Tabla 3.A

In [10]:
import pandas as pd
from pystats_utils.test.multivariant import LogisticRegression

# Table 3.A: logistic regression of Clinical_success_30d on the rows with
# Cohort_Treatment == "yes"; the ps_cvsb score is one of the covariates.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
workDataframe = dataframe.loc[dataframe["Cohort_Treatment"] == "yes"]

targetVariables = ["Gender", "ICS_SOT_score", "Cohort_Cazavi", "ps_cvsb"]

result = LogisticRegression(
    dataframe = workDataframe,
    classVariable = "Clinical_success_30d",
    targetVariables = targetVariables,
).run()

# The params and summary attributes are exported to separate workbooks.
result.params.to_excel("tabla_3a_params_raw.xlsx")
result.summary.to_excel("tabla_3a_metrics_raw.xlsx")

### Tabla 3.B

In [9]:
import pandas as pd
from pystats_utils.test.multivariant import LogisticRegression

# Table 3.B: same model as Table 3.A but restricted to rows with
# ICS_SOT_score >= 8, so the score itself is no longer a covariate.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
scoreAtLeast8 = dataframe["ICS_SOT_score"] >= 8
workDataframe = dataframe[treatedRows & scoreAtLeast8]

targetVariables = ["Gender", "Cohort_Cazavi", "ps_cvsb"]

result = LogisticRegression(
    dataframe = workDataframe,
    classVariable = "Clinical_success_30d",
    targetVariables = targetVariables,
).run()

# The params and summary attributes are exported to separate workbooks.
result.params.to_excel("tabla_3b_params_raw.xlsx")
result.summary.to_excel("tabla_3b_metrics_raw.xlsx")

### Tabla 4.A

In [18]:
import pandas as pd

from pystats_utils.test.multivariant import CoxPhRegression

# Table 4.A: Cox regression with event Mortality_31d and time Time2death_31d
# on the rows with Cohort_Treatment == "yes"; the ps_cvsb score is one of the
# covariates.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
workDataframe = dataframe[dataframe["Cohort_Treatment"] == "yes"]

targetVariables = ["Gender",
                   "Age",
                   "ICS_SOT_score",
                   "Cohort_Cazavi",
                   "ps_cvsb"]

result = CoxPhRegression(dataframe = workDataframe,
                         eventVariable = "Mortality_31d",
                         timeVariable = "Time2death_31d",
                         targetVariables = targetVariables).run()

result.params.to_excel("tabla_4a_raw.xlsx")

# Display the concordance index as the cell output, as the Table S10.B cell
# does. NOTE(review): the number saved under this cell presumably came from
# this expression, which was missing from the code - confirm.
result.model.concordance_index_

0.7627215551743853

### Tabla 4.B

In [19]:
import pandas as pd

from pystats_utils.test.multivariant import CoxPhRegression

# Table 4.B: same Cox model as Table 4.A but restricted to rows with
# ICS_SOT_score >= 8, so the score itself is no longer a covariate.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
workDataframe = dataframe[dataframe["Cohort_Treatment"] == "yes"]
workDataframe = workDataframe[workDataframe["ICS_SOT_score"] >= 8]

targetVariables = ["Gender",
                   "Age",
                   "Cohort_Cazavi",
                   "ps_cvsb"]

result = CoxPhRegression(dataframe = workDataframe,
                         eventVariable = "Mortality_31d",
                         timeVariable = "Time2death_31d",
                         targetVariables = targetVariables).run()

result.params.to_excel("tabla_4b_raw.xlsx")

# Display the concordance index as the cell output, as the Table S10.B cell
# does. NOTE(review): the number saved under this cell presumably came from
# this expression, which was missing from the code - confirm.
result.model.concordance_index_

0.659217877094972

## Tablas suplementarias

### Tabla S1

0    1
1    2
2    3
dtype: int64

### Tabla S6

In [15]:
import pandas as pd
from pystats_utils.pipeline import LogisticExploration

# Table S6: LogisticExploration of Clinical_success_14d over the rows with
# Cohort_Treatment == "yes"; three columns are excluded from the run.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
workDataframe = dataframe[treatedRows]

result = LogisticExploration(
    dataframe = workDataframe,
    classVariable = "Clinical_success_14d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_S6_raw.xlsx")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _

### Tabla S7

In [16]:
import pandas as pd
from pystats_utils.pipeline import LogisticExploration

# Table S7: LogisticExploration of Clinical_success_30d over the rows with
# Cohort_Treatment == "yes"; three columns are excluded from the run.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
workDataframe = dataframe[treatedRows]

result = LogisticExploration(
    dataframe = workDataframe,
    classVariable = "Clinical_success_30d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_S7_raw.xlsx")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, 

### Tabla S8

In [17]:
import pandas as pd
from pystats_utils.pipeline import CoxExploration

# Table S8: CoxExploration with event Mortality_31d and time Time2death_31d
# over the rows with Cohort_Treatment == "yes".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
workDataframe = dataframe.loc[dataframe["Cohort_Treatment"] == "yes"]

# NOTE(review): the saved output warns about Center_Code_* columns although
# "Center" is excluded - confirm whether those columns should be excluded too.
result = CoxExploration(
    dataframe = workDataframe,
    eventVariable = "Mortality_31d",
    timeVariable = "Time2death_31d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_S8_raw.xlsx")


>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Center_Code_Italy_2'].var())
>>> print(df.loc[~events, 'Center_Code_Italy_2'].var())

A very low variance means that the column Center_Code_Italy_2 completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.


  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)

>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Center_Code_Spain_5'].var())
>>> print(df.loc[~events, 'Center_Code_Spain_5'].var())

A very low variance means that the column Center_Code_Spain_5 completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.


  result = func(self.values, **kwargs)

>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[even

### Tabla S10.A

In [13]:
import pandas as pd
from pystats_utils.test.multivariant import LogisticRegression

# Table S10.A: logistic regression of Clinical_success_30d on the rows with
# Cohort_Cazavi == "yes"; Cohort_Combi and the ps_cvsm score are covariates.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
cazaviRows = dataframe["Cohort_Cazavi"] == "yes"
workDataframe = dataframe[cazaviRows]

targetVariables = ["Gender", "ICS_SOT_score", "Cohort_Combi", "ps_cvsm"]

result = LogisticRegression(
    dataframe = workDataframe,
    classVariable = "Clinical_success_30d",
    targetVariables = targetVariables,
).run()

# The params and summary attributes are exported to separate workbooks.
result.params.to_excel("tabla_S10a_params_raw.xlsx")
result.summary.to_excel("tabla_S10a_metrics_raw.xlsx")

### Tabla S10.B

In [21]:
import pandas as pd
from pystats_utils.test.multivariant import CoxPhRegression

# Table S10.B: Cox regression with event Mortality_31d and time Time2death_31d
# on the rows with Cohort_Cazavi == "yes"; Cohort_Combi and the ps_cvsm score
# are covariates.
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
cazaviRows = dataframe["Cohort_Cazavi"] == "yes"
workDataframe = dataframe[cazaviRows]

targetVariables = ["Gender", "Age", "ICS_SOT_score", "Cohort_Combi", "ps_cvsm"]

result = CoxPhRegression(
    dataframe = workDataframe,
    eventVariable = "Mortality_31d",
    timeVariable = "Time2death_31d",
    targetVariables = targetVariables,
).run()

result.params.to_excel("tabla_S10b_raw.xlsx")

# Last expression: shown as the cell output.
result.model.concordance_index_

0.8180574555403557

## Tablas sin numerar

### Tabla KPC Éxito clínico 14 días

In [20]:
import pandas as pd
from pystats_utils.pipeline import LogisticExploration

# KPC subgroup, Clinical_success_14d: rows must have both
# Cohort_Treatment == "yes" and Carbapenemase == "KPC".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
kpcRows = dataframe["Carbapenemase"] == "KPC"
workDataframe = dataframe[treatedRows & kpcRows]

result = LogisticExploration(
    dataframe = workDataframe,
    classVariable = "Clinical_success_14d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_kpc_cs14d_raw.xlsx")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modif

### Tabla KPC Éxito clínico 30 días

In [21]:
import pandas as pd
from pystats_utils.pipeline import LogisticExploration

# KPC subgroup, Clinical_success_30d: rows must have both
# Cohort_Treatment == "yes" and Carbapenemase == "KPC".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
kpcRows = dataframe["Carbapenemase"] == "KPC"
workDataframe = dataframe[treatedRows & kpcRows]

result = LogisticExploration(
    dataframe = workDataframe,
    classVariable = "Clinical_success_30d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_kpc_cs30d_raw.xlsx")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, 

### Tabla KPC Mortalidad 30 días

In [22]:
import pandas as pd
from pystats_utils.pipeline import CoxExploration

# KPC subgroup, mortality: CoxExploration with event Mortality_31d and time
# Time2death_31d on rows with Cohort_Treatment == "yes" and
# Carbapenemase == "KPC".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
kpcRows = dataframe["Carbapenemase"] == "KPC"
workDataframe = dataframe[treatedRows & kpcRows]

result = CoxExploration(
    dataframe = workDataframe,
    eventVariable = "Mortality_31d",
    timeVariable = "Time2death_31d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_kpc_m30d_raw.xlsx")


>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Center_Code_Italy_2'].var())
>>> print(df.loc[~events, 'Center_Code_Italy_2'].var())

A very low variance means that the column Center_Code_Italy_2 completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.


  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)

>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Clinical_success_30d_yes'].var())
>>> print(df.loc[~events, 'Clinical_success_30d_yes'].var())

A very low variance means that the column Clinical_success_30d_yes completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.


  result = func(self.values, **kwargs)

>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'SOT_type_Multiorgan

### Tabla OXA-48 Éxito clínico 14 días

In [23]:
import pandas as pd
from pystats_utils.pipeline import LogisticExploration

# OXA-48 subgroup, Clinical_success_14d: rows must have both
# Cohort_Treatment == "yes" and Carbapenemase == "OXA-48".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
oxaRows = dataframe["Carbapenemase"] == "OXA-48"
workDataframe = dataframe[treatedRows & oxaRows]

result = LogisticExploration(
    dataframe = workDataframe,
    classVariable = "Clinical_success_14d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_oxa_cs14d_raw.xlsx")

  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, 

### Tabla OXA-48 Éxito clínico 30 días

In [24]:
import pandas as pd
from pystats_utils.pipeline import LogisticExploration

# OXA-48 subgroup, Clinical_success_30d: rows must have both
# Cohort_Treatment == "yes" and Carbapenemase == "OXA-48".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
oxaRows = dataframe["Carbapenemase"] == "OXA-48"
workDataframe = dataframe[treatedRows & oxaRows]

result = LogisticExploration(
    dataframe = workDataframe,
    classVariable = "Clinical_success_30d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_oxa_cs30d_raw.xlsx")

  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(average, modifier, msg_start, len(result))
  result = func(self.values, **kwargs)
  _warn_prf(

### Tabla OXA-48 Mortalidad 30 días

In [25]:
import pandas as pd
from pystats_utils.pipeline import CoxExploration

# OXA-48 subgroup, mortality: CoxExploration with event Mortality_31d and time
# Time2death_31d on rows with Cohort_Treatment == "yes" and
# Carbapenemase == "OXA-48".
dataframe = pd.read_excel("./test/database/database_ps.xlsx")
treatedRows = dataframe["Cohort_Treatment"] == "yes"
oxaRows = dataframe["Carbapenemase"] == "OXA-48"
workDataframe = dataframe[treatedRows & oxaRows]

result = CoxExploration(
    dataframe = workDataframe,
    eventVariable = "Mortality_31d",
    timeVariable = "Time2death_31d",
    excludedVariables = ["Pt", "Caso_REC_3digitos", "Center"],
).run()

result.to_excel("tabla_oxa_m30d_raw.xlsx")


>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Center_Code_Italy_1'].var())
>>> print(df.loc[~events, 'Center_Code_Italy_1'].var())

A very low variance means that the column Center_Code_Italy_1 completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.


  result = func(self.values, **kwargs)

>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Center_Code_Spain_4'].var())
>>> print(df.loc[~events, 'Center_Code_Spain_4'].var())

A very low variance means that the column Center_Code_Spain_4 completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.


  result = func(self.values, **kwargs)

>>> events = df['Mortality_31d'].astype(bool)
>>> print(df.loc[events, 'Center_Code_Spain_5'].var())
>>> print(df.loc[~events, 'Center_Code_Spain

In [1]:
import pandas as pd
# Only BartlettTest is used below; the other two test classes are imported
# but never called in this cell.
from pystats_utils.test.value_comparison import MannWhitneyUTest
from pystats_utils.test.normality import AgostinoTest
from pystats_utils.test.homocedasticity import BartlettTest

# Scratch check: BartlettTest on Body_mass_index with Diabetes as the class
# variable, run against the example CSV.
data = pd.read_csv("./test/database/example_python.csv", sep = ",")

a = BartlettTest(
    dataframe = data,
    classVariable = "Diabetes",
    targetVariable = "Body_mass_index",
).run()

# Last expression: the Result object is shown as the cell output.
a



Result
	test: Bartlett Test
	context: Homocedasticity
	pvalue: {'no vs. type_1': 0.029068072758625264, 'no vs. type_2': 1.005763608822125e-35, 'type_1 vs. type_2': 0.03707808737746657}
	statistic: {'no vs. type_1': 4.763577357707832, 'no vs. type_2': 155.6575725212558, 'type_1 vs. type_2': 4.346821389064249}

In [1]:
from pystats_utils.pipeline import BivariantTable
import pandas as pd

# Scratch check: BivariantTable over the example CSV with Diabetes as the
# class variable and no excluded variables.
data = pd.read_csv("./test/database/example_python.csv", sep = ",")

a = BivariantTable(
    dataframe = data,
    classVariable = "Diabetes",
).run()

# Last expression: the resulting table is shown as the cell output.
a

Unnamed: 0,Variable,All,no,type_1,type_2,P_value,Test,Variable_type,Normality,Homocedasticity
0,High_BP_yes,7046 (46.976),5025 (40.874),166 (60.364),1855 (76.337),0.0,Pearson Chi Square Test,categorical,,
0,High_cholesterol_yes,6755 (45.036),4918 (40.003),180 (65.455),1657 (68.189),0.0,Pearson Chi Square Test,categorical,,
0,Cholesterol_check_pre5y_yes,14437 (96.253),11754 (95.608),273 (99.273),2410 (99.177),0.0,Pearson Chi Square Test,categorical,,
0,Body_mass_index,28.58 (24.00 - 32.00),27.84 (24.00 - 31.00),31.13 (26.00 - 35.00),32.03 (27.00 - 36.00),0.0,Mann Whitney U Test,numerical,No,No
0,Smoker_yes,6779 (45.196),5422 (44.103),140 (50.909),1217 (50.082),0.0,Pearson Chi Square Test,categorical,,
0,Stroke_yes,742 (4.947),468 (3.807),20 (7.273),254 (10.453),0.0,Pearson Chi Square Test,categorical,,
0,Heart_disease_or_attack_yes,1605 (10.701),1027 (8.354),40 (14.545),538 (22.14),0.0,Pearson Chi Square Test,categorical,,
0,Physical_activity_pre30d_yes,11012 (73.418),9326 (75.858),179 (65.091),1507 (62.016),0.0,Pearson Chi Square Test,categorical,,
0,Fruits_diet_yes,9043 (60.291),7541 (61.339),146 (53.091),1356 (55.802),0.0,Pearson Chi Square Test,categorical,,
0,Veggies_diet_yes,12045 (80.305),10006 (81.389),208 (75.636),1831 (75.35),0.0,Pearson Chi Square Test,categorical,,


In [10]:
# Scratch check: flatten the values of a two-level dict into one list,
# keeping insertion order (outer keys first, then inner keys).
a = {
    "a": {"aa": 2, "ab": 3},
    "b": {"ba": 4, "bb": 1},
}

flatValues = []
for innerDict in a.values():
    flatValues.extend(innerDict.values())

print(flatValues)

[2, 3, 4, 1]


In [21]:
# Scratch check: with three fixed decimals, 0.0001 is rendered as '0.000'.
threeDecimals = "{:.3f}"
threeDecimals.format(0.0001)

'0.000'