# Anemia en Diálisis Peritoneal

In [None]:
from lifelines import CoxPHFitter
from lifelines import CoxTimeVaryingFitter
from lifelines.utils import to_long_format
from lifelines.utils import add_covariate_to_timeline
import matplotlib.pyplot as plt
import pandas as pd
from sksurv.ensemble import RandomSurvivalForest

import anemiaLevanteDP as a

## Load Data

In [None]:
# Read the four source tables from their pickle exports.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
lab_base_df = pd.read_pickle('Analíticas.pkl')
pac__base_df = pd.read_pickle('Pacientes.pkl')
hosp_base_df = pd.read_pickle('Ingresos.pkl')
per_base_df = pd.read_pickle('Peritonitis.pkl')

# Merge labs with patients, passing 'REGISTRO' as the merge column
# (presumably the patient identifier shared by both tables).
merged_base_df = a.merge_dataframes(
    lab_base_df,
    pac__base_df,
    'REGISTRO',
)

## Preprocesado de datos

In [None]:
# Columns summarised in the baseline table: categorical first, numeric second.
baseline_cat_cols = [
    'SEXO', 'CKD_STAGE',
    'ARRITMIA', 'ITU', 'TBC', 'DM', 'VC', 'DIVERT', 'NEO', 'CH', 'SIST',
    'EPOC', 'CARDIO', 'VP', 'DISLIPEMIA',
    'DIURETICO', 'CALCIOANTA', 'IECA', 'ARAII', 'BBLOQUEANTE',
    'ABLOQUEANTE', 'ABBLOQUEANTE', 'AGONISTASC', 'VASODILATADOR',
]
baseline_num_cols = [
    'EDAD', 'CKD_CALC',
    'GLUCOSA', 'UREA', 'CREATININA', 'URICO', 'SODIO', 'POTASIO', 'CALCIO',
    'FOSFORO',
    'HIERRO', 'TRANSFERRINA', 'IST', 'FERRITINA',
    'COLESTEROL', 'TRIGLICERIDOS', 'HDL', 'LDL',
    'LEUCOCITOS', 'NEUTROFILOS', 'LINFOCITOS', 'MONOCITOS', 'EOSINOFILOS',
    'BASOFILOS', 'GRANULOCITOS',
    'HEMATIES', 'HEMOGLOBINA', 'HEMATOCRITO', 'VCM', 'HCM', 'CHCM',
    'PLAQUETAS', 'PLAQUETOCRITO', 'VPM',
]

# Columns that must be present in a row for it to be kept, per table.
basic_cols_lab = [
    'REGISTRO', 'FECHA',
    'HEMOGLOBINA', 'HIERRO', 'TRANSFERRINA', 'IST',
    'INICIO_DP', 'NACIMIENTO',
]
basic_cols_hosp = ['REGISTRO', 'FINGRESO']
basic_cols_per = ['REGISTRO', 'FECHA']

# Columns whose rows must be deleted when they contain 0 as value.
not_zero_cols = ['EDAD', 'GLUCOSA']

# Columns for the Cox models: boolean flags, covariates of interest and the
# flags used to exclude patients.
bool_col_list = [
    'CARDIORENAL', 'PASO_A_HD',
    'ARRITMIA', 'ITU', 'TBC', 'DM', 'VC', 'DIVERT', 'NEO', 'CH', 'SIST',
    'EPOC', 'CARDIO', 'VP', 'DISLIPEMIA',
    'FRAGNOS',
    'DIURETICO', 'CALCIOANTA', 'IECA', 'ARAII', 'BBLOQUEANTE',
    'ABLOQUEANTE', 'ABBLOQUEANTE', 'AGONISTASC', 'VASODILATADOR',
    'OTROSFR', 'CIRUGIA',
]
covariate_list = ['HEMOGLOBINA', 'IST', 'HIERRO', 'FERRITINA']
exclude_list = ['TBC', 'CH', 'SIST', 'CARDIO', 'VP', 'VC', 'NEO', 'DIVERT']

In [None]:
# Main preprocessing pipeline. It builds lab_df / hosp_df / per_df from the raw
# tables and derives the Cox input frames used by the modelling cells below.
# The steps are order-dependent: each one consumes the output of the previous.

# Clean NaNs for interesting columns.
# Labs go through clean_df twice: first on the basic columns, then on the
# model covariates.
lab_df = a.clean_df(merged_base_df, basic_cols_lab, verbose=True)
lab_df = a.clean_df(lab_df, covariate_list, verbose=True)
hosp_df = a.clean_df(hosp_base_df, basic_cols_hosp, verbose=True)
per_df = a.clean_df(per_base_df, basic_cols_per, verbose=True)

# Select years: each dataframe is paired positionally with its date column;
# the bounds passed are 2010 and 2023.
lab_df, hosp_df, per_df = a.filter_by_year([lab_df, hosp_df, per_df], ['FECHA', 'FINGRESO', 'FECHA'], 2010, 2023)

# Add derived columns to lab_df. The return values are not used, so these
# helpers presumably modify lab_df in place.
# Anemia column is based on 'HEMOGLOBINA' and 'SEXO' as specified in KDIGO guidelines.
a.add_anemia_column(lab_df)
a.add_age_column(lab_df)
a.add_ckd_column(lab_df)

# Delete rows that have 0 as values for not_zero_cols

lab_df = a.clean_zero_values(lab_df, not_zero_cols, verbose=True)

# Fill empty values for bool_cols
lab_df = a.bool_col_convert(lab_df, bool_col_list)

# Exclude patients with exclude_list
# Disabled alternative filter, kept for reference:
#lab_df = lab_df[lab_df['ICEDAD'] <= 11]
lab_df = a.exclude_patients(lab_df, exclude_list, verbose=True)

# Add days since the start of PD to the lab, hospitalization and peritonitis
# dataframes. lab_df is passed as the first argument in every call
# (presumably as the source of each patient's PD start date).
lab_df = a.add_days_since_start(lab_df, lab_df, 'FECHA')
hosp_df = a.add_days_since_start(lab_df, hosp_df, 'FINGRESO')
per_df = a.add_days_since_start(lab_df, per_df, 'FECHA')

# Now keep only the first year after the patients started PD
# (days_since_start filtered with bounds 0 and 365).
lab_df = a.filter_df(lab_df, 'days_since_start', 0, (365), verbose=True)
hosp_df = a.filter_df(hosp_df, 'days_since_start', 0, (365), verbose=True)
per_df = a.filter_df(per_df, 'days_since_start', 0, (365), verbose=True)

# Create the cox_df with all necessary data (labs + hospitalizations).
# NOTE(review): cox_time_varying_df is not referenced by any later cell
# visible in this notebook.
cox_df = a.prepare_cox_df(lab_df, hosp_df, covariate_list)
cox_time_varying_df = a.cox_time_varying_prep(lab_df, hosp_df, covariate_list, 365)

# Same preparation with peritonitis episodes as the event table.
cox_df_per = a.prepare_cox_df(lab_df, per_df, covariate_list)

## 6.1 Cox hemoglobina - hospitalización

In [None]:
# Cox proportional-hazards model on the hospitalization dataset.
cph_hosp = CoxPHFitter()
cph_hosp.fit(
    cox_df,
    duration_col='finish_days',
    event_col='event_col',
)

# Report the fitted model as a table ...
cph_hosp.print_summary()

# ... and as a coefficient plot.
cph_hosp.plot()
plt.title('Coxph anemia - hospitalización')
plt.show()

In [None]:
# Evaluate the hospitalization model on the data it was fitted on and print
# the result; the returned value is kept in `auc`.
auc = a.evaluate_cox_model(
    cph_hosp,
    cox_df,
    'finish_days',
    'event_col',
    print_result=True,
)

In [None]:
# Residual diagnostics for the hospitalization model: martingale residuals
# requested, the `schonenfeld` option switched off.
a.calculate_residuals(
    cph_hosp,
    cox_df,
    martingale=True,
    schonenfeld=False,
)

In [None]:
# Diagnostic output for the hospitalization model: every option enabled
# except the survival function.
a.cox_visualization(
    cph_hosp,
    cox_df,
    survival_function=False,
    baseline_survival=True,
    baseline_cumulative_hazard=True,
    assumption=True,
)

### Cox IST stratification and ROC curve (covariates)

In [None]:
# Stratified Cox on 'IST' using the cut points 0 / 20 / 30 / 999, with the
# full covariate dataset.
cph_models, aucs = a.stratified_cox(
    cox_df,
    'finish_days',
    'event_col',
    'IST',
    [0, 20, 30, 999],
)

### Cox IST stratification and ROC curve (IST only, without covariates)

In [None]:
# Configuration for the IST-only model: 'IST' is the single covariate, and no
# boolean columns or patient exclusions are applied.
basic_cols_lab_strat = ['REGISTRO', 'FECHA', 'INICIO_DP', 'NACIMIENTO']
covariate_list_strat = ['IST']
bool_col_list_strat = []
exclude_list_strat = []
basic_cols_hosp_strat = ['REGISTRO', 'FINGRESO']

# Rebuild the lab and hospitalization frames from the raw tables so that the
# exclusions of the main pipeline do not apply here.
# NOTE(review): unlike the main pipeline, no filter_by_year step is applied
# in this cell — confirm this is intended.
lab_strat_df = a.clean_df(merged_base_df, basic_cols_lab_strat, verbose=True)
hosp_strat_df = a.clean_df(pd.read_pickle('Ingresos.pkl'), basic_cols_hosp_strat, verbose=True)
lab_strat_df = a.clean_df(lab_strat_df, covariate_list_strat, verbose=True)

lab_strat_df = a.bool_col_convert(lab_strat_df, bool_col_list_strat)
lab_strat_df = a.exclude_patients(lab_strat_df, exclude_list_strat, verbose=True)

# Days since PD start for both frames, then keep only the first year.
lab_strat_df = a.add_days_since_start(lab_strat_df, lab_strat_df, 'FECHA')
hosp_strat_df = a.add_days_since_start(lab_strat_df, hosp_strat_df, 'FINGRESO')

lab_strat_df = a.filter_df(lab_strat_df, 'days_since_start', 0, (365), verbose=True)
hosp_strat_df = a.filter_df(hosp_strat_df, 'days_since_start', 0, (365), verbose=True)

# Use the hospitalization frame prepared in THIS cell. The previous code
# passed `hosp_df` from the main pipeline, which left `hosp_strat_df` unused
# and paired these labs with events derived from a different patient set.
cox_strat_tsi_df = a.prepare_cox_df(lab_strat_df, hosp_strat_df, covariate_list_strat)

cph_models, aucs = a.stratified_cox(cox_strat_tsi_df, 'finish_days', 'event_col', 'IST', [0, 20, 30, 999])

## 6.3 Cox hemoglobina - peritonitis

In [None]:
# Bin 'IST' and 'HIERRO' into two equal-width intervals each.
# Work on a copy: a plain assignment would only alias cox_df_per, so the
# *_binned columns would also be added to cox_df_per and end up as extra
# covariates in any later model fitted on it.
cox_df_per_bin = cox_df_per.copy()
cox_df_per_bin['IST_binned'] = pd.cut(cox_df_per_bin['IST'], bins=2, labels=False)
cox_df_per_bin['HIERRO_binned'] = pd.cut(cox_df_per_bin['HIERRO'], bins=2, labels=False)

# Initialize the CoxPHFitter
cph_per_bin = CoxPHFitter()

# Fit the data to the model with stratification on the binned variables
cph_per_bin.fit(cox_df_per_bin, duration_col='finish_days', event_col='event_col', strata=['IST_binned', 'HIERRO_binned'])

# Print the summary of the model
cph_per_bin.print_summary()

# Plot the coefficients of the model
cph_per_bin.plot()
plt.show()

#### Cox anemia-peritonitis residuals

In [None]:
# Residuals and diagnostic output for the binned (stratified) peritonitis model.
a.calculate_residuals(
    cph_per_bin,
    cox_df_per_bin,
    martingale=True,
    schonenfeld=False,
)
a.cox_visualization(
    cph_per_bin,
    cox_df_per_bin,
    survival_function=False,
    baseline_survival=True,
    baseline_cumulative_hazard=True,
    assumption=True,
)

### Cox anemia-peritonitis without binning

In [None]:
# Cox proportional-hazards model on the peritonitis dataset; no strata are
# passed to fit() in this cell.
cph_per = CoxPHFitter()
cph_per.fit(
    cox_df_per,
    duration_col='finish_days',
    event_col='event_col',
)

# Report the fitted model as a table ...
cph_per.print_summary()

# ... and as a coefficient plot.
cph_per.plot()
plt.show()

#### Cox anemia - peritonitis without binning residuals

In [None]:
# Residuals and diagnostic output for the unbinned peritonitis model.
a.calculate_residuals(
    cph_per,
    cox_df_per,
    martingale=True,
    schonenfeld=False,
)
a.cox_visualization(
    cph_per,
    cox_df_per,
    survival_function=False,
    baseline_survival=True,
    baseline_cumulative_hazard=True,
    assumption=True,
)

### Extended COX (covariates anemia - peritonitis)

In [None]:
# Build the start/stop-format dataset for the time-varying Cox model from the
# labs and peritonitis episodes of the main pipeline.
extended_cox_df = a.prepare_extended_cox_df(
    lab_df,
    per_df,
    covariate_list,
    study_time=365,
)

# Time-varying Cox model: one row per (REGISTRO, start_col, finish_col) interval.
ctv = CoxTimeVaryingFitter()
ctv.fit(
    extended_cox_df,
    id_col='REGISTRO',
    event_col='event_col',
    start_col='start_col',
    stop_col='finish_col',
)

# Report the fitted model as a table and as a coefficient plot.
ctv.print_summary()
ctv.plot()
plt.show()

### Andersen-Gill anemia-peritonitis

In [None]:
# Andersen-Gill analysis of peritonitis. This cell repeats the main
# preprocessing steps on a separate lab frame (lab_per_df) with its own
# exclusion list, then fits a CoxTimeVaryingFitter on the resulting dataset.
# NOTE: `per_df` and `ctv` are rebound here, replacing the objects created by
# earlier cells; cells run afterwards see these new values.

# Prepare the data
# Clean NaNs for interesting columns
lab_per_df = a.clean_df(merged_base_df, basic_cols_lab, verbose=True)
lab_per_df = a.clean_df(lab_per_df, covariate_list, verbose=True)
per_df = a.clean_df(per_base_df, basic_cols_per, verbose=True)

# Select years: each dataframe is paired positionally with its date column;
# the bounds passed are 2010 and 2023.
lab_per_df, per_df = a.filter_by_year([lab_per_df, per_df], ['FECHA', 'FECHA'], 2010, 2023)

# Add derived columns to lab_per_df. The return values are not used, so these
# helpers presumably modify the dataframe in place.
# Anemia column is based on 'HEMOGLOBINA' and 'SEXO' as specified in KDIGO guidelines.
a.add_anemia_column(lab_per_df)
a.add_age_column(lab_per_df)
a.add_ckd_column(lab_per_df)

# Delete rows that have 0 as values for not_zero_cols

lab_per_df = a.clean_zero_values(lab_per_df, not_zero_cols, verbose=True)

# Fill empty values for bool_cols
lab_per_df = a.bool_col_convert(lab_per_df, bool_col_list)

# Exclude patients using a peritonitis-specific list, which differs from the
# exclude_list used by the main pipeline (shown below for comparison).
exclude_per_list = ['ITU', 'EPOC', 'NEO', 'SIST']
#exclude_list = ['TBC', 'CH', 'SIST', 'CARDIO', 'VP', 'VC', 'NEO', 'DIVERT']
lab_per_df = a.exclude_patients(lab_per_df, exclude_per_list, verbose=True)

# Add days since the start of PD to both dataframes; lab_per_df is passed as
# the first argument in each call.
lab_per_df = a.add_days_since_start(lab_per_df, lab_per_df, 'FECHA')
per_df = a.add_days_since_start(lab_per_df, per_df, 'FECHA')

# Now keep only the first year after the patients started PD
# (days_since_start filtered with bounds 0 and 365).
lab_per_df = a.filter_df(lab_per_df, 'days_since_start', 0, (365), verbose=True)
per_df = a.filter_df(per_df, 'days_since_start', 0, (365), verbose=True)

# Build the Andersen-Gill dataset from the labs and peritonitis episodes.
ag_df = a.prepare_andersen_gill_df(lab_per_df, per_df, covariate_list, study_time=365)

# Initialize the CoxTimeVaryingFitter
ctv = CoxTimeVaryingFitter()

# Fit the data to the model
ctv.fit(ag_df, id_col='REGISTRO', event_col='event_col', start_col='start_col', stop_col='finish_col')

# Print the summary of the model
ctv.print_summary()

# Plot the coefficients of the model
ctv.plot()
plt.show()

### Extended COX diff filter

In [None]:
# Extended (time-varying) Cox dataset built from the peritonitis-specific
# lab frame (lab_per_df) instead of the main-pipeline lab_df.
extended_cox_df = a.prepare_extended_cox_df(lab_per_df, per_df, covariate_list, study_time=365)

# Initialize the CoxTimeVaryingFitter
ctv = CoxTimeVaryingFitter()

# Fit the model on the dataset prepared above. The previous code fitted
# `ag_df`, which ignored `extended_cox_df` and simply repeated the
# Andersen-Gill fit.
ctv.fit(extended_cox_df, id_col='REGISTRO', event_col='event_col', start_col='start_col', stop_col='finish_col')

# Print the summary of the model
ctv.print_summary()

# Plot the coefficients of the model
ctv.plot()
plt.show()

### Cox IST stratification and ROC curve (covariates hb-peritonitis)

## Random Survival Forest

In [None]:
# Build the feature matrix and the survival target from the labs and
# hospitalizations of the main pipeline.
rsf_df, y = a.prepare_rsf_df(lab_df, hosp_df, covariate_list)

# Fix the seed so the fitted forest (bootstrap samples and split candidates)
# is reproducible between notebook runs.
rsf = RandomSurvivalForest(n_estimators=1000, random_state=42)
rsf.fit(rsf_df, y)

## Prevalencia de anemia en pacientes con ERC en DP

### Anemia prevalence stacked and by gender

In [None]:
# Keep only lab rows that have every anemia marker plus patient id and date.
anemia_cols = ['HEMOGLOBINA', 'HIERRO', 'IST', 'FERRITINA', 'REGISTRO', 'FECHA']
anemia_df = a.clean_df(merged_base_df, anemia_cols, verbose=True)
a.add_anemia_column(anemia_df)
# filter_by_year takes and returns lists of dataframes; unwrap the single result.
anemia_df = a.filter_by_year([anemia_df], ['FECHA'], 2010, 2023)[0]

# Prevalence results; the two threshold lists differ only in their last value
# (13 vs 12). The first result was previously bound to the misspelled name
# `ale_results_readable`.
male_results_readable, female_results_readable, total_results_readable, df = a.anemia_prevalence_stack(anemia_df, [10, 11, 13], [10, 11, 12], [2010, 2023])
# Backward-compatible alias for code that still uses the old, misspelled name.
ale_results_readable = male_results_readable

## Tendencias marcadores de anemia

In [None]:
# Time elapsed between consecutive labs: print the average only, not the
# per-patient detail.
time_between_labs = a.lab_freq_stats(
    anemia_df,
    print_avg=True,
    print_patient=False,
)

In [None]:
# Markers whose trend is analysed, and the columns a lab row must have to be kept.
time_trend_cols = ['HEMOGLOBINA', 'HIERRO', 'IST', 'FERRITINA', 'CKD_CALC']
basic_cols = [
    'REGISTRO', 'FECHA',
    'HEMOGLOBINA', 'HIERRO', 'IST', 'FERRITINA',
    'INICIO_DP', 'NACIMIENTO',
]

# Rebuild anemia_df from the merged table and add the age and CKD columns.
anemia_df = a.clean_df(merged_base_df, basic_cols, verbose=True)
a.add_age_column(anemia_df)
a.add_ckd_column(anemia_df)

# Trend analysis with plots and t-test enabled, normality test disabled.
# NOTE(review): the meaning of the positional arguments 64 and 365 is defined
# by a.time_trend_analysis — confirm against its signature.
time_trend_df = a.time_trend_analysis(
    anemia_df,
    64,
    time_trend_cols,
    365,
    plot_results=True,
    t_test=True,
    normality_test=False,
)

In [None]:
# Stationarity test on the trend results, selecting columns via the '_avg' string.
a.stationary_test(
    time_trend_df,
    col_str='_avg',
)

## Baseline

In [None]:
# Baseline table over the preprocessed labs, using the categorical and
# numeric column lists defined in the configuration cell, then print it.
baseline = a.calculate_baseline(
    lab_df,
    baseline_cat_cols,
    baseline_num_cols,
)
a.print_baseline(baseline)