# Anemia en Diálisis Peritoneal

In [None]:
from lifelines import CoxPHFitter
from lifelines import CoxTimeVaryingFitter
import matplotlib.pyplot as plt
import pandas as pd

import anemiaLevanteDP as a

## Load Data

In [None]:
# Excel workbooks to read.  To work with the first workbook only, remove the
# second entry from the list.
file_path = [
    '/home/jovyan/work/Data/LevanteDP/LevanteDPC.xls',
    '/home/jovyan/work/Data/LevanteDP/LevanteDPPeset.xlsx',
]

# Read the workbooks; the result is indexed by name below (presumably one
# entry per sheet -- confirm against a.load_excel_files).
dfs_base = a.load_excel_files(file_path)

# Combine the 'Analíticas' and 'Pacientes' entries, passing 'REGISTRO' as the
# column argument of the merge helper.
merged_df = a.merge_dataframes(
    dfs_base['Analíticas'],
    dfs_base['Pacientes'],
    'REGISTRO',
)

# Cache both frames as pickles so that test runs can reload them quickly
# instead of re-reading the Excel files.
merged_df.to_pickle('merged_df.pkl')
dfs_base['Ingresos'].to_pickle('Ingresos.pkl')

In [None]:
# Reload the frames that were cached as pickles by the Excel-loading cell.
merged_df = pd.read_pickle('merged_df.pkl')

# On a fresh kernel the Excel-loading cell may have been skipped, in which
# case `dfs_base` does not exist yet.  Create it so the assignment below does
# not fail with NameError.
if 'dfs_base' not in globals():
    dfs_base = {}
dfs_base['Ingresos'] = pd.read_pickle('Ingresos.pkl')

# NOTE(review): only 'merged_df' and 'Ingresos' are cached as pickles.  Other
# `dfs_base` entries used later (e.g. 'Peritonitis' during preprocessing) are
# only available after the Excel files have been loaded.

## Preprocesado de datos

In [None]:
# Categorical columns summarised in the baseline table.
baseline_cat_cols = [
    'SEXO',
    'CKD_STAGE',
    'ARRITMIA',
    'ITU',
    'TBC',
    'DM',
    'VC',
    'DIVERT',
    'NEO',
    'CH',
    'SIST',
    'EPOC',
    'CARDIO',
    'VP',
    'DISLIPEMIA',
    'DIURETICO',
    'CALCIOANTA',
    'IECA',
    'ARAII',
    'BBLOQUEANTE',
    'ABLOQUEANTE',
    'ABBLOQUEANTE',
    'AGONISTASC',
    'VASODILATADOR',
]

# Numeric columns summarised in the baseline table.
baseline_num_cols = [
    'EDAD',
    'CKD_CALC',
    'GLUCOSA',
    'UREA',
    'CREATININA',
    'URICO',
    'SODIO',
    'POTASIO',
    'CALCIO',
    'FOSFORO',
    'HIERRO',
    'TRANSFERRINA',
    'IST',
    'FERRITINA',
    'COLESTEROL',
    'TRIGLICERIDOS',
    'HDL',
    'LDL',
    'LEUCOCITOS',
    'NEUTROFILOS',
    'LINFOCITOS',
    'MONOCITOS',
    'EOSINOFILOS',
    'BASOFILOS',
    'GRANULOCITOS',
    'HEMATIES',
    'HEMOGLOBINA',
    'HEMATOCRITO',
    'VCM',
    'HCM',
    'CHCM',
    'PLAQUETAS',
    'PLAQUETOCRITO',
    'VPM',
]

# Columns that must carry information; rows missing them are cleaned out.
# One list per source table: labs, hospital admissions, peritonitis.
basic_cols_lab = ['REGISTRO', 'FECHA', 'HEMOGLOBINA', 'INICIO_DP']
basic_cols_hosp = ['REGISTRO', 'FINGRESO']
basic_cols_per = ['REGISTRO', 'FECHA']

# Columns used when building the Cox models.
bool_col_list = [
    'CARDIORENAL',
    'PASO_A_HD',
    'ARRITMIA',
    'ITU',
    'TBC',
    'DM',
    'VC',
    'DIVERT',
    'NEO',
    'CH',
    'SIST',
    'EPOC',
    'CARDIO',
    'VP',
    'DISLIPEMIA',
    'FRAGNOS',
    'DIURETICO',
    'CALCIOANTA',
    'IECA',
    'ARAII',
    'BBLOQUEANTE',
    'ABLOQUEANTE',
    'ABBLOQUEANTE',
    'AGONISTASC',
    'VASODILATADOR',
    'OTROSFR',
    'CIRUGIA',
]
covariate_list = ['HEMOGLOBINA', 'IST', 'HIERRO']
exclude_list = ['CARDIO']

In [None]:
# Number of days in the "first year after starting PD" window.  The same
# value is also handed to a.cox_time_varying_prep below (presumably as the
# same window -- confirm against that helper).
FIRST_YEAR_DAYS = 365

# Clean out rows with NaNs in the columns of interest.  Labs are cleaned
# twice: first on the basic columns, then on the model covariates.
lab_df = a.clean_df(merged_df, basic_cols_lab, verbose=True)
lab_df = a.clean_df(lab_df, covariate_list, verbose=True)
hosp_df = a.clean_df(dfs_base['Ingresos'], basic_cols_hosp, verbose=True)
per_df = a.clean_df(dfs_base['Peritonitis'], basic_cols_per, verbose=True)

# Select the years 2009-2024, using each frame's own date column.
lab_df, hosp_df, per_df = a.filter_by_year(
    [lab_df, hosp_df, per_df],
    ['FECHA', 'FINGRESO', 'FECHA'],
    2009,
    2024,
)

# Derived columns.  The return values are not used here, so these helpers
# are expected to modify lab_df in place.  The anemia column is based on
# 'HEMOGLOBINA' and 'SEXO' as specified in the KDIGO guidelines.
a.add_anemia_column(lab_df)
a.add_age_column(lab_df)
a.add_ckd_column(lab_df)

# Fill empty values in the boolean columns.
lab_df = a.bool_col_convert(lab_df, bool_col_list)

# Exclude patients according to the columns in exclude_list.
# Optional extra filter, currently disabled:
#lab_df = lab_df[lab_df['ICEDAD'] <= 11]
lab_df = a.exclude_patients(lab_df, exclude_list, verbose=True)

# Add the days since the start of PD to the rows of all three frames.
# lab_df is processed first because the updated lab_df is then passed as the
# first argument when processing the other two frames.
lab_df = a.add_days_since_start(lab_df, lab_df, 'FECHA')
hosp_df = a.add_days_since_start(lab_df, hosp_df, 'FINGRESO')
per_df = a.add_days_since_start(lab_df, per_df, 'FECHA')

# Keep only the first year after the patients started PD.  The frames are
# filtered in the order labs, admissions, peritonitis.
lab_df, hosp_df, per_df = [
    a.filter_df(frame, 'days_since_start', 0, FIRST_YEAR_DAYS, verbose=True)
    for frame in (lab_df, hosp_df, per_df)
]

# Build the modelling tables: hospitalisation (plain and time-varying)
# followed by peritonitis.
cox_df = a.prepare_cox_df(lab_df, hosp_df, covariate_list)
cox_time_varying_df = a.cox_time_varying_prep(
    lab_df,
    hosp_df,
    covariate_list,
    FIRST_YEAR_DAYS,
)

cox_df_per = a.prepare_cox_df(lab_df, per_df, covariate_list)

In [None]:
# Quick inspection: show the first 'CARDIO' values of the preprocessed
# lab_df ('CARDIO' is the column listed in exclude_list).
print(lab_df['CARDIO'].head())

## 6.1 Cox hemoglobina - hospitalización

In [None]:
# Cox proportional-hazards model for the hospitalisation outcome.
cph_hosp = CoxPHFitter()

# Fit on cox_df: 'finish_days' is the duration column and 'event_col' the
# event indicator column.
cph_hosp.fit(
    cox_df,
    duration_col='finish_days',
    event_col='event_col',
)

# Tabular summary of the fitted model.
cph_hosp.print_summary()

# Coefficient plot of the fitted model.
cph_hosp.plot()
plt.title('Coxph anemia - hospitalización')
plt.show()

In [None]:
# Residuals for the hospitalisation model: the `martingale` option is on and
# the `schonenfeld` option is off.
a.calculate_residuals(
    cph_hosp,
    cox_df,
    martingale=True,
    schonenfeld=False,
)

In [None]:
# Visual output for the hospitalisation model: every option is enabled
# except the survival function.
a.cox_visualization(
    cph_hosp,
    cox_df,
    survival_function=False,
    baseline_survival=True,
    baseline_cumulative_hazard=True,
    assumption=True,
)

## 6.3 Cox hemoglobina - peritonitis

In [None]:
# Cox proportional-hazards model for the peritonitis outcome.
cph_per = CoxPHFitter()

# Fit on cox_df_per: 'finish_days' is the duration column and 'event_col' the
# event indicator column.
cph_per.fit(cox_df_per, duration_col='finish_days', event_col='event_col')

# Tabular summary of the fitted model.
cph_per.print_summary()

# Coefficient plot of the fitted model.  It is titled like the
# hospitalisation plot so the two figures can be told apart.
cph_per.plot()
plt.title('Coxph anemia - peritonitis')
plt.show()

In [None]:
# Residuals for the peritonitis model: the `martingale` option is on and the
# `schonenfeld` option is off.
a.calculate_residuals(
    cph_per,
    cox_df_per,
    martingale=True,
    schonenfeld=False,
)

In [None]:
# Visual output for the peritonitis model: every option is enabled except
# the survival function.
a.cox_visualization(
    cph_per,
    cox_df_per,
    survival_function=False,
    baseline_survival=True,
    baseline_cumulative_hazard=True,
    assumption=True,
)

## Prevalencia de anemia en pacientes con ERC en DP

In [None]:
# Start again from the cached merged frame, without any filtering applied.
anemia_cols = [
    'HEMOGLOBINA',
    'HIERRO',
    'IST',
    'FERRITINA',
    'REGISTRO',
    'FECHA',
]
merged_df = pd.read_pickle('merged_df.pkl')

# Clean the rows on the anemia-related columns, then add the anemia column.
# The return value of add_anemia_column is not used, so it is expected to
# modify anemia_df in place.
anemia_df = a.clean_df(merged_df, anemia_cols, verbose=True)
a.add_anemia_column(anemia_df)

# filter_by_year takes and returns a list of frames; only one is passed, so
# the first element of the result is kept.
# NOTE(review): the range here is 2009-2023 while the Cox preprocessing uses
# 2009-2024 -- confirm the difference is intentional.
frames_in_range = a.filter_by_year([anemia_df], ['FECHA'], 2009, 2023)
anemia_df = frames_in_range[0]

anemia_prevalence = a.anemia_prevalence(
    anemia_df,
    print_results=True,
    print_graph=True,
    tendency=True,
)

## Tendencias marcadores de anemia

In [None]:
# Average time between consecutive lab tests.  The average is printed; the
# per-patient output is turned off.
time_between_labs = a.lab_freq_stats(
    anemia_df,
    print_avg=True,
    print_patient=False,
)

In [None]:
# Markers whose evolution over time is analysed.
time_trend_cols = [
    'HEMOGLOBINA',
    'HIERRO',
    'IST',
    'FERRITINA',
    'CKD_CALC',
]

# Add the age and CKD columns to anemia_df.  The return values are not used,
# so these helpers are expected to modify the frame in place.
a.add_age_column(anemia_df)
a.add_ckd_column(anemia_df)

# NOTE(review): 64 and 365 are passed positionally; their meaning is defined
# by a.time_trend_analysis -- confirm against its signature.
time_trend_df = a.time_trend_analysis(
    anemia_df,
    64,
    time_trend_cols,
    365,
    plot_results=True,
    t_test=True,
)

In [None]:
# Run the stationarity test on the time-trend table.  col_str='_avg'
# presumably selects the columns whose name contains '_avg' -- confirm against
# a.stationary_test.
a.stationary_test(time_trend_df, col_str='_avg')

In [None]:
# Print the table returned by a.time_trend_analysis for inspection.
print(time_trend_df)

## Baseline

In [None]:
# Baseline characteristics computed from lab_df.  When the cells are run in
# order, lab_df is the preprocessed frame restricted to the first year of PD.
baseline = a.calculate_baseline(
    lab_df,
    baseline_cat_cols,
    baseline_num_cols,
)
a.print_baseline(baseline)