## Python Log KaplanMeierFitter

In [1]:
## Import the KaplanMeierFitter from lifelines and pandas library
import pandas as pd
from lifelines import KaplanMeierFitter

In [2]:
## Data used
## Creat the dataframe and printing the top rows from data source
df = pd.read_sas('whas500.sas7bdat')
df.head()

Unnamed: 0,ID,AGE,GENDER,HR,SYSBP,DIASBP,BMI,CVD,AFB,SHO,CHF,AV3,MIORD,MITYPE,YEAR,LOS,DSTAT,LENFOL,FSTAT
0,1.0,83.0,0.0,89.0,152.0,78.0,25.54051,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,5.0,0.0,2178.0,0.0
1,2.0,49.0,0.0,84.0,120.0,60.0,24.023979,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,5.0,0.0,2172.0,0.0
2,3.0,70.0,1.0,83.0,147.0,88.0,22.1429,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,5.0,0.0,2190.0,0.0
3,4.0,70.0,0.0,65.0,123.0,76.0,26.63187,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,10.0,0.0,297.0,1.0
4,5.0,70.0,0.0,63.0,135.0,85.0,24.41255,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,6.0,0.0,2131.0,0.0


In [3]:
## Example Code
## Choose the right columns for analysis
df = df[["LENFOL", "AFB", "FSTAT"]]

In [4]:
## Change follow-up days to years for better visualization
df['LENFOL'] = df['LENFOL']/365.25
df['LENFOL'] = df['LENFOL'].round(decimals = 2)

In [5]:
## Create of two dataframe when AFB=1.0 and AFB=0.0
afb_yes = df[df['AFB'] == 1.0]
afb_no  = df[df['AFB'] == 0.0]

In [6]:
## Create Kaplan-Meier estimator
kmf_yes = KaplanMeierFitter()
kmf_no = KaplanMeierFitter()

In [7]:
## Fit Kaplan-Meier estimator to data
kmf_yes.fit(afb_yes['LENFOL'], event_observed=afb_yes['FSTAT'])
kmf_no.fit(afb_no['LENFOL'], event_observed=afb_no['FSTAT'])

<lifelines.KaplanMeierFitter:"KM_estimate", fitted with 422 total observations, 254 right-censored observations>

In [8]:
## Calculate confidence interval for the survival function
confidence_interval_yes = kmf_yes.confidence_interval_survival_function_
confidence_interval_no = kmf_no.confidence_interval_survival_function_

In [9]:
## The estimated survival function if AFB=1.0
kmf_yes.survival_function_

Unnamed: 0_level_0,KM_estimate
timeline,Unnamed: 1_level_1
0.00,0.974359
0.01,0.935897
0.02,0.910256
0.03,0.884615
0.04,0.858974
...,...
5.54,0.314880
5.65,0.314880
5.70,0.314880
5.96,0.314880


In [13]:
## The estimated survival function if AFB=0.0
kmf_no.survival_function_

Unnamed: 0_level_0,KM_estimate
timeline,Unnamed: 1_level_1
0.00,0.985782
0.01,0.957346
0.02,0.936019
0.03,0.924171
0.04,0.921801
...,...
5.94,0.459356
5.95,0.459356
6.00,0.459356
6.44,0.229678


In [14]:
## The lower and upper confidence intervals for the survival function when AFB=1.0 and AFB=0.0
confidence_interval_yes = kmf_yes.confidence_interval_survival_function_
confidence_interval_no = kmf_no.confidence_interval_survival_function_

In [15]:
## Calculate log transformation of the confidence interval
log_transformed_ci = confidence_interval_yes.applymap(lambda x: -1 * (1 / x - 1))

ZeroDivisionError: float division by zero

In [16]:
## Printing the result of confidence intervals for the survival function when AFB=1.0
print("Log Transformation of Confidence Interval ABF=Yes:")
confidence_interval_yes

Log Transformation of Confidence Interval ABF=Yes:


Unnamed: 0,KM_estimate_lower_0.95,KM_estimate_upper_0.95
0.00,0.901347,0.993525
0.01,0.852833,0.972806
0.02,0.820953,0.956175
0.03,0.789996,0.938225
0.04,0.759836,0.919304
...,...,...
5.54,0.195172,0.441634
5.65,0.195172,0.441634
5.70,0.195172,0.441634
5.96,0.195172,0.441634


In [14]:
## Printing the result of confidence intervals for the survival function when AFB=0.0
print("Log Transformation of Confidence Interval ABF=No:")
confidence_interval_no

Log Transformation of Confidence Interval ABF=No:


Unnamed: 0,KM_estimate_lower_0.95,KM_estimate_upper_0.95
0.00,0.968628,0.993587
0.01,0.933150,0.972911
0.02,0.908081,0.955672
0.03,0.894472,0.945764
0.04,0.891771,0.943761
...,...,...
5.94,0.345753,0.565624
5.95,0.345753,0.565624
6.00,0.345753,0.565624
6.44,0.021751,0.568185


In [15]:
## Predict of the KM_estimate survival at 1,3 and 5 year if AFB=1.0
kmf_yes.predict([1,3,5], interpolate = True).round(3)




1    0.639
3    0.455
5    0.315
Name: KM_estimate, dtype: float64

In [16]:
## KM_estimate on 1,2,3 quartile if AFB=1.0
kmf_yes.percentile([0.25,0.50,0.75])




Unnamed: 0,KM_estimate
0.25,6.43
0.5,2.37
0.75,0.26


In [17]:
## Median of the KM_estimate survival if AFB=1.0
kmf_yes.median_survival_time_

2.37

In [18]:
## Predict of the KM_estimate survival at 1,3 and 5 year if AFB=0.0
kmf_no.predict([1,3,5], interpolate = True).round(3)




1    0.739
3    0.642
5    0.530
Name: KM_estimate, dtype: float64

In [19]:
## KM_estimate on 1,2,3 quartile if AFB=0.0
kmf_no.percentile([0.25,0.50,0.75])




Unnamed: 0,KM_estimate
0.25,6.44
0.5,5.91
0.75,0.94


In [20]:
## Median of the KM_estimate survival if AFB=0.0
kmf_no.median_survival_time_

5.91