# Concordance, predictive ability, and explained variance

In [1]:
import numpy as np
import pandas as pd

from scipy.stats import norm, rankdata
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index

Read the data

In [2]:
# Whitespace-delimited file; the raw string keeps "\s" from being parsed as an
# (invalid) string escape sequence before pandas sees the regular expression.
# The 'patient' identifier column is dropped right after reading.
DatOriginal = (
    pd.read_csv(
        "DatasetsMedicalResearch/Survival of multiple myeloma patients.dat",
        sep=r"\s+",
    )
    .drop(columns='patient')
)

  DatOriginal = pd.read_csv("DatasetsMedicalResearch/Survival of multiple myeloma patients.dat", sep="\s+")


In [3]:
# Preview the first five rows of the loaded data.
DatOriginal.head(n=5)

Unnamed: 0,time,status,age,sex,bun,ca,hb,pcells,protein
0,13,1,66,1,25,10,14.6,18,1
1,52,0,66,1,13,11,12.0,100,0
2,6,1,53,2,15,13,11.4,33,1
3,40,1,69,1,10,10,10.2,30,1
4,10,1,65,1,20,10,13.2,66,0


---

In [4]:
def _gonen_heller_k(eta):
    """Return the K concordance index for a vector of log partial hazards.

    Every unordered pair (i, j) whose predictors differ contributes
    1 / (1 + exp(-|eta_j - eta_i|)); tied pairs contribute 0.  The sum is
    divided by the number of pairs, n(n-1)/2.  NaN is returned when fewer
    than two observations are available, because no pair exists.
    """
    eta = np.asarray(eta, dtype=float).ravel()
    n = eta.size
    if n < 2:
        return np.nan

    total = 0.0
    for i in range(n - 1):
        # Vectorised inner loop: gaps between observation i and all later ones.
        gaps = np.abs(eta[i + 1:] - eta[i])
        gaps = gaps[gaps != 0]  # tied predictors add nothing
        total += np.sum(1 / (1 + np.exp(-gaps)))
    return 2 * total / (n * (n - 1))


def ConcordDiscrimPerform(model, dat, duration_col="time", event_col="status"):
    """Summarise concordance, discrimination and explained variation on `dat`.

    Parameters
    ----------
    model : fitted lifelines Cox model
        Must provide ``predict_log_partial_hazard``.
    dat : pandas.DataFrame
        Data to evaluate on; must contain the model covariates plus
        `duration_col` and `event_col`.
    duration_col, event_col : str
        Names of the follow-up time and event indicator columns in `dat`
        (defaults: "time" and "status").

    Returns
    -------
    pandas.DataFrame
        One row per statistic, with columns 'estimate', 'value' and 'std':

        * c_index -- concordance index of the predictions against the
          observed times/events in `dat`.
        * K_index -- model-based concordance computed from the log partial
          hazards only (see ``_gonen_heller_k``).
        * D_stat  -- coefficient (and its standard error, in 'std') of a Cox
          regression on the scaled normal scores of the log partial hazards.
        * R2_P    -- var(eta) / (var(eta) + pi^2/6).
        * R2_D    -- same ratio with var(eta) replaced by (D / sqrt(8/pi))^2.
    """
    # Flatten to a plain float vector so the code below does not depend on
    # whether the prediction comes back as a Series or a one-column frame.
    eta = np.asarray(model.predict_log_partial_hazard(dat), dtype=float).ravel()
    n = eta.size

    # Concordance indices
    # Evaluated on `dat` itself (not read from the fitted model) so that every
    # statistic in the table refers to the same data.  Scores are negated
    # because a higher hazard means a shorter expected survival time.
    cindex = concordance_index(dat[duration_col], -eta, dat[event_col])
    kstat = _gonen_heller_k(eta)

    # Predictive ability
    kappa = np.sqrt(8 / np.pi)
    dataux = dat[[duration_col, event_col]].copy()
    # Normal scores of the ranked predictions, scaled by 1/kappa.
    dataux['NormalScore'] = norm.ppf((rankdata(eta) - 3/8) / (n + 1/4)) / kappa

    score_fit = CoxPHFitter().fit(dataux, duration_col, event_col, formula="NormalScore")
    d_coef, d_se = score_fit.summary[['coef', 'se(coef)']].values[0]

    # Explained variance
    sigma2 = np.pi**2 / 6
    vP = np.var(eta)  # population variance (ddof=0)
    rP = vP / (vP + sigma2)

    d0 = d_coef / kappa
    rD = d0**2 / (d0**2 + sigma2)

    return pd.DataFrame(
        {
            'estimate': ['concordance', 'concordance', 'discrimination', 'performance', 'performance'],
            'value': [cindex, kstat, d_coef, rP, rD],
            'std': [np.nan, np.nan, d_se, np.nan, np.nan],
        },
        index=['c_index', 'K_index', 'D_stat', 'R2_P', 'R2_D']
    )

---

In [5]:
# Cox model restricted to the covariates bun and hb.
Model = CoxPHFitter()
Model.fit(DatOriginal, duration_col="time", event_col="status", formula="bun+hb")

ConcordDiscrimPerform(Model, DatOriginal).round(3)

Unnamed: 0,estimate,value,std
c_index,concordance,0.67,
K_index,concordance,0.675,
D_stat,discrimination,0.999,0.351
R2_P,performance,0.293,
R2_D,performance,0.192,


In [6]:
# Cox model using every remaining column of the data as a covariate.
ModelFull = CoxPHFitter()
ModelFull.fit(DatOriginal, duration_col="time", event_col="status")

ConcordDiscrimPerform(ModelFull, DatOriginal).round(3)

Unnamed: 0,estimate,value,std
c_index,concordance,0.705,
K_index,concordance,0.687,
D_stat,discrimination,1.209,0.342
R2_P,performance,0.334,
R2_D,performance,0.259,
