# Weighted concordance index

In [4]:
import pandas as pd

In [86]:
# Toy survival data set: one row per subject.
#   event_times      - observed follow-up time
#   predicted_scores - model score for the subject
#   event_observed   - 1 if the event was observed (case), 0 if censored (control)
example_data = pd.DataFrame(
    data={
        "event_times": [1, 2, 0.5, 3, 4],
        "predicted_scores": [1, 0, 2, 2, 0.5],
        "event_observed": [1, 1, 1, 0, 0],
    }
)

In [87]:
# Show the subjects ordered by follow-up time (returns a sorted copy).
example_data.sort_values(by="event_times")

Unnamed: 0,event_times,predicted_scores,event_observed
2,0.5,2.0,1
0,1.0,1.0,1
1,2.0,0.0,1
3,3.0,2.0,0
4,4.0,0.5,0


In [105]:
def weighted_concordance(event_times, predicted_scores, event_observed, samp_fraction):
    """
    Compute a concordance index in which case-control pairs are re-weighted.

    A pair of subjects is admissible when the member with the earlier time is
    a case (its event was observed). The pair scores 1 when that earlier case
    has the strictly higher predicted score, 0.5 when the two scores are equal
    and 0 otherwise, i.e. higher scores are expected to go with earlier events.
    Pairs whose later member is a control (censored) are multiplied by
    ``1 / samp_fraction`` in both the numerator and the denominator; pairs of
    two cases have weight 1.

    Tied event times: two cases sharing the same time are not comparable and
    are skipped, while a control censored at the same time as a case is
    treated as outlasting that case (the convention used by
    ``lifelines.utils.concordance_index``). The result does not depend on the
    order of the input rows.

    Parameters
    ----------
    event_times: iterable
        a length-n iterable of observed follow-up times.
    predicted_scores: iterable
        a length-n iterable of predicted scores; higher means earlier event.
    event_observed: iterable
        a length-n iterable of flags, truthy for a case, falsy for a control.
    samp_fraction: float
        positive number; case-control pairs are weighted by
        ``1 / samp_fraction`` (presumably the fraction of controls kept by
        sub-sampling - confirm against the study design).

    Returns
    -------
    float
        weighted concordance, between 0 and 1.

    Raises
    ------
    ValueError
        if ``samp_fraction`` is not positive.
    ZeroDivisionError
        if the data contain no admissible pair.
    """
    if samp_fraction <= 0:
        raise ValueError("samp_fraction must be a positive number.")

    # Going through a DataFrame keeps the original input handling (lists,
    # arrays or index-aligned Series); plain lists are then much cheaper to
    # scan pairwise than repeated DataFrame.at lookups.
    records = pd.DataFrame({
        "event_times": event_times,
        "predicted_scores": predicted_scores,
        "event_observed": event_observed,
    })
    times = records["event_times"].tolist()
    scores = records["predicted_scores"].tolist()
    observed = records["event_observed"].tolist()

    control_weight = 1 / samp_fraction
    case_credit = 0
    case_pairs = 0
    control_credit = 0
    control_pairs = 0

    for i, is_case in enumerate(observed):
        if not is_case:
            # Only a case can be the earlier member of an admissible pair.
            continue
        for j, other_is_case in enumerate(observed):
            if other_is_case:
                # Strict inequality skips j == i, skips cases tied in time,
                # and counts every case-case pair exactly once.
                admissible = times[i] < times[j]
            else:
                # A control censored at the same time outlasted the case.
                admissible = times[i] <= times[j]
            if not admissible:
                continue

            if scores[i] > scores[j]:
                credit = 1
            elif scores[i] == scores[j]:
                credit = 0.5
            else:
                credit = 0

            if other_is_case:
                case_credit += credit
                case_pairs += 1
            else:
                control_credit += credit
                control_pairs += 1

    weighted_pairs = case_pairs + control_weight * control_pairs
    if weighted_pairs == 0:
        raise ZeroDivisionError("No admissible pairs in the dataset.")
    return (case_credit + control_weight * control_credit) / weighted_pairs

In [106]:
# Weighted c-index with samp_fraction = 0.5, so case-control pairs count double.
weighted_concordance(
    example_data["event_times"],
    example_data["predicted_scores"],
    example_data["event_observed"],
    samp_fraction=0.5,
)

0.5333333333333333

In [109]:
# sys is needed only to extend the module search path below
import sys

# NOTE: machine-specific location of a local package directory; adjust this
# path (or install lifelines into the environment) before running elsewhere.
sys.path.append('C:/Users/thseale/Documents/Python Packages')
from lifelines.utils import concordance_index

In [111]:
# Reference value from lifelines. Its c-index rewards a higher score for the
# longer-surviving subject, whereas weighted_concordance rewards a higher score
# for the earlier event, so the scores are negated here.
concordance_index(
    example_data["event_times"],
    -example_data["predicted_scores"],
    example_data["event_observed"],
)

0.6111111111111112

In [112]:
# With samp_fraction = 1 every pair has weight 1, so this should reproduce the
# unweighted lifelines value computed in the previous cell.
weighted_concordance(
    example_data["event_times"],
    example_data["predicted_scores"],
    example_data["event_observed"],
    samp_fraction=1,
)

0.6111111111111112

In [None]:
def concordance_index(event_times, predicted_scores, event_observed=None) -> float:
    """
    Compute the concordance index (C-index) of predicted scores against
    observed survival times.

    The C-index measures how often the model ranks a pair of subjects in the
    same order as their observed survival times. Censoring limits which pairs
    can be evaluated: if one subject of a pair is censored, it may be
    impossible to tell which of the two truly survived longer.

    The result lies between 0 and 1:

    - 1.0 means perfect concordance,
    - 0.5 is what random predictions are expected to give,
    - 0.0 means perfect anti-concordance (negating the predictions gives 1.0).

    The value returned is::

        (pairs_correct + 0.5 * pairs_tied) / admissible_pairs

    where ``pairs_correct`` counts the pairs for which ``t_x > t_y`` goes with
    ``s_x > s_y``, ``pairs_tied`` counts the pairs with ``s_x = s_y``, and
    ``admissible_pairs`` counts the pairs that can be evaluated at all given
    the censoring.

    Parameters
    ----------
    event_times: iterable
        a length-n iterable of observed survival times.
    predicted_scores: iterable
        a length-n iterable of predicted scores - these could be survival
        times, or hazards, etc. See
        https://stats.stackexchange.com/questions/352183/use-median-survival-time-to-calculate-cph-c-statistic/352435#352435
    event_observed: iterable, optional
        a length-n iterable of censoring flags, 1 if observed, 0 if not.
        Default None assumes all observed.

    Returns
    -------
    c-index: float
        a value between 0 and 1.

    References
    ----------
    Harrell FE, Lee KL, Mark DB. Multivariable prognostic models: issues in
    developing models, evaluating assumptions and adequacy, and measuring and
    reducing errors. Statistics in Medicine 1996;15(4):361-87.

    Examples
    --------
    .. code:: python

        from lifelines.utils import concordance_index
        cph = CoxPHFitter().fit(df, 'T', 'E')
        concordance_index(df['T'], -cph.predict_partial_hazard(df), df['E'])

    """
    # NOTE(review): the three private helpers below are not defined in this
    # file; they presumably come from lifelines.utils - confirm before running.
    times, scores, observed = _preprocess_scoring_data(
        event_times, predicted_scores, event_observed
    )
    correct_count, tied_count, pair_count = _concordance_summary_statistics(
        times, scores, observed
    )
    return _concordance_ratio(correct_count, tied_count, pair_count)