In [1]:
import pandas as pd
import numpy as np
from math import *
import warnings
# Silence every warning category from here on, including those raised by pandas.
warnings.filterwarnings('ignore')

In [2]:
# Load the raw records; the path is resolved relative to the working directory.
data = pd.read_csv('../CSV/table.csv')

# Show only the four columns that the functions below read.
data.loc[:, ['time', 'join', 'surrender', 'death']]

Unnamed: 0,time,join,surrender,death
0,16.0,12.0,1.0,0.0
1,11.0,10.0,0.0,1.0
2,13.0,10.0,1.0,0.0
3,14.0,13.0,0.0,1.0
4,19.0,15.0,1.0,0.0
5,8.0,1.0,1.0,0.0
6,18.0,3.0,0.0,1.0
7,2.0,0.0,1.0,0.0
8,4.0,0.0,1.0,0.0
9,11.0,3.0,0.0,1.0


In [3]:
def get_event_times(df=None):
    """Count the deaths recorded at each distinct death time.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Records with at least a ``time`` and a ``death`` column. When
        omitted, the notebook-level ``data`` frame is used, so existing
        ``get_event_times()`` calls keep working unchanged.

    Returns
    -------
    list of [time, death_count]
        One pair per distinct ``time`` among the rows where ``death == 1``,
        sorted by ascending time.
    """
    frame = data if df is None else df

    # A death without a recorded time cannot be placed on the time axis, and
    # NaN is not a reliable dictionary key, so such rows are left out.
    death_times = frame.loc[frame['death'] == 1, 'time'].dropna()

    counts = {}
    for event_time in death_times:
        # Single pass: a time seen for the first time starts from zero.
        counts[event_time] = counts.get(event_time, 0) + 1

    return [[event_time, counts[event_time]] for event_time in sorted(counts)]


# Death times and the number of deaths at each one; later cells read this list.
value_numberof = get_event_times()
value_numberof
# Layout: [[time, death_count], [time, death_count], ...]

[[2.0, 1],
 [4.0, 1],
 [5.0, 2],
 [7.0, 1],
 [8.0, 1],
 [11.0, 2],
 [14.0, 1],
 [17.0, 2],
 [18.0, 2]]

In [4]:
def joined_between(a, b, df=None):
    """Count the records whose ``join`` time lies in the half-open interval [a, b).

    Parameters
    ----------
    a : number
        Inclusive lower bound on ``join``.
    b : number
        Exclusive upper bound on ``join``.
    df : pandas.DataFrame, optional
        Records with a ``join`` column. Defaults to the notebook-level
        ``data`` frame, so existing two-argument calls are unaffected.

    Returns
    -------
    int
        Number of rows with ``a <= join < b``.
    """
    frame = data if df is None else df
    joined = frame['join']

    # Build one combined mask instead of filtering twice: indexing an already
    # filtered frame with a full-length boolean Series makes pandas re-align
    # the mask and emit a UserWarning.
    in_window = (joined >= a) & (joined < b)

    # int() keeps the return type a plain Python int, as len() produced before.
    return int(in_window.sum())

def surrendered_between(a, b, df=None):
    """Count the surrenders whose ``time`` lies in the half-open interval [a, b).

    Parameters
    ----------
    a : number
        Inclusive lower bound on ``time``.
    b : number
        Exclusive upper bound on ``time``.
    df : pandas.DataFrame, optional
        Records with ``time`` and ``surrender`` columns. Defaults to the
        notebook-level ``data`` frame, so existing two-argument calls are
        unaffected.

    Returns
    -------
    int
        Number of rows with ``a <= time < b`` and ``surrender == 1``.
    """
    frame = data if df is None else df
    exit_time = frame['time']

    # All three conditions go into a single mask. The time window is evaluated
    # once, and no filtered frame is indexed with a full-length boolean
    # Series (which makes pandas re-align the mask and warn).
    surrendered_in_window = (
        (exit_time >= a) & (exit_time < b) & (frame['surrender'] == 1)
    )

    # int() keeps the return type a plain Python int, as len() produced before.
    return int(surrendered_in_window.sum())

In [5]:
def get_risk_set():
    """Return the risk-set size at time 0 and at every event time.

    Reads the notebook-level ``data`` frame and ``value_numberof`` list.

    Returns
    -------
    list of int
        The first entry counts the records with ``join == 0``. Each later
        entry corresponds to one event time in ``value_numberof`` and equals
        the previous entry, plus the joins since the previous event time,
        minus the surrenders since the previous event time, minus the deaths
        counted at the previous event time.
    """
    # Records already present at time 0 form the starting risk set.
    initial_size = sum(1 for joined_at in data['join'] if joined_at == 0)
    sizes = [initial_size]

    # A tiny positive lower bound keeps the join == 0 records, already counted
    # above, out of the first [window_start, event_time) window.
    window_start = 1e-50
    # No deaths precede the first event time.
    deaths_at_previous_event = 0

    for event in value_numberof:
        event_time = event[0]

        left_by_surrender = surrendered_between(window_start, event_time)
        entered = joined_between(window_start, event_time)

        sizes.append(
            sizes[-1] + entered - left_by_surrender - deaths_at_previous_event
        )

        # Deaths at this event time leave the risk set only at the next one.
        deaths_at_previous_event = event[1]
        window_start = event_time

    return sizes

# Risk-set sizes: one entry for time 0 followed by one entry per event time.
risk_set = get_risk_set()

In [6]:
# Row labels for the risk-set column: time 0 first, then every event time.
risk_index = [0] + [pair[0] for pair in value_numberof]

# The event column has no entry for time 0, so its labels skip the first one.
event_index = risk_index[1:]

# Death count at each event time, in the same order as event_index.
event_values = [pair[1] for pair in value_numberof]

In [7]:
# One single-column frame per quantity, each labelled by its own time index.
r = pd.DataFrame(data=risk_set, index=risk_index, columns=['risk set'])
s = pd.DataFrame(data=event_values, index=event_index, columns=['events'])

# Left join on the index: every risk-set row is kept, and the time-0 row,
# which has no matching label in s, gets a missing 'events' value.
table = r.merge(s, how='left', left_index=True, right_index=True)

In [8]:
# Kaplan-Meier estimate: at each event time the running value is multiplied by
# (at risk - deaths) / at risk.
kaplan_meier_index = event_index
kaplan_meier = []

last_value = 1  # running product before the first event time

for time in kaplan_meier_index:
    at_risk = table.loc[time, 'risk set']
    deaths = table.loc[time, 'events']
    # Multiply first, then divide, to keep the floating-point results unchanged.
    estimate = last_value * (at_risk - deaths) / at_risk
    kaplan_meier.append(estimate)
    last_value = estimate

# Repackage the collected values as a one-column frame labelled by event time.
kaplan_meier = pd.DataFrame(
    data=kaplan_meier,
    index=kaplan_meier_index,
    columns=['K-M Estimate'],
)

In [9]:
# Attach the Kaplan-Meier column, aligned on the time index. assign()
# overwrites an existing 'K-M Estimate' column, so re-running this cell leaves
# the table unchanged; merging table with kaplan_meier a second time would
# instead produce suffixed duplicates ('K-M Estimate_x' / 'K-M Estimate_y').
# The time-0 row has no estimate and is filled with NaN by the alignment.
table = table.assign(**{'K-M Estimate': kaplan_meier['K-M Estimate']})

# Label the index 'x' without mutating the previous frame's index in place.
table = table.rename_axis('x')

In [10]:
# Nelson-Aalen estimate: a running sum of deaths / at risk over the event times.
nelson_aalen_index = event_index
nelson_aalen = []

last_value = 0  # running sum before the first event time

for time in nelson_aalen_index:
    deaths = table.loc[time, 'events']
    at_risk = table.loc[time, 'risk set']
    increment = deaths / at_risk
    estimate = last_value + increment
    nelson_aalen.append(estimate)
    last_value = estimate

# Repackage the collected values as a one-column frame labelled by event time.
nelson_aalen = pd.DataFrame(
    data=nelson_aalen,
    index=nelson_aalen_index,
    columns=['N-A Estimate'],
)

In [11]:
# Attach the Nelson-Aalen column, aligned on the time index. assign()
# overwrites an existing 'N-A Estimate' column, so re-running this cell leaves
# the table unchanged; merging table with nelson_aalen a second time would
# instead produce suffixed duplicates ('N-A Estimate_x' / 'N-A Estimate_y').
table = table.assign(**{'N-A Estimate': nelson_aalen['N-A Estimate']})

In [12]:
# Final summary indexed by time: risk-set size, death count and both estimates.
table

Unnamed: 0_level_0,risk set,events,K-M Estimate,N-A Estimate
x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,5,,,
2.0,9,1.0,0.888889,0.111111
4.0,15,1.0,0.82963,0.177778
5.0,14,2.0,0.711111,0.320635
7.0,11,1.0,0.646465,0.411544
8.0,8,1.0,0.565657,0.536544
11.0,7,2.0,0.40404,0.822258
14.0,6,1.0,0.3367,0.988925
17.0,7,2.0,0.2405,1.274639
18.0,4,2.0,0.12025,1.774639
