In [1]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
from lifelines import KaplanMeierFitter
# Put cufflinks in offline mode so the DataFrame.iplot() calls further down
# render inside the notebook.
cf.go_offline()

In [2]:
# Load the player table and discard every row that has at least one missing
# value, then preview the first rows.
data = pd.read_csv('survival.csv').dropna()
data.head()

Unnamed: 0,ID,Name,Position,Age,Height(cm),Weight(kgs),Preferred Foot,Weak Foot,Nationality,Continent,...,LWB,LDM,CDM,RDM,RWB,LB,LCB,CB,RCB,RB
0,158023,L. Messi,RF,31,173.727522,72.137028,Left,4.0,Argentina,America,...,64,61,61,61,64,59,47,47,47,59
1,20801,Cristiano Ronaldo,ST,33,188.966778,83.025636,Right,4.0,Portugal,Europe,...,65,61,61,61,65,61,53,53,53,61
2,190871,Neymar Jr,LW,26,179.823225,68.0538,Right,5.0,Brazil,America,...,65,60,60,60,65,60,47,47,47,60
3,193080,De Gea,GK,27,195.062481,76.220256,Right,3.0,Spain,Europe,...,50,50,50,50,50,50,50,50,50,50
4,192985,K. De Bruyne,RCM,27,155.7452,69.868568,Right,5.0,Belgium,Europe,...,77,77,77,77,77,73,66,66,66,73


In [3]:
# Parse the 'Joined' column into proper datetimes and keep it as 'date_column'.
data['date_column'] = pd.to_datetime(data['Joined'])

In [4]:
# Parse 'Joined' once and reuse the resulting index for all three components
# (the previous version re-parsed the whole column for each of them).
joined_index = pd.DatetimeIndex(data['Joined'])
data['year'] = joined_index.year
data['month'] = joined_index.month
data['day'] = joined_index.day

In [5]:
# Preview the frame again; it now also carries date_column, year, month and day.
data.head()

Unnamed: 0,ID,Name,Position,Age,Height(cm),Weight(kgs),Preferred Foot,Weak Foot,Nationality,Continent,...,RWB,LB,LCB,CB,RCB,RB,date_column,year,month,day
0,158023,L. Messi,RF,31,173.727522,72.137028,Left,4.0,Argentina,America,...,64,59,47,47,47,59,2004-07-01,2004,7,1
1,20801,Cristiano Ronaldo,ST,33,188.966778,83.025636,Right,4.0,Portugal,Europe,...,65,61,53,53,53,61,2018-07-10,2018,7,10
2,190871,Neymar Jr,LW,26,179.823225,68.0538,Right,5.0,Brazil,America,...,65,60,47,47,47,60,2017-08-03,2017,8,3
3,193080,De Gea,GK,27,195.062481,76.220256,Right,3.0,Spain,Europe,...,50,50,50,50,50,50,2011-07-01,2011,7,1
4,192985,K. De Bruyne,RCM,27,155.7452,69.868568,Right,5.0,Belgium,Europe,...,77,73,66,66,66,73,2015-08-30,2015,8,30


In [6]:
# Players strictly older than this age are flagged with Retirement = 1,
# everyone else with 0. The flag is later passed to survival() as the
# event-observed column.
RETIREMENT_AGE_THRESHOLD = 29

# Vectorized comparison instead of a Python loop appending to a list.
data['Retirement'] = (data['Age'] > RETIREMENT_AGE_THRESHOLD).astype(int)

In [7]:
# Calendar year in which each contract expires.
expiry_index = pd.DatetimeIndex(data['Contract_Expiry'])
data['Year'] = expiry_index.year

In [8]:
# 'Expire' is the distance, in years, between a fixed reference year and the
# contract-expiry year.
# NOTE(review): the previous loop branched on `year > 2022`, but both branches
# computed the same `2043 - year`, so the condition had no effect. The result
# is unchanged here; confirm whether a different formula was intended for one
# of the two cases, and what the reference year 2043 stands for.
REFERENCE_YEAR = 2043

data['Expire'] = REFERENCE_YEAR - data['Year']

In [9]:
# Count missing values per column and display only the columns that still
# contain at least one.
null_cols = data.isna().sum()
null_cols[null_cols.gt(0)]

Series([], dtype: int64)

In [10]:
def survival(data, group_field, time_field, event_field):
    """Fit one Kaplan-Meier curve per group and return them side by side.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the grouping, duration and event columns.
    group_field : str
        Column whose distinct values define the groups.
    time_field : str
        Column handed to the fitter as the durations.
    event_field : str
        Column handed to the fitter as the event-observed indicator.

    Returns
    -------
    pandas.DataFrame
        The fitted ``survival_function_`` of every group concatenated along
        the columns, one column per group labelled ``str(group value)``.
        An empty DataFrame is returned when ``data`` contains no groups.
    """
    curves = []

    # sort=False keeps the groups in order of first appearance (the same
    # column order the previous `.unique()` loop produced); groupby also
    # skips rows whose group key is missing instead of yielding an empty
    # group that cannot be fitted.
    for key, group in data.groupby(group_field, sort=False):
        # A fresh fitter per group, so no state is shared between fits.
        model = KaplanMeierFitter()
        model.fit(group[time_field], group[event_field], label=str(key))
        curves.append(model.survival_function_)

    if not curves:
        # pd.concat raises on an empty list; give callers an empty frame.
        return pd.DataFrame()

    return pd.concat(curves, axis=1)

In [11]:
# Kaplan-Meier curves per playing position, with age as the duration.
rates = survival(
    data,
    group_field='Position',
    time_field='Age',
    event_field='Retirement',
)
rates.iplot(
    kind='line',
    title='Tiempo de carrera por posición',
    xTitle='Retiro (años)',
    yTitle='Tasa de retiro',
)

In [12]:
# Kaplan-Meier curves per playing position, with 'Expire' as the duration.
rates = survival(
    data,
    group_field='Position',
    time_field='Expire',
    event_field='Retirement',
)
rates.iplot(
    kind='line',
    title='Tiempo de carrera por posición',
    xTitle='Retiro (años)',
    yTitle='Tasa de retiro',
)

In [13]:
# Kaplan-Meier curves per age, with 'Expire' as the duration.
# The title previously said "por posición" (copied from the Position plots)
# although the curves are grouped by age.
rates = survival(data, 'Age', 'Expire', 'Retirement')
rates.iplot(kind='line', xTitle='Retiro (años)',
            yTitle='Tasa de retiro', title='Tiempo de carrera por edad')

In [14]:
# Kaplan-Meier curves per nationality, with 'Expire' as the duration.
# The title previously said "por posición" (copied from the Position plots)
# although the curves are grouped by nationality.
rates = survival(data, 'Nationality', 'Expire', 'Retirement')
rates.iplot(kind='line', xTitle='Retiro (años)',
            yTitle='Tasa de retiro', title='Tiempo de carrera por nacionalidad')

In [15]:
# Kaplan-Meier curves per nationality, with age as the duration.
# The title previously said "por posición" (copied from the Position plots)
# although the curves are grouped by nationality.
rates = survival(data, 'Nationality', 'Age', 'Retirement')
rates.iplot(kind='line', xTitle='Retiro (años)',
            yTitle='Tasa de retiro', title='Tiempo de carrera por nacionalidad')

In [16]:
# Kaplan-Meier curves per preferred foot, with age as the duration.
# The title previously said "por posición" (copied from the Position plots)
# although the curves are grouped by preferred foot.
rates = survival(data, 'Preferred Foot', 'Age', 'Retirement')
rates.iplot(kind='line', xTitle='Retiro (años)',
            yTitle='Tasa de retiro', title='Tiempo de carrera por pie preferido')

In [17]:
# Kaplan-Meier curves per 'Inter_Rep' value.
# The title previously said "por posición" (copied from the Position plots)
# although the curves are grouped by 'Inter_Rep'.
# NOTE(review): the duration column here is 'Year' (the calendar year of
# contract expiry), not 'Expire' or 'Age' as in the other plots, so the x axis
# shows calendar years rather than a span in years -- confirm this is intended.
rates = survival(data, 'Inter_Rep', 'Year', 'Retirement')
rates.iplot(kind='line', xTitle='Retiro (años)',
            yTitle='Tasa de retiro', title='Tiempo de carrera por Inter_Rep')