## 生存分析

一般使用KM算法进行单一变量拟合，同时可以分变量预测效果。

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from onekey_algo import OnekeyDS as okds

# Configure plotting defaults in one call: figure size, the SimHei font for
# sans-serif text (presumably so Chinese labels render -- confirm), and
# ASCII minus signs on the axes.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})

# okds.survival is handed straight to read_csv; preview the first rows.
mydir = okds.survival
data = pd.read_csv(mydir)
data.head()

### KM Estimator
To estimate the survival function, we will first use the Kaplan-Meier estimate, defined as:

$\hat{S}(t) = \prod_{t_i \lt t} \frac{n_i - d_i}{n_i} $

where $d_i$ is the number of death events at time $t_i$ and $n_i$ is the number of subjects at risk of death just prior to time $t_i$.

In [None]:
from lifelines import KaplanMeierFitter
# Shared estimator instance; the cells below call fit() on it again for each
# subset they plot.
kmf = KaplanMeierFitter()

### 定义生存时间和最终状态
Other ways to estimate the survival function in lifelines are discussed below.

For this estimation, we need the duration each leader was/has been in office, and whether or not they were observed to have left office (leaders who died in office or were in office in 2008, the latest date this data was recorded, do not have observed death events). In this notebook, the corresponding columns are `duration` (the observed time) and `result` (the event indicator).

In [None]:
# Observed durations and the event-indicator column; both are reused by
# every later cell.
T = data["duration"]
# NOTE(review): the double brackets make E a one-column DataFrame rather than
# a Series. Kept as-is so later E[mask] indexing behaves exactly as before.
E = data[["result"]]

# Fit the Kaplan-Meier estimator on the full dataset.
kmf.fit(durations=T, event_observed=E)

In [None]:
from matplotlib import pyplot as plt

# Plot the curve fitted on the whole dataset. The previous title referred to
# "political regimes", which does not describe this data.
kmf.plot_survival_function()
plt.title('Kaplan-Meier survival function (all subjects)')
plt.show()

In [None]:
# Draw both groups on the same axes so the curves can be compared directly.
ax = plt.subplot(111)

# Boolean mask of rows whose `smoke` column equals 1. The name `dem` is kept
# because the next cell also indexes T and E with it.
dem = (data["smoke"] == 1)

kmf.fit(T[dem], event_observed=E[dem], label="smoke")
kmf.plot_survival_function(ax=ax)

# Refit the same estimator on the complementary rows and overlay the curve.
kmf.fit(T[~dem], event_observed=E[~dem], label="Non-smoke")
kmf.plot_survival_function(ax=ax)

# Title now matches the grouping variable; the trailing semicolon suppresses
# the notebook's echo of the returned Text object.
plt.title("Survival by smoking status");

In [None]:
import numpy as np

ax = plt.subplot(111)

# Evaluate both curves on one shared, evenly spaced timeline (41 points from
# 20 to 80) instead of each group's own observed event times.
t = np.linspace(20, 80, 41)

# `dem` is the smoke == 1 mask, so the legend labels name the smoking groups,
# consistent with the previous cell.
kmf.fit(T[dem], event_observed=E[dem], timeline=t, label="smoke")
ax = kmf.plot_survival_function(ax=ax)

kmf.fit(T[~dem], event_observed=E[~dem], timeline=t, label="Non-smoke")
ax = kmf.plot_survival_function(ax=ax)

plt.title("Survival by smoking status (fixed timeline)");

In [None]:
# One Kaplan-Meier curve per distinct value of the `Tstage` column, each drawn
# in its own panel.
stages = data['Tstage'].unique()

# Size the grid from the number of groups. A fixed 2x2 grid makes
# plt.subplot raise ValueError as soon as a fifth group appears; max(2, ...)
# keeps the original 2x2 layout when there are four groups or fewer.
n_cols = 2
n_rows = max(2, (len(stages) + n_cols - 1) // n_cols)

for i, stage in enumerate(stages):
    ax = plt.subplot(n_rows, n_cols, i + 1)

    # Refit the shared estimator on the rows belonging to this group only.
    ix = data['Tstage'] == stage
    kmf.fit(T[ix], E[ix], label=stage)
    kmf.plot_survival_function(ax=ax, legend=False)

    plt.title(stage)
    # Same x-range on every panel so the curves are visually comparable.
    plt.xlim(20, 83)

    # Label the y-axis once, on the first panel.
    if i == 0:
        plt.ylabel('Survival probability')

plt.tight_layout()

In [None]:
# Refit a fresh estimator on the whole dataset and plot it with
# at_risk_counts=True; tight_layout() then re-packs the figure.
# The label names the full cohort instead of the tutorial's "regimes".
kmf = KaplanMeierFitter().fit(T, E, label="all_subjects")
kmf.plot_survival_function(at_risk_counts=True)
plt.tight_layout()