In [1]:
# Development convenience: with autoreload mode 2, imported modules are
# re-imported before each cell executes, so edits to local source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import sys
import os

# Make the parent of the current working directory importable (presumably the
# location of the local `gbmsurv` package -- confirm against the repo layout).
# `__file__` is not defined inside a notebook kernel, so the path is resolved
# relative to the working directory; the previous `dirname("__file__")` call
# operated on a string literal and always evaluated to '' anyway.
project_root = os.path.abspath(os.pardir)

# Guard the append so re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from gbmsurv import (
    WeibGBMSurvivalModel,
    LogNormGBMSurvivalModel,
    GBMSurvivalModel,
)

# Example: fitting gbmsurv survival models on synthetic data

In [3]:
# Synthetic survival data.
#
# Event times are drawn by inverse-transform sampling from a Weibull
# proportional-hazards model with survival function
#     S(t | x) = exp(-lambda_val * exp(x @ coefficients) * t ** v_lambda),
# i.e. t = (-log(U) / (lambda_val * exp(x @ coefficients))) ** (1 / v_lambda)
# with U ~ Uniform(0, 1).

# Seed a dedicated generator so Restart & Run All reproduces the same data.
SEED = 42
rng = np.random.default_rng(SEED)

n_samples = 1000
n_features = 5
features = rng.random((n_samples, n_features))

coefficients = np.array([10, 0.1, 1.e-7, -0.15, 1.e-7])
lambda_val = 1.e+2
v_lambda = 12
U = rng.uniform(0, 1, n_samples)
time = (-np.log(U) / (lambda_val * np.exp(np.dot(features, coefficients)))) ** (1/v_lambda)

# Event indicator: 1 with probability 0.8, 0 with probability 0.2.
# NOTE: it is drawn independently of `time`, and the times of rows flagged 0
# are not shortened -- they keep the full sampled event time.
delta = rng.choice([0, 1], size=n_samples, p=[0.2, 0.8])

# Structured array with a boolean 'delta' field and a float 'time' field,
# filled field-by-field instead of round-tripping through a float matrix.
target = np.empty(n_samples, dtype=[('delta', '?'), ('time', 'f8')])
target['delta'] = delta
target['time'] = time

In [4]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

## Weibull parametric model

In [5]:
# Weibull model: 100 boosting rounds, with `initial_params` switched off.
weib = WeibGBMSurvivalModel(
    n_estimators=100,
    initial_params=False,
)
weib.fit(X_train, y_train)

# NOTE(review): the variable name suggests probabilities -- confirm what
# predict() actually returns for this model.
predicted_proba = weib.predict(X_test)

In [6]:
# Held-out score of the Weibull model, kept in a variable and echoed as the
# cell output.
# NOTE(review): the metric is defined by gbmsurv's score() -- presumably a
# concordance-type index; confirm.
weib_score = weib.score(X_test, y_test)
weib_score

0.8006327150921159

In [7]:
# Full hyper-parameter set of the fitted Weibull model, including the values
# that were not passed explicitly to the constructor.
weib_params = weib.get_params()
weib_params

{'learning_rate': 0.1,
 'n_estimators': 100,
 'max_depth': 7,
 'random_seed': 42,
 'lambda_val': 1,
 'alpha': 0,
 'colsample_bytree': 1,
 'colsample_bylevel': 1,
 'colsample_bynode': 1,
 'max_leaves': 0,
 'max_bin': 256,
 'min_child_weight': 1,
 'subsample': 1,
 'initial_params': False}

## Log-normal parametric model

In [8]:
# Log-normal model: 100 boosting rounds with a small learning rate (1e-3).
lognorm = LogNormGBMSurvivalModel(
    n_estimators=100,
    learning_rate=1e-3,
)
lognorm.fit(X_train, y_train)

# NOTE(review): the variable name suggests probabilities -- confirm what
# predict() actually returns for this model.
predicted_proba = lognorm.predict(X_test)

In [9]:
# Held-out score of the log-normal model, kept in a variable and echoed as the
# cell output.
# NOTE(review): the metric is defined by gbmsurv's score() -- presumably a
# concordance-type index; confirm.
lognorm_score = lognorm.score(X_test, y_test)
lognorm_score

0.8436821537125488

In [10]:
# Full hyper-parameter set of the fitted log-normal model, including the
# values that were not passed explicitly to the constructor.
lognorm_params = lognorm.get_params()
lognorm_params

{'learning_rate': 0.001,
 'n_estimators': 100,
 'max_depth': 7,
 'random_seed': 42,
 'lambda_val': 1,
 'alpha': 0,
 'colsample_bytree': 1,
 'colsample_bylevel': 1,
 'colsample_bynode': 1,
 'max_leaves': 0,
 'max_bin': 256,
 'min_child_weight': 1,
 'subsample': 1,
 'initial_params': True}

## Non-parametric model

In [11]:
# Non-parametric model: 100 boosting rounds, using the
# 'time_distribution_based' interval grid.
# NOTE(review): how that option places the intervals is defined inside
# gbmsurv -- confirm before relying on it.
gbmsurv = GBMSurvivalModel(
    n_estimators=100,
    interval_grid='time_distribution_based',
)
gbmsurv.fit(X_train, y_train)

# NOTE(review): the variable name suggests probabilities -- confirm what
# predict() actually returns for this model.
predicted_proba = gbmsurv.predict(X_test)

In [12]:
# Held-out score of the non-parametric model, kept in a variable and echoed as
# the cell output.
# NOTE(review): the metric is defined by gbmsurv's score() -- presumably a
# concordance-type index; confirm.
gbmsurv_score = gbmsurv.score(X_test, y_test)
gbmsurv_score

0.8775510204081632

In [13]:
# Full hyper-parameter set of the fitted non-parametric model, including the
# values that were not passed explicitly to the constructor.
gbmsurv_params = gbmsurv.get_params()
gbmsurv_params

{'learning_rate': 0.1,
 'n_estimators': 100,
 'max_depth': 7,
 'random_seed': 42,
 'lambda_val': 1,
 'alpha': 0,
 'num_intervals': 10,
 'colsample_bytree': 1,
 'colsample_bylevel': 1,
 'colsample_bynode': 1,
 'max_leaves': 0,
 'max_bin': 256,
 'min_child_weight': 1,
 'subsample': 1,
 'interval_grid': 'time_distribution_based'}