###### Installing scikit-survival package

In [4]:
!pip install scikit-survival

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-survival
  Obtaining dependency information for scikit-survival from https://files.pythonhosted.org/packages/73/b2/2d15319792af55a0dbd8c3610461603c3bf4bba23b16c8763e47d3ddcc5b/scikit_survival-0.22.2-cp311-cp311-win_amd64.whl.metadata
  Using cached scikit_survival-0.22.2-cp311-cp311-win_amd64.whl.metadata (49 kB)
Collecting ecos (from scikit-survival)
  Obtaining dependency information for ecos from https://files.pythonhosted.org/packages/67/1f/165ca12f4b3de6bd3fe8b16695013d28e88808effeff37858427ae56f449/ecos-2.0.13-cp311-cp311-win_amd64.whl.metadata
  Using cached ecos-2.0.13-cp311-cp311-win_amd64.whl.metadata (8.2 kB)
Collecting osqp!=0.6.0,!=0.6.1 (from scikit-survival)
  Obtaining dependency information for osqp!=0.6.0,!=0.6.1 from https://files.pythonhosted.org/packages/5d/9a/6bcf0d1ada180ee36d2a813bf75473f5246884688a3f5e710ef755c1f35e/osqp-0.6.5-cp311-cp311-win_amd64.whl.metadata
  Usi

In [22]:
## Importing libraries

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

from sksurv.datasets import load_gbsg2
from sksurv.preprocessing import OneHotEncoder
from sksurv.ensemble import RandomSurvivalForest

### Loading the GBSG2 dataset and encoding the categorical features

In [66]:
# Load the GBSG2 data: `X` holds the covariates, `y` the survival outcome with
# the fields "cens" (event indicator) and "time" (follow-up time).
X, y = load_gbsg2()
X_df = pd.DataFrame(X)

# NOTE: despite the `_train` suffix these arrays cover the *whole* dataset;
# the train/test split is done afterwards.
event_indicator_train = np.array(y["cens"], dtype=bool)
time_of_event_train = np.array(y["time"], dtype=float)

# Build the structured survival target (event flag first, then time) with
# vectorized field assignment instead of zipping rows in Python.
# `y_df` is a NumPy structured array, not a DataFrame; the name is kept because
# the split cell refers to it.
y_df = np.empty(event_indicator_train.shape[0], dtype=[('event', bool), ('time', float)])
y_df['event'] = event_indicator_train
y_df['time'] = time_of_event_train

# Tumor grade is ordinal (I < II < III): encode it as 0.0 / 1.0 / 2.0 in that
# explicit order. Despite its name, `grade_str` holds the numeric codes, shaped (n, 1).
Oencoder = OrdinalEncoder(categories=[['I', 'II', 'III']])
grade_str = Oencoder.fit_transform(X_df[['tgrade']])
X_df = X_df.drop(columns="tgrade")

# One-hot encode the remaining categorical columns, then append the ordinal
# grade as the last column (flattened to 1-D so the assignment is explicit).
oneHotEncoder = OneHotEncoder()
X_df = oneHotEncoder.fit_transform(X_df)
X_df['tgrade'] = grade_str.ravel()

In [67]:
# Hold out 25% of the patients for evaluation; the fixed seed keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y_df,
    test_size=0.25,
    random_state=20,
)

### Using the training data, we fit a Random Survival Forest comprising 1000 trees.

In [68]:
# Random Survival Forest with 1000 trees; n_jobs=-1 uses all available cores
# and the fixed random_state makes the fitted forest reproducible.
rsf = RandomSurvivalForest(
    n_estimators=1000,
    min_samples_split=10,
    min_samples_leaf=15,
    n_jobs=-1,
    random_state=20,
)
# Fit on the training portion only; the fitted estimator is the cell's output.
rsf.fit(X_train, y_train)

### Checking the performance of the model by evaluating it on the test data.

In [69]:
# Evaluate on the held-out patients. Per the scikit-survival documentation,
# `score` reports the concordance index of the predictions (not an accuracy).
rsf.score(X=X_test, y=y_test)

0.6759696016771488

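### The model reaches a concordance index (C-index) of about 0.676 on the test data (0.5 corresponds to random ranking, 1.0 to a perfect ranking)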

In [72]:
# Order the test patients by `pnodes`, breaking ties by `age`, so that the
# predictions can be read from one end of that ordering to the other.
X_test_sorted = X_test.sort_values(["pnodes", "age"])

### Prediction

In [73]:
# Predicted risk scores for the sorted test patients. The Series gets a fresh
# 0..n-1 index, so positions follow the sorted order, not the original row labels.
risk_scores = rsf.predict(X_test_sorted)
pd.Series(risk_scores)

0       91.477609
1      102.897552
2       75.883786
3       58.014876
4       84.345308
          ...    
167    137.738642
168    153.487373
169    170.502092
170    171.210066
171    148.691835
Length: 172, dtype: float64

###### Because the test patients were sorted by `pnodes` and then `age`, the last rows are the patients with the highest `pnodes` values. Their predicted risk scores are considerably higher than those of the first patients.