In [1]:
import pandas as pd
import numpy as np
from lifelines import KaplanMeierFitter, CoxPHFitter
from lifelines.statistics import logrank_test
from sksurv.ensemble import RandomSurvivalForest
from sksurv.metrics import concordance_index_censored
from sklearn.model_selection import train_test_split

# Load the RADCURE clinical CSV and discard every row that is missing any of
# the fields used by the survival analyses in this script.
df = pd.read_csv("Data/RADCURE_Clinical_v04_20241219.csv").dropna(
    subset=["survival_time", "event", "age", "tumor_stage", "treatment_type"]
)
# Store the event indicator with a boolean dtype.
df = df.assign(event=df["event"].astype(bool))

# Kaplan-Meier curves for the two tumour-stage groups, followed by a log-rank
# test comparing the same two groups.
kmf = KaplanMeierFitter()

group1 = df[df["tumor_stage"] == "Stage I"]
group2 = df[df["tumor_stage"] == "Stage IV"]

# The stage labels are hard-coded; if either one matches no rows there is
# nothing to compare, so stop here with an explicit message.
for stage_label, stage_rows in (("Stage I", group1), ("Stage IV", group2)):
    if stage_rows.empty:
        raise ValueError(f"No rows with tumor_stage == {stage_label!r}")

# Label each fit so the legend distinguishes the curves, and pass the axes of
# the first plot to the second so both curves are drawn on the same figure.
kmf.fit(group1["survival_time"], event_observed=group1["event"], label="Stage I")
km_ax = kmf.plot_survival_function()

kmf.fit(group2["survival_time"], event_observed=group2["event"], label="Stage IV")
kmf.plot_survival_function(ax=km_ax)

# Keep the test result in a variable and print it; a bare expression in the
# middle of a cell is evaluated and then thrown away.
logrank_result = logrank_test(
    group1["survival_time"],
    group2["survival_time"],
    event_observed_A=group1["event"],
    event_observed_B=group2["event"],
)
logrank_result.print_summary()

# Cox proportional-hazards model.
# pd.get_dummies one-hot encodes the object/category columns of the selected
# frame; drop_first=True drops the first level of each encoded column.
cox_columns = ["survival_time", "event", "age", "tumor_stage", "treatment_type"]
cph_df = pd.get_dummies(df[cox_columns].copy(), drop_first=True)

cph = CoxPHFitter()
cph.fit(cph_df, event_col="event", duration_col="survival_time")
cph.print_summary()
# Run the assumption checks against the same frame the model was fitted on.
cph.check_assumptions(cph_df)

# Random survival forest on the same columns, scored on a 20% hold-out split.
rsf_df = pd.get_dummies(
    df[["survival_time", "event", "age", "tumor_stage", "treatment_type"]].copy(),
    drop_first=True,
)

X = rsf_df.drop(columns=["survival_time", "event"])
y = rsf_df[["event", "survival_time"]]

# Structured target array with an "event" (bool) and a "time" (float) field,
# filled one column at a time.
y_struct = np.empty(len(y), dtype=[("event", bool), ("time", float)])
y_struct["event"] = y["event"].to_numpy()
y_struct["time"] = y["survival_time"].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_struct,
    test_size=0.2,
    random_state=42,
)

rsf = RandomSurvivalForest(
    n_estimators=100,
    min_samples_split=10,
    min_samples_leaf=15,
    random_state=42,
)
rsf.fit(X_train, y_train)

# concordance_index_censored returns a tuple; element 0 is the index itself.
rsf_risk = rsf.predict(X_test)
c_index_rsf = concordance_index_censored(y_test["event"], y_test["time"], rsf_risk)[0]

# Concordance index for the Cox model, measured on the same hold-out rows as
# the random survival forest so the two scores are comparable.
#
# Scoring the model fitted on the full dataset against that full dataset
# would give an in-sample figure, while c_index_rsf is out-of-sample. A
# separate Cox model is therefore fitted on the training rows only and scored
# on the test rows; `cph` (the full-data fit used for the summary) is left
# untouched.
#
# X_train / X_test keep the row labels of the frame they were split from,
# which shares its index with cph_df, so .loc selects the matching rows.
cph_train = cph_df.loc[X_train.index]
cph_holdout = cph_df.loc[X_test.index]

cph_eval = CoxPHFitter()
cph_eval.fit(cph_train, duration_col="survival_time", event_col="event")

cph_test = cph_holdout.drop(columns=["survival_time", "event"])
cph_pred = cph_eval.predict_partial_hazard(cph_test)
c_index_cph = concordance_index_censored(
    cph_holdout["event"].to_numpy(),
    cph_holdout["survival_time"].to_numpy(),
    # Flatten so the estimate is 1-D whether lifelines returns a Series or a
    # single-column DataFrame.
    np.asarray(cph_pred).ravel(),
)[0]

print(f"C-index (hold-out) - RSF: {c_index_rsf:.3f}, Cox PH: {c_index_cph:.3f}")

ModuleNotFoundError: No module named 'pandas'