In [None]:
# Notebook-wide setup: silence warnings and configure matplotlib.
import warnings
# Ignore every warning category for the rest of the session.
# NOTE(review): this also hides warnings raised by the modelling and metric
# calls in later cells.
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
# NOTE(review): `fm` and `rcParams` are not referenced in the cells visible here.
from matplotlib import font_manager as fm, rcParams
# Use 'Arial Unicode MS' as the sans-serif font family.
# Presumably chosen for its wide glyph coverage; assumes the font is installed
# on the machine running the notebook — TODO confirm.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sksurv.svm import FastSurvivalSVM
from sksurv.metrics import concordance_index_censored, cumulative_dynamic_auc
from sklearn.metrics import roc_curve, auc

# Locations of the two cohort workbooks; only the GEO workbook is read in this cell.
geo_path = "./dataset/GEO_clinical_genes.xlsx"
tcga_path = "./dataset/TCGA_clinical_genes.xlsx"
df = pd.read_excel(geo_path)

# Derive a 0/1 event indicator (1 only when CSS is exactly 'Dead') and
# discard the raw CSS column once the indicator exists.
df = (
    df
    .assign(event=df['CSS'].map(lambda status: 1 if status == 'Dead' else 0))
    .drop(columns=['CSS'])
)

# One-hot encode every object/category column, dropping the first level of each.
categorical_vars = list(df.select_dtypes(include=['object', 'category']).columns)
df = pd.get_dummies(df, columns=categorical_vars, drop_first=True)

# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Everything except the two outcome columns is used as a feature.
_outcome_columns = ['Survival_months', 'event']
X_train = train_df.drop(columns=_outcome_columns).values
X_test = test_df.drop(columns=_outcome_columns).values


def _structured_outcome(frame):
    """Build a structured array with a boolean 'event' field and a float 'time' field.

    The values are taken row by row from the 'event' and 'Survival_months'
    columns of ``frame``.
    """
    records = list(zip(frame['event'], frame['Survival_months']))
    return np.array(records, dtype=[('event', '?'), ('time', '<f8')])


y_train = _structured_outcome(train_df)
y_test = _structured_outcome(test_df)

In [None]:
# Fit the survival SVM on the training split (fit returns the estimator itself).
svm_model = FastSurvivalSVM(
    alpha=0.1,
    rank_ratio=0.1,
    max_iter=100,
    tol=1e-5,
    random_state=42,
).fit(X_train, y_train)

# The predictions are negated before being used as scores.
# NOTE(review): per the scikit-survival docs, predictions with rank_ratio < 1 are
# on the survival-time scale, so the sign flip turns them into risk-like scores
# (higher = worse) — confirm against the installed version.
train_preds = np.negative(svm_model.predict(X_train))
test_preds = np.negative(svm_model.predict(X_test))

# concordance_index_censored returns a tuple; only its first element is kept.
c_index_train, *_ = concordance_index_censored(y_train['event'], y_train['time'], train_preds)
c_index_test, *_ = concordance_index_censored(y_test['event'], y_test['time'], test_preds)

print(f"Train C-index: {c_index_train:.4f}")
print(f"Test C-index: {c_index_test:.4f}")

In [None]:
# ROC curves on the test split at fixed horizons (months): 1, 3 and 5 years.
time_points = [12, 36, 60]
plt.figure(figsize=(8, 6), dpi=500)

# Outcome columns as arrays; they are the same for every horizon.
followup_months = test_df['Survival_months'].to_numpy()
had_event = (test_df['event'] == 1).to_numpy()

for t in time_points:
    # Cases: event observed on or before t. Controls: still under follow-up after t.
    is_case = had_event & (followup_months <= t)
    is_control = followup_months > t
    # Patients censored on or before t have an unknown status at t. Counting them
    # as controls (as a plain "event by t" label does) biases the curve, so they
    # are left out of this horizon. An inverse-probability-of-censoring estimate
    # (e.g. sksurv's cumulative_dynamic_auc) is the more rigorous alternative when
    # only the AUC value is needed.
    evaluable = is_case | is_control
    y_true = is_case[evaluable].astype(int)

    # A ROC curve needs both classes; skip the horizon instead of plotting NaNs.
    if y_true.size == 0 or y_true.min() == y_true.max():
        print(f"{t} months: ROC skipped (no cases or no controls among evaluable patients)")
        continue

    fpr, tpr, _ = roc_curve(y_true, test_preds[evaluable])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"{t} months (AUC = {roc_auc:.2f})")

# Chance diagonal for reference.
plt.plot([0, 1], [0, 1], 'k--', lw=1)
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('SVM Survival Analysis: 1-year, 3-year, and 5-year ROC Curves')
plt.legend(loc='lower right')
plt.show()

In [None]:
# AUC of the test-split risk scores at every monthly horizon from t_min to t_max.
t_min = 1
t_max = 60
t_points = np.arange(t_min, t_max + 1)
auc_values = []

# Outcome columns as arrays; they are the same for every horizon.
followup_months = test_df['Survival_months'].to_numpy()
had_event = (test_df['event'] == 1).to_numpy()

for t in t_points:
    # Cases: event observed on or before t. Controls: still under follow-up after t.
    is_case = had_event & (followup_months <= t)
    # Patients censored on or before t have an unknown status at t; treating them
    # as controls biases the AUC, so they are excluded from this horizon.
    evaluable = is_case | (followup_months > t)
    y_true = is_case[evaluable].astype(int)

    # AUC is undefined unless both classes are present; record NaN for that month
    # so auc_values stays aligned with t_points.
    if y_true.size == 0 or y_true.min() == y_true.max():
        auc_values.append(np.nan)
    else:
        fpr, tpr, _ = roc_curve(y_true, test_preds[evaluable])
        auc_values.append(auc(fpr, tpr))

# Keep the default [0.6, 0.9] window, but widen it when a value would otherwise
# fall outside the axes and silently disappear from the plot.
y_low, y_high = 0.6, 0.9
finite_auc = [value for value in auc_values if not np.isnan(value)]
if finite_auc:
    if min(finite_auc) < y_low:
        y_low = min(finite_auc) - 0.02
    if max(finite_auc) > y_high:
        y_high = max(finite_auc) + 0.02

plt.figure(figsize=(8, 6), dpi=500)
plt.ylim([y_low, y_high])
plt.plot(t_points, auc_values, marker='o', linestyle='-')
plt.xlabel('Time (months)')
plt.ylabel('AUC')
plt.title('SVM Survival Analysis: AUC Over Time')
plt.grid(True)
plt.show()

In [None]:
# Tabulate the monthly AUC values (one row per horizon in t_points) and export them.
svm_auc_df = pd.DataFrame({
    'Month': t_points,
    'SVM_AUC': auc_values
})
# Backward-compatible alias: the frame used to be bound to an 'rsf'-prefixed
# name even though it holds the SVM AUCs.
rsf_auc_df = svm_auc_df

output_path = Path("./Log/SVM_AUC_by_month.xlsx")
# to_excel does not create missing parent folders, so make sure ./Log exists
# before writing (a fresh checkout may not have it).
output_path.parent.mkdir(parents=True, exist_ok=True)
svm_auc_df.to_excel(output_path, index=False)