In [None]:
!pip install pandas
!pip install numpy
!pip install lifelines

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import ExponentialAFTFitter
from lifelines.utils import concordance_index

# Read the recurrence-free-survival table into a DataFrame.
csv_path = '/data/Recurrence free survival in breast cancer patients.csv'
data = pd.read_csv(csv_path)

# Tell the fitter which column is the follow-up duration and which one is
# the event indicator; every remaining column is passed to the model as-is.
column_roles = dict(duration_col='time', event_col='status')

# Fit the exponential accelerated-failure-time regression on the full table.
exp_aft_fitter = ExponentialAFTFitter()
exp_aft_fitter.fit(data, **column_roles)

# Print the fitted model's summary table.
exp_aft_fitter.print_summary()

# Score every subject by its predicted cumulative hazard at the last time on
# the prediction grid.
# predict_cumulative_hazard returns a frame with one ROW per time point and
# one COLUMN per subject, so the last ROW (.iloc[-1]) is what we need: a
# Series indexed like `data`, one value per subject. Selecting the last
# column instead would give a single subject's curve over time, which does
# not align with data['time'] / data['status'].
predicted_hazards = exp_aft_fitter.predict_cumulative_hazard(data).iloc[-1]

# Drop subjects whose prediction is NaN (if any). The mask shares data's
# index, so the boolean selection below is properly aligned.
valid_idx = predicted_hazards.notna()
cleaned_times = data['time'][valid_idx]
cleaned_status = data['status'][valid_idx]
cleaned_hazards = predicted_hazards[valid_idx]

# Calculate the C-index for model evaluation.
# concordance_index expects scores that are larger for longer survival; a
# higher cumulative hazard means shorter survival, hence the negation.
c_index = concordance_index(cleaned_times, -cleaned_hazards, cleaned_status)
print(f"C-index: {c_index}")

# Evenly spaced evaluation grid: 100 points from time zero up to the longest
# duration present in the dataset.
latest_time = data['time'].max()
time_points = np.linspace(0, latest_time, 100)

# Survival curve for one example subject (the first row of the dataset),
# evaluated on the grid above. The result has a single column for that subject.
survival_probs = exp_aft_fitter.predict_survival_function(
    data.head(1),
    times=time_points,
)
survival_curve = survival_probs.iloc[:, 0]

# Draw the survival function.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_points, survival_curve, label='Exponential AFT Survival Function')
ax.set_title('Survival Function (Exponential AFT Model)')
ax.set_xlabel('Time')
ax.set_ylabel('Survival Probability')
ax.grid(True)
ax.legend()
plt.show()

# Cumulative hazard curve for one example subject (the first row of the
# dataset), evaluated on the same `time_points` grid used for the survival
# plot. The result has a single column for that subject.
cumulative_hazards = exp_aft_fitter.predict_cumulative_hazard(
    data.head(1),
    times=time_points,
)
hazard_curve = cumulative_hazards.iloc[:, 0]

# Draw the cumulative hazard function.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_points, hazard_curve, label="Exponential AFT Cumulative Hazard Function")
ax.set_title("Cumulative Hazard Function (Exponential AFT Model)")
ax.set_xlabel("Time")
ax.set_ylabel("Cumulative Hazard")
ax.grid(True)
ax.legend()
plt.show()