In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter
from matplotlib.gridspec import GridSpec
import os

# Read the workbook and strip stray whitespace from the header names
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()
# Keep only rows in which both the follow-up time and the death indicator are present
df = df[df[['DLI to FU', 'Death Status']].notna().all(axis=1)]

# Durations, event indicator, and the integer time points (0..10) used for the at-risk table
T, E = df['DLI to FU'], df['Death Status']
risk_times = np.arange(11)

# Calculation Number at risk
def get_at_risk(times, T_array):
    """Count, for every time point, how many durations are at least that large.

    Parameters
    ----------
    times : iterable of numbers
        Time points at which the number at risk is evaluated.
    T_array : array-like supporting element-wise ``>=`` (e.g. a pandas Series
        or NumPy array) holding the observed durations.

    Returns
    -------
    list
        One count per entry of ``times``, in the same order.
    """
    counts = []
    for cutoff in times:
        # A subject counts as "at risk" at `cutoff` when its duration is >= cutoff.
        still_at_risk = T_array >= cutoff
        counts.append(np.sum(still_at_risk))
    return counts

# Kaplan-Meier fit for the whole cohort
kmf = KaplanMeierFitter()
kmf.fit(T, event_observed=E)

# Layout with GridSpec: curve, table heading, table. The fourth (thin) row is left empty.
fig = plt.figure(figsize=(10, 8))
gs = GridSpec(4, 1, height_ratios=[4, 0.5, 1, 0.1], hspace=0.35)

ax_km = fig.add_subplot(gs[0])
ax_title = fig.add_subplot(gs[1])
ax_table = fig.add_subplot(gs[2])

# KM curve
kmf.plot(ax=ax_km, ci_show=False, color='black', linewidth=2, label=f"Overall survival (n={len(df)})")

# Median line
median_surv = kmf.median_survival_time_
ax_km.axhline(0.5, color='gray', linestyle='--', linewidth=1)
if np.isfinite(median_surv):
    ax_km.axvline(median_surv, color='red', linestyle='--', linewidth=1.5, label=f"Median: {median_surv:.2f} years")
else:
    # The curve never drops to 0.5, so the median is infinite: do not draw a
    # vertical line at infinity, only add a legend entry stating that fact.
    ax_km.plot([], [], color='red', linestyle='--', linewidth=1.5, label="Median: not reached")

# Axis & layout
ax_km.set_xlabel("Time from DLI infusion (years)", fontsize=14)
ax_km.set_ylabel("Overall survival probability", fontsize=14)
ax_km.set_xlim(0, 10)
ax_km.set_ylim(0, 1)
ax_km.set_xticks(risk_times)
ax_km.set_yticks(np.linspace(0, 1, 11))
ax_km.spines[['top', 'right']].set_visible(False)
ax_km.tick_params(labelsize=12)
ax_km.legend(frameon=False, fontsize=11)

# Heading for the table
ax_title.axis("off")
ax_title.text(0.5, 0.5, "Number at risk", fontsize=13, ha="center", va="center")

# Number-at-risk table (one row, one column per entry of risk_times)
ax_table.axis("off")
at_risk_values = get_at_risk(risk_times, T)

table = ax_table.table(
    cellText=[at_risk_values],
    rowLabels=["All patients"],
    colLabels=[str(t) for t in risk_times],
    loc="center",
    cellLoc="center"
)
table.scale(1.2, 1.4)
table.auto_set_font_size(False)
table.set_fontsize(11)

# Remove the cell borders
for cell in table.get_celld().values():
    cell.set_linewidth(0)



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter
from matplotlib.gridspec import GridSpec
import os

# Load Excel
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()
df = df.dropna(subset=["Time", "Death Status", "Diagnosis"])

# Map the numeric diagnosis codes to labels; codes not listed here become NaN
diagnosis_map = {0: "AML", 1: "sAML", 2: "MDS", 3: "ALL"}
df["Diagnosis"] = df["Diagnosis"].map(diagnosis_map)

# Variables
time_col = "Time"
event_col = "Death Status"
group_col = "Diagnosis"
# Build the group list from non-missing labels only: an unmapped code yields NaN,
# and sorting a mix of NaN and strings raises TypeError. Rows with a NaN label
# never match `df[group_col] == grp`, so the plotted data are unaffected.
groups = sorted(df[group_col].dropna().unique())
colors = ["darkblue", "purple", "darkgreen", "darkred"]
risk_ticks = np.arange(0, 11, 1)

# Calculation Number at risk
def get_at_risk(kmf, times):
    """Return the number of subjects at risk at each requested time point.

    The count for a time ``t`` is taken from the first row of
    ``kmf.event_table`` whose index is ``>= t``. That row's ``at_risk`` value is
    the number still under observation just before that time, which equals the
    number at risk at ``t`` because nobody leaves between ``t`` and the next
    recorded time. If ``t`` lies beyond the last recorded time, nobody is left
    and the count is 0.

    (Looking up the last row with index ``<= t`` instead would over-count for
    any ``t`` that falls between two recorded times, and would never reach 0.)

    Parameters
    ----------
    kmf : object with an ``event_table`` attribute
        A fitted estimator whose ``event_table`` is a DataFrame indexed by time
        in ascending order and containing an ``at_risk`` column.
    times : iterable of numbers
        Time points at which to report the number at risk.

    Returns
    -------
    list of int
        One count per entry of ``times``, in the same order.
    """
    event_table = kmf.event_table
    recorded_times = event_table.index.to_numpy()
    at_risk = event_table["at_risk"].to_numpy()

    out = []
    for t in times:
        # Position of the first recorded time that is >= t (index is ascending).
        pos = int(np.searchsorted(recorded_times, t, side="left"))
        out.append(int(at_risk[pos]) if pos < len(at_risk) else 0)
    return out

# Layout with GridSpec: curve, table heading, table. The fourth (thin) row is left empty.
fig = plt.figure(figsize=(10, 9))
gs = GridSpec(4, 1, height_ratios=[4, 0.5, 1, 0.1], hspace=0.35)

ax_km = fig.add_subplot(gs[0])
ax_title = fig.add_subplot(gs[1])
ax_table = fig.add_subplot(gs[2])

# Kaplan-Meier fits and at-risk table (one column per group, one row per tick)
kmf = KaplanMeierFitter()
risk_table_combined = pd.DataFrame(index=risk_ticks)

for i, grp in enumerate(groups):
    data = df[df[group_col] == grp]
    label = f"{grp} (n={len(data)})"

    # The same fitter is refitted per group; each curve is drawn right after its fit.
    kmf.fit(data[time_col], event_observed=data[event_col], label=label)
    kmf.plot(ax=ax_km, ci_show=False, color=colors[i], linewidth=2.5)

    # Median line: skipped when the median is infinite (curve never reaches 0.5),
    # because a vertical line at infinity cannot be drawn meaningfully.
    median = kmf.median_survival_time_
    if np.isfinite(median):
        ax_km.axvline(median, ls='--', lw=1, color=colors[i], alpha=0.35)

    # Number at risk for this group at every tick
    risk_table_combined[grp] = get_at_risk(kmf, risk_ticks)

# Axis & layout
ax_km.set_xlabel("Time from DLI infusion (years)", fontsize=14)
ax_km.set_ylabel("Survival probability", fontsize=14)
ax_km.set_ylim(0, 1)
ax_km.set_xticks(risk_ticks)
ax_km.set_yticks(np.linspace(0, 1, 11))
ax_km.spines[["top", "right"]].set_visible(False)
ax_km.tick_params(labelsize=12)
ax_km.legend(frameon=False, fontsize=11, title="Diagnosis", title_fontsize=11)

# Heading for the table
ax_title.axis("off")
ax_title.text(0.5, 0.5, "Number at risk", fontsize=13, ha="center", va="center")

# Table: one row per group, one column per tick
ax_table.axis("off")
table_data = [risk_table_combined[grp].tolist() for grp in groups]
table = ax_table.table(cellText=table_data,
                       rowLabels=groups,
                       colLabels=[str(t) for t in risk_ticks],
                       loc="center",
                       cellLoc="center")
table.scale(1.2, 1.4)
table.auto_set_font_size(False)
table.set_fontsize(11)

# Remove the cell borders
for cell in table.get_celld().values():
    cell.set_linewidth(0)




In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test
from matplotlib.gridspec import GridSpec
import os

# Load Excel
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()

# Variable names
time_col = "Time"
event_col = "Death Status"
group_col = "Preemptive vs. Therapeutic"

# Mapping (0 = preemptive, 1 = therapeutic); any other or missing code becomes NaN
df[group_col] = df[group_col].map({
    0: "Preemptive DLI",
    1: "Therapeutic DLI"
})

# Drop rows that lack a time, an event indicator, or a (mapped) group label.
# Without this, a NaN label makes sorted() below fail on mixed NaN/str values,
# and missing times or events are passed straight into the survival fits.
df = df.dropna(subset=[time_col, event_col, group_col])

# Groups and colours
groups = sorted(df[group_col].unique())
colors = ["darkgreen", "purple"]
# Integer ticks from 0 up to the largest observed time (rounded down)
risk_ticks = np.arange(0, int(df[time_col].max()) + 1)

# Calculation Number at Risk 
def get_at_risk_direct(df_group, time_col, risk_times):
    """Count, per time point, the rows whose duration is strictly greater than it.

    Parameters
    ----------
    df_group : pandas.DataFrame
        Rows of one group.
    time_col : str
        Name of the column in ``df_group`` holding the durations.
    risk_times : iterable of numbers
        Time points at which to count.

    Returns
    -------
    list
        One count per entry of ``risk_times``, in the same order.
    """
    durations = df_group[time_col]
    counts = []
    for tick in risk_times:
        # Strict comparison: a duration equal to `tick` is not counted.
        beyond_tick = durations > tick
        counts.append(beyond_tick.sum())
    return counts

# Layout with GridSpec (KM plot + "Number at risk" table). The fourth (thin) row is left empty.
fig = plt.figure(figsize=(10, 9))
gs = GridSpec(4, 1, height_ratios=[4, 0.5, 1, 0.1], hspace=0.35)

ax_km = fig.add_subplot(gs[0])
ax_title = fig.add_subplot(gs[1])
ax_table = fig.add_subplot(gs[2])

# KM fits and number-at-risk table (one column per group, one row per tick)
kmf = KaplanMeierFitter()
risk_table_combined = pd.DataFrame(index=risk_ticks)

for i, grp in enumerate(groups):
    data = df[df[group_col] == grp]
    label = f"{grp} (n={len(data)})"

    # KM curve; the same fitter is refitted per group and drawn right after its fit.
    kmf.fit(data[time_col], event_observed=data[event_col], label=label)
    kmf.plot(ax=ax_km, ci_show=False, color=colors[i], linewidth=2.5)

    # Median line: skipped when the median is infinite (curve never reaches 0.5),
    # because a vertical line at infinity cannot be drawn meaningfully.
    median = kmf.median_survival_time_
    if np.isfinite(median):
        ax_km.axvline(median, ls='--', lw=1, color=colors[i], alpha=0.35)

    # Number at risk for this group at every tick
    risk_table_combined[grp] = get_at_risk_direct(data, time_col, risk_ticks)

# Log-rank test. It compares exactly two samples, so it is only run (and
# annotated) when there are exactly two groups, instead of failing on unpacking.
if len(groups) == 2:
    g1, g2 = [df[df[group_col] == g] for g in groups]
    p = logrank_test(
        g1[time_col], g2[time_col],
        event_observed_A=g1[event_col],
        event_observed_B=g2[event_col]
    ).p_value

    p_text = "Log-rank p < 0.0001" if p < 0.0001 else f"Log-rank p = {p:.4f}"
    ax_km.text(0.98, 0.06, p_text, ha="right", va="bottom",
               transform=ax_km.transAxes, fontsize=12,
               bbox=dict(facecolor="white", alpha=0.7, edgecolor="none"))

# Axis and layout
ax_km.set_xlabel("Time from DLI infusion (years)", fontsize=14)
ax_km.set_ylabel("Survival probability", fontsize=14)
ax_km.set_ylim(0, 1)
ax_km.set_yticks(np.linspace(0, 1, 11))
ax_km.spines[["top", "right"]].set_visible(False)
ax_km.tick_params(labelsize=12)
ax_km.legend(frameon=False, fontsize=11, title="Treatment strategy", title_fontsize=11)

# Table heading
ax_title.axis("off")
ax_title.text(0.5, 0.5, "Number at risk", fontsize=13, ha="center", va="center")

# Number-at-risk table: one row per group, one column per tick.
# NOTE(review): the column headers are blank here, so the table does not show
# which time each count belongs to — confirm this is intended.
ax_table.axis("off")
table_data = [risk_table_combined[grp].tolist() for grp in groups]
table = ax_table.table(cellText=table_data,
                       rowLabels=groups,
                       colLabels=[""] * len(risk_ticks),
                       loc="center",
                       cellLoc="center")
table.scale(1.2, 1.4)
table.auto_set_font_size(False)
table.set_fontsize(11)

# Remove the cell borders
for cell in table.get_celld().values():
    cell.set_linewidth(0)



In [None]:
import pandas as pd
from lifelines import CoxPHFitter

# Load Excel
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()

# Time and events
time_col = "Time"
event_col = "Death Status"

# Encode categorical variables (first level dropped as the reference category).
# get_dummies turns a missing category into all-zero dummies, which would
# silently count those patients as the reference category. The missing values
# are written back into the dummy columns so the per-covariate dropna() below
# excludes them instead.
categorical_cols = ['Sex', 'GvHD']
dummy_frames = []
for col in categorical_cols:
    dummies = pd.get_dummies(df[col], prefix=col, drop_first=True, dtype=float)
    dummies.loc[df[col].isna(), :] = float("nan")
    dummy_frames.append(dummies)
df_encoded = pd.concat([df.drop(columns=categorical_cols)] + dummy_frames, axis=1)

# Every remaining column except time, event and 'Diagnosis' is tested as a covariate
excluded = [time_col, event_col, 'Diagnosis']
covariates = [col for col in df_encoded.columns if col not in excluded]

# Fit one single-covariate Cox model per variable (complete cases for that variable)
results = []

for cov in covariates:
    df_uni = df_encoded[[time_col, event_col, cov]].dropna()

    cph = CoxPHFitter()
    cph.fit(df_uni, duration_col=time_col, event_col=event_col)
    summary = cph.summary
    summary['Variable'] = cov
    results.append(summary)

# Combine the per-variable summaries into one table indexed by variable name
# (rows stay in the order of `covariates`; nothing is sorted)
univariate_results = pd.concat(results)
univariate_results = univariate_results.set_index('Variable')

# Output
print("\n Univariate Cox-Regression:")
print(univariate_results[['coef', 'exp(coef)', 'p', 'exp(coef) lower 95%', 'exp(coef) upper 95%']])


In [None]:
import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
import matplotlib.pyplot as plt

# Load Excel
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()  # Remove extra spaces

# Clean and prepare data: complete cases over every column the model uses
required_cols = [
    'Death Status', 'Age', 'Sex', 'GvHD',
    'Time to Relapse', 'Preemptive vs. Therapeutic', 'Time'
]
df = df.dropna(subset=required_cols)

# A column that contained blanks is read as float, and get_dummies would then
# name the indicator 'Sex_1.0' / 'GvHD_1.0', so the 'Sex_1' / 'GvHD_1' selection
# below would raise KeyError. No NaN remains after dropna, so whole-number float
# codes can be cast back to int to get the expected column names.
categorical_cols = ['Sex', 'GvHD']
int_coded = {
    col: int
    for col in categorical_cols
    if pd.api.types.is_float_dtype(df[col]) and (df[col] % 1 == 0).all()
}
df = df.astype(int_coded)

# Encode categorical variables (first level dropped as the reference category)
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Variable definition: covariates plus the duration and event columns
selected_vars = [
    'Age',
    'Time to Relapse',
    'Preemptive vs. Therapeutic',
    'Sex_1',
    'GvHD_1',
    'Time',
    'Death Status'
]
df_model = df[selected_vars].copy()

# Fit Cox model
cph = CoxPHFitter()
cph.fit(df_model, duration_col='Time', event_col='Death Status')

# Summary
print(" Cox Proportional Hazards Model Summary:\n")
cph.print_summary(decimals=3)

# Plot
plt.figure(figsize=(8, 6))
cph.plot(hazard_ratios=True)
plt.title("Hazard Ratios with 95% Confidence Intervals", fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()
