In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test
from lifelines.utils import median_survival_times
import os

# --- Data loading ---------------------------------------------------------
# Column names used throughout this cell.
time_col, event_col, group_col = "Time", "Survival", "DLI vs 2nd allo-HCT"

df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()
# Keep only rows where time, event and group are all present, then replace the
# numeric group code with a readable label.
df = df.dropna(subset=[time_col, event_col, group_col])
df[group_col] = df[group_col].map({0: "Therapeutic DLI", 1: "2nd allo-HCT"})

# --- Setup ----------------------------------------------------------------
colors = {"2nd allo-HCT": "#1f77b4", "Therapeutic DLI": "#d62728"}
plot_order = ["2nd allo-HCT", "Therapeutic DLI"]
# One sub-frame per treatment group, reused by the plot, the test and the summary.
subsets = {name: df[df[group_col] == name] for name in plot_order}
# Integer time points from 0 up to the (truncated) maximum observed time.
time_bins = np.arange(0, int(df[time_col].max()) + 1, 1)
risk_table = pd.DataFrame(index=time_bins)

# --- Figure: KM curves on top, number-at-risk table below -----------------
fig, (ax_km, ax_table) = plt.subplots(
    2, 1, figsize=(10, 8), gridspec_kw={"height_ratios": [4, 1]}
)
kmf = KaplanMeierFitter()
median_dict, ci_dict = {}, {}

for grp in plot_order:
    sub = subsets[grp]
    kmf.fit(sub[time_col], sub[event_col], label=f"{grp} (n = {len(sub)})")
    kmf.plot(ax=ax_km, ci_show=True, ci_alpha=0.2, color=colors[grp], linewidth=2)

    # Median survival and the bounds returned by median_survival_times()
    lower, upper = median_survival_times(kmf.confidence_interval_).iloc[0]
    median_dict[grp] = kmf.median_survival_time_
    ci_dict[grp] = (lower, upper)

    # Patients whose recorded time is >= each time point
    risk_table[grp] = [(sub[time_col] >= t).sum() for t in time_bins]

# --- Log-rank test between the two groups ---------------------------------
p = logrank_test(
    subsets["2nd allo-HCT"][time_col],
    subsets["Therapeutic DLI"][time_col],
    event_observed_A=subsets["2nd allo-HCT"][event_col],
    event_observed_B=subsets["Therapeutic DLI"][event_col],
).p_value
ax_km.text(
    0.95, 0.05, f"Log-rank p = {p:.4f}",
    transform=ax_km.transAxes, ha="right", va="bottom", fontsize=13,
    bbox=dict(facecolor="white", edgecolor="grey", alpha=0.8),
)

# --- Styling --------------------------------------------------------------
ax_km.set_xlabel("Time from intervention (years)", fontsize=14)
ax_km.set_ylabel("Overall survival probability", fontsize=14)
ax_km.set_ylim(0, 1)
ax_km.grid(axis="y", linestyle="--", alpha=0.4)
ax_km.spines[["top", "right"]].set_visible(False)
ax_km.tick_params(labelsize=12)
ax_km.legend(title="Treatment group", frameon=False, fontsize=12, title_fontsize=13)

# --- Number-at-risk table (one row per group, one column per time point) ---
ax_table.axis("off")
ax_table.table(
    cellText=[risk_table[name].tolist() for name in plot_order],
    rowLabels=list(plot_order),
    colLabels=[str(t) for t in risk_table.index],
    loc="center",
    cellLoc="center",
)

# --- Descriptive statistics per group -------------------------------------
print("\n Descriptive Statistic:")
for grp in plot_order:
    n_total = len(subsets[grp])
    n_events = subsets[grp][event_col].sum()
    n_censored = n_total - n_events
    med_os = median_dict[grp]
    ci_lo, ci_hi = ci_dict[grp]

    print(f"\n {grp}")
    print(f"   - Patientenanzahl        : {n_total}")
    print(f"   - Ereignisse (Todesfälle): {n_events}")
    print(f"   - Zensiert               : {n_censored}")
    print(f"   - Median OS              : {med_os:.2f} Jahre "
          f"(95% CI: {ci_lo:.2f} – {ci_hi:.2f})")

plt.tight_layout()



In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.exceptions import ConvergenceError

# Load Data
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()

# Time and event variable
time_col = "Time"
event_col = "Survival"

# Columns that are never used as a covariate
excluded_vars = [time_col, event_col]

# Collect one result row per successfully fitted univariate model
results = []

for col in df.columns:
    if col in excluded_vars:
        continue

    # Complete-case analysis per variable: CoxPHFitter rejects data frames that
    # contain NaNs, so rows with a missing time, event or covariate value are
    # dropped here. The number of rows actually used is reported as "N".
    temp_df = df[[time_col, event_col, col]].dropna()
    cph = CoxPHFitter()
    try:
        cph.fit(temp_df, duration_col=time_col, event_col=event_col)
        summary = cph.summary.loc[col]
        results.append({
            "Variable": col,
            "N": len(temp_df),
            "Hazard Ratio": summary["exp(coef)"],
            "95% CI Lower": summary["exp(coef) lower 95%"],
            "95% CI Upper": summary["exp(coef) upper 95%"],
            "p-value": summary["p"]
        })
    except ConvergenceError:
        print(f"  Problem at '{col}' – skip.")
    except Exception as e:
        # Best-effort screen: report the failure and continue with the next column.
        print(f"  Mistake at '{col}': {e}")

# Show collected results
print("\n Univariate Cox-Regressionsergebnisse:\n")
if results:
    # Sort only when there is something to sort: an empty DataFrame has no
    # "p-value" column and sort_values() would raise KeyError.
    cox_results = pd.DataFrame(results).sort_values(by="p-value")
    print(cox_results.to_string(index=False, float_format="%.3f"))
else:
    cox_results = pd.DataFrame(
        columns=["Variable", "N", "Hazard Ratio", "95% CI Lower", "95% CI Upper", "p-value"]
    )
    print(" No valid model could be calculated.")


In [None]:
import pandas as pd
from lifelines import CoxPHFitter

# Columns -- defined BEFORE they are used to subset the frame, so the cell
# also runs on a fresh kernel (previously `covs` was referenced before assignment).
time_col  = "Time"
event_col = "Survival"
covs = ["Intensity", "Age", "Time to Relapse", "ECOG"]

# Load Data and keep only the model columns
df = pd.read_excel("your_data.xlsx")
df.columns = df.columns.str.strip()
df = df[[time_col, event_col] + covs].copy()

# Make sure that all variables are numerical (non-numeric entries become NaN)
for c in [time_col, event_col] + covs:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# CoxPHFitter refuses data containing NaNs, so restrict to complete cases and
# report how many rows were removed instead of failing inside fit().
n_rows_before = len(df)
df = df.dropna()
n_dropped = n_rows_before - len(df)
if n_dropped:
    print(f"Dropped {n_dropped} of {n_rows_before} rows with missing or non-numeric values.")

# Cox model calculation
cph = CoxPHFitter()
cph.fit(df, duration_col=time_col, event_col=event_col)
cph.print_summary(decimals=3)

In [None]:
import pandas as pd, numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt

# 1)  Data and constants
MODEL_NAME = "C"                           # label used in printed output and in the output file name
FILE        = "your_data.xlsx"
TREAT_COL   = "Treatment"                  # compared against 1 (treated) and 0 (control) below
MATCH_VARS  = ["ECOG", "Time to Relapse"]  # covariates entering the propensity model
CALIPER_F   = 0.2                          # caliper = CALIPER_F * SD of the logit score

# Outcome columns carried along into the matched data set (not used for matching)
OUTCOME_VARS = ["Time", "Survival"]


# 2)  Read Data
df0 = pd.read_excel(FILE)
df0.columns = df0.columns.str.strip()

# Matching is done on complete cases of treatment + matching variables
df  = df0[[TREAT_COL] + MATCH_VARS].dropna().copy()


# 3)  Propensity score model (logistic regression on standardized covariates)
X_std = StandardScaler().fit_transform(df[MATCH_VARS])
y     = df[TREAT_COL]

logit = LogisticRegression(max_iter=1000)
logit.fit(X_std, y)

df["pscore"]      = logit.predict_proba(X_std)[:, 1]
df["logit_score"] = logit.decision_function(X_std)
caliper           = CALIPER_F * df["logit_score"].std()


# 4)  1:1 matching (greedy nearest neighbour on the logit score, without replacement)
treated  = df[df[TREAT_COL] == 1]
control  = df[df[TREAT_COL] == 0]

dist = pairwise_distances(
    treated["logit_score"].values.reshape(-1, 1),
    control["logit_score"].values.reshape(-1, 1)
)

t_idx, c_idx = [], []
available = np.ones(len(control), dtype=bool)   # controls not yet used in a pair
for i, row in enumerate(dist):
    if not available.any():
        break
    # Mask already-matched controls so the nearest *free* control is chosen.
    # (Previously a treated patient was discarded whenever its overall nearest
    # control had been taken, even if another control lay within the caliper.)
    candidates = np.where(available, row, np.inf)
    j = candidates.argmin()
    if candidates[j] <= caliper:
        t_idx.append(treated.index[i])
        c_idx.append(control.index[j])
        available[j] = False

# Rows are ordered: all matched treated patients first, then their controls in
# the same pair order -- so the pair ids must be 0..n-1 followed by 0..n-1
# (np.tile), not 0,0,1,1,... (np.repeat), which mislabelled the pairs.
matched = df.loc[t_idx + c_idx].copy()
matched["pair_id"] = np.tile(np.arange(len(t_idx)), 2)

# Attach the outcome columns by original row index so the saved matched data
# set can be used directly for survival analysis.
outcome_cols = [c for c in OUTCOME_VARS
                if c in df0.columns and c not in matched.columns]
matched = matched.join(df0[outcome_cols])

print(f"\n  Modell {MODEL_NAME}: {len(t_idx)} Paare ({len(matched)} Patienten) gematcht")

# 5)  Balance (Standardized Mean Difference)
def smd(a, b):
    """Return the standardized mean difference between samples ``a`` and ``b``.

    The difference of the means is divided by the square root of the average
    of the two sample variances (as returned by ``.var()``).
    """
    pooled_sd = np.sqrt((a.var() + b.var()) / 2)
    mean_gap = a.mean() - b.mean()
    return mean_gap / pooled_sd

# Split the matched sample once by treatment arm
matched_treated = matched[matched[TREAT_COL] == 1]
matched_control = matched[matched[TREAT_COL] == 0]

# One row per matching variable: SMD before and after matching (3 decimals)
rows = [
    [
        var,
        round(smd(treated[var], control[var]), 3),
        round(smd(matched_treated[var], matched_control[var]), 3),
    ]
    for var in MATCH_VARS
]

bal = pd.DataFrame(rows, columns=["Variable", "SMD_before", "SMD_after"])
print("\nSMD (|SMD|<0.1 = gute Balance):")
print(bal)

# 6)  Love plot: before/after SMD per variable, with reference lines at 0 and +/-0.1
fig, ax = plt.subplots(figsize=(6, max(2, len(MATCH_VARS) * 0.45)))
ax.hlines(y=bal["Variable"], xmin=bal["SMD_before"], xmax=bal["SMD_after"])
ax.scatter(bal["SMD_before"], bal["Variable"], label="Before", marker="o")
ax.scatter(bal["SMD_after"], bal["Variable"], label="After", marker="s")
for x_ref, line_style in ((0, "--"), (0.1, ":"), (-0.1, ":")):
    ax.axvline(x_ref, ls=line_style, color="grey")
ax.set_xlabel("Standardized Mean Difference")
ax.set_title(f"Covariate Balance – Modell {MODEL_NAME}")
ax.legend()
plt.tight_layout()
plt.show()

# 7)  Save the matched data set next to the notebook
out_base = f"matched_model_{MODEL_NAME}"
out_path = Path(f"{out_base}.xlsx")

matched.to_excel(out_path, index=False)
print(f"\n💾  Gematchter Datensatz gespeichert: {out_path.resolve()}")




In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test
from lifelines.plotting import add_at_risk_counts
import os

# median_survival_times is used below but was not imported in this cell; it only
# worked when an earlier cell had already been run in the same kernel.
from lifelines.utils import median_survival_times

# 1) Load matched Data
# NOTE(review): this reads "matched_model_C_full.xlsx" -- confirm that this file
# exists and contains the Treatment, Time and Survival columns used below.
df = pd.read_excel("matched_model_C_full.xlsx")
TREAT, TIME, EVENT = "Treatment", "Time", "Survival"
LABELS  = {0: "Therapeutic DLI", 1: "2nd allo-HCT"}
COLORS  = {0: "blue", 1: "darkorange"}

# 2) Plot setup: one Kaplan-Meier curve per treatment group on a shared axis
fig, ax = plt.subplots(figsize=(10, 7))
kmf_list = []

for g in [0, 1]:
    mask = df[TREAT] == g
    kmf = KaplanMeierFitter()
    kmf.fit(df.loc[mask, TIME], df.loc[mask, EVENT], label=LABELS[g])
    kmf.plot_survival_function(ax=ax, ci_show=True,
                               linewidth=2, color=COLORS[g])
    # Keep every fitted estimator; they are passed to add_at_risk_counts below
    kmf_list.append(kmf)

    median_ci = median_survival_times(kmf.confidence_interval_)
    median    = kmf.median_survival_time_
    ci_lower  = median_ci.iloc[0, 0]
    ci_upper  = median_ci.iloc[0, 1]

    # Number of events and censored observations in this group
    n_total = mask.sum()
    n_event = df.loc[mask, EVENT].sum()
    n_censored = n_total - n_event

    print(f"{LABELS[g]}:")
    print(f"  → Median OS = {median:.2f} years (95% CI: {ci_lower:.2f} – {ci_upper:.2f})")
    print(f"  → Events: {int(n_event)}   |   Censored: {int(n_censored)}   |   Total: {n_total}\n")

# 3) Patients-at-risk table underneath the plot
add_at_risk_counts(*kmf_list, ax=ax)

# 4) Log-rank test between the two groups, annotated on the plot
mask0 = df[TREAT] == 0
mask1 = df[TREAT] == 1
p_val = logrank_test(df.loc[mask0, TIME], df.loc[mask1, TIME],
                     df.loc[mask0, EVENT], df.loc[mask1, EVENT]).p_value

ax.text(0.95, 0.03, f"Log-rank p = {p_val:.3f}", transform=ax.transAxes,
        ha='right', va='bottom', fontsize=11)

# 5) Layout
ax.set_xlim(left=0)
ax.set_ylim(0, 1.05)
ax.set_xlabel("Time (years)", fontsize=12)
ax.set_ylabel("Overall Survival", fontsize=12)
ax.legend(loc="lower left", frameon=False)
ax.grid(False)
plt.tight_layout()



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# 0) Load Data and prepare

# Input workbook, RMST horizon and output figure file
excel_path, t_limit, out_file = "your_data.xlsx", 2.0, "your_figure.tiff"

df = pd.read_excel(excel_path)
df.columns = df.columns.str.strip()
# Readable group label derived from the numeric treatment code
df = df.assign(Group=df["Treatment"].map({0: "DLI", 1: "2nd allo-HCT"}))


# 1) CIF per group and event type
def compute_cif(data, event_code, group_col,
                time_col="Time", status_col="Mortality"):
    """Estimate the cumulative incidence function (CIF) of one event type per group.

    Uses the Aalen-Johansen estimator: at every distinct time ``t`` the CIF
    grows by ``S(t-) * d_k(t) / n(t)``, where ``S(t-)`` is the Kaplan-Meier
    estimate of being free of *any* event just before ``t``, ``d_k(t)`` the
    number of events of type ``event_code`` at ``t`` and ``n(t)`` the number
    still at risk. This keeps every CIF within [0, 1] and makes the CIFs of
    all event types plus the all-cause survival add up to 1. (The previous
    ``exp(-cumulative hazard)`` weighting could push a CIF above 1.)

    Parameters
    ----------
    data : pandas.DataFrame
        One row per subject.
    event_code : int
        Value of ``status_col`` identifying the event type of interest.
    group_col : str
        Column whose unique values define the groups.
    time_col : str, default "Time"
        Column with follow-up times.
    status_col : str, default "Mortality"
        Column with the status code; 0 is treated as censored and every other
        value as an event of some type.

    Returns
    -------
    dict
        Maps each group value to ``(times, cif, surv)``: the sorted distinct
        follow-up times, the CIF of ``event_code`` at those times, and the
        all-cause Kaplan-Meier survival at those times (all NumPy arrays).
    """
    res = {}
    for grp in data[group_col].unique():
        # Rows with a missing time or status cannot be placed in a risk set.
        sub = data[data[group_col] == grp].dropna(subset=[time_col, status_col])
        times = sub[time_col].to_numpy()
        status = sub[status_col].to_numpy()
        uniq_t = np.unique(times)   # sorted distinct times

        cif_vals, surv_vals = [], []
        surv_prev, cif = 1.0, 0.0   # S(t-) and running CIF
        for t in uniq_t:
            at_t = times == t
            at_risk = (times >= t).sum()
            all_events = (at_t & (status != 0)).sum()
            events_of_type = (at_t & (status == event_code)).sum()

            # The increment uses survival *before* t; only then update survival.
            cif += surv_prev * events_of_type / at_risk
            surv_prev *= 1.0 - all_events / at_risk

            cif_vals.append(cif)
            surv_vals.append(surv_prev)

        res[grp] = (uniq_t, np.array(cif_vals), np.array(surv_vals))
    return res


# 2) CIF & Survival function calculation
# Event code 1 -> cif_rrm, event code 2 -> cif_nrm (same groups, same time grid)
cif_rrm, cif_nrm = (
    compute_cif(df, event_code=code, group_col="Group") for code in (1, 2)
)


# 3) RMST until t-limit calculation
def rmst(t, surv, t_star):
    """Restricted mean survival time: area under a survival step function on [0, t_star].

    Parameters
    ----------
    t : array-like
        Sorted time points at which the survival curve jumps.
    surv : array-like
        Survival value from each time in ``t`` onwards (same length as ``t``).
    t_star : float
        Upper integration limit.

    Returns
    -------
    float
        Survival is taken as 1 before the first time point and held constant
        between jumps, so the area is an exact sum of rectangles. Empty input
        yields ``t_star``; a non-positive ``t_star`` yields 0.
    """
    if t_star <= 0:
        return 0.0
    t = np.asarray(t, dtype=float)
    surv = np.asarray(surv, dtype=float)

    # Only jumps strictly before t_star change the area; later points (and
    # their survival values) must not enter the integral.
    before = t < t_star
    knots = np.concatenate([[0.0], t[before], [t_star]])
    heights = np.concatenate([[1.0], surv[before]])
    return float(np.sum(heights * np.diff(knots)))

# Event-free survival per group = 1 - (RRM CIF + NRM CIF), integrated up to t_limit
rmst_dict = {
    grp: rmst(times, 1.0 - (rrm_cif + cif_nrm[grp][1]), t_limit)
    for grp, (times, rrm_cif, _) in cif_rrm.items()
}

print(f"\n🔹 RMST bis {t_limit} Jahre")
for name, value in rmst_dict.items():
    print(f"   {name:12s}: {value:.2f} Jahre")


# 5) 1- and 2 year incidence from CIF
def incidence(cif_tuple, year):
    """Read the cumulative incidence at time ``year`` from a CIF step function.

    Parameters
    ----------
    cif_tuple : tuple
        ``(times, cif_values, ...)``; only the first two entries are used.
        ``times`` must be sorted ascending.
    year : float
        Time at which the CIF is evaluated.

    Returns
    -------
    float
        The CIF value at the last time point ``<= year``; 0 before the first
        time point (or for empty input). The CIF is a right-continuous step
        function, so no interpolation between jumps is performed.
    """
    t = np.asarray(cif_tuple[0])
    cif_vals = np.asarray(cif_tuple[1])
    # Number of time points <= year; the one just before that position applies.
    n_reached = np.searchsorted(t, year, side="right")
    if n_reached == 0:
        return 0.0
    return float(cif_vals[n_reached - 1])

# Print RRM and NRM incidence for every group at 1 and 2 years
for year in [1, 2]:
    print(f"\n🔹 Inzidenz bei {year} Jahr(en)")
    for grp in df["Group"].unique():
        rrm_at_year = incidence(cif_rrm[grp], year)
        nrm_at_year = incidence(cif_nrm[grp], year)
        print(f"   {grp:12s}: RRM {rrm_at_year:.2%}  |  NRM {nrm_at_year:.2%}")


# 6) Plot & TIFF-Export
# One solid RRM curve and one dashed NRM curve per group
fig, ax = plt.subplots(figsize=(10, 6))
for grp, (t, rrm, _) in cif_rrm.items():
    nrm = cif_nrm[grp][1]
    ax.step(t, rrm, where="post", label=f"{grp} – RRM", linewidth=2)
    ax.step(t, nrm, where="post", label=f"{grp} – NRM",
            linestyle="--", linewidth=2)

# Both axes start at 0; upper limits are left to matplotlib
ax.set_xlim(0)
ax.set_ylim(0)
ax.set_xlabel("Time (years)")
ax.set_ylabel("Cumulative incidence")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.4)
fig.tight_layout()
fig.savefig(out_file, dpi=600, format="tiff")
plt.show()




In [None]:
#R Code
# Load libraries
library(readxl)
library(cmprsk)
library (survival)

# Load Excel
# Read the workbook once. The original code read it a second time from the
# malformed placeholder path "C:your_path_to_your_data.xlsx", overwriting the
# first result. Use ONE of the two forms:
data <- read_excel("your_data.xlsx")
# data <- read_excel("C:/your_path_to/your_data.xlsx")   # alternative: absolute path

# Define Variables
event <- data$Mortality   # status code; 0 is used as the censoring code below
time <- data$Time
group <- data$Treatment

# Fine-Gray model for the default failure code (failcode = 1)
fg_model <- crr(ftime = time, fstatus = event, cov1 = data.frame(Treatment = group))
summary(fg_model)

# CIF calculation: cuminc() returns one curve per group x cause, named
# "<group> <cause>", plus a "Tests" element.
cif <- cuminc(ftime = time, fstatus = event, group = group)

# Keep only the cause-1 curves so the plot matches its title and two-entry
# legend. Plotting the full object draws all group x cause curves, and
# plot.cuminc uses the first colour for every curve when fewer colours than
# curves are supplied.
cif_rrm <- cif[grep(" 1$", names(cif), value = TRUE)]
class(cif_rrm) <- "cuminc"   # subsetting drops the class; restore it for plot()

# NOTE(review): labels assume Treatment is coded 0 = DLI, 1 = 2nd allo-HCT and
# that the curves are ordered by group -- confirm against names(cif_rrm).
plot(cif_rrm, lty = 1, color = c("blue", "red"), lwd = 2,
     curvlab = c("DLI", "2nd allo-HCT"),
     xlab = "Time (years)", ylab = "Cumulative incidence",
     main = "Relapse-related mortality by treatment")

legend("bottomright", legend = c("DLI", "2nd allo-HCT"),
       col = c("blue", "red"), lwd = 2)

# Fine-Gray model for the second event type
fg_model_nrm <- crr(
  ftime = time,
  fstatus = event,
  cov1 = data.frame(Treatment = group),
  failcode = 2,      # now focus on NRM
  cencode = 0        # censoring code stays the same
)

summary(fg_model_nrm)
                     
