In [None]:
import pickle
# Load the pickled dataset from the file "dataset" in the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only run this against a file from a trusted source.
with open("dataset", "rb") as fp: 
   data = pickle.load(fp)

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
 
from lifelines.fitters.coxph_fitter import CoxPHFitter
from lifelines.statistics import proportional_hazard_test
from lifelines import KaplanMeierFitter

In [None]:
# Position label -> weights, in the column order [PG, SG, SF, PF, C].
# Each entry pairs the accepted labels with the weights appended to the row.
position_weights = (
    (("Guard",), [1.0, 0.0, 0.0, 0.0, 0.0]),
    (("Forward",), [0.0, 0.0, 1.0, 0.0, 0.0]),
    (("Guard-Forward", "Forward-Guard"), [0.0, 0.5, 0.5, 0.0, 0.0]),
    (("Forward-Center", "Center-Forward"), [0.0, 0.0, 0.0, 0.5, 0.5]),
    (("Center",), [0.0, 0.0, 0.0, 0.0, 1.0]),
)
# Rows from these seasons (stored as strings at index 3) are left out.
excluded_seasons = ("2025", "2024")

new_data = []

for row in data:
    if row[3] in excluded_seasons:
        continue

    position = row[2]

    # Any label not listed above falls through to all-zero weights.
    weights = [0.0, 0.0, 0.0, 0.0, 0.0]
    for labels, label_weights in position_weights:
        if position in labels:
            weights = label_weights
            break

    # Build a new list so the source row is not modified, then store the
    # season as an integer instead of a string.
    encoded_row = row + weights
    encoded_row[3] = int(encoded_row[3])

    new_data.append(encoded_row)


In [None]:
# Names for the fields of each input row, followed by the five position-weight
# columns that the preprocessing loop appends to every row.
raw_columns = [
    'Height', 'Weight', 'Position', 'Season', 'Age', 'ID', 'Games Played',
    'Minutes Per Game', 'Field Goals Made', 'Field Goals Attempted',
    '3 Pointers Made', '3 Pointers Attempted', 'Rebounds', 'Assists',
    'Turnovers', 'Steals', 'Blocks', 'Blocked Shots', 'Fouls', 'Fouls Drawn',
    'Points', '<8 ft shots', '8-16 ft shots', '16-24 ft shots',
    '24+ ft shots', 'Usage Rate', 'Season-ending Injury',
]
position_columns = ['PG', 'SG', 'SF', 'PF', 'C']

df = pd.DataFrame(new_data, columns=raw_columns + position_columns)

In [None]:
# Modelling frame: every column of df except 'Position' and 'ID', in the
# same order as they appear in df.
important_df = df.drop(columns=['Position', 'ID'])

In [None]:
# Cox proportional-hazards fit with 'Games Played' as the duration and
# 'Season-ending Injury' as the event indicator.
# alpha=0.05 gives the 95% confidence intervals reported in cph.summary;
# penalizer / l1_ratio set the regularisation strength and its L1 share
# (see the lifelines CoxPHFitter documentation).
cox_settings = dict(alpha=0.05, penalizer=0.0001, l1_ratio=0.5)
cph = CoxPHFitter(**cox_settings)
cph.fit(important_df, duration_col='Games Played', event_col='Season-ending Injury')

In [None]:
# Display the fitted model's summary table as the cell output.
cph.summary


In [None]:
import pandas as pd
from tabulate import tabulate

# Hazard ratio, its 95% confidence bounds and the p-value for each covariate,
# rounded to three decimals for display.
hazard_columns = ["exp(coef)", "exp(coef) lower 95%", "exp(coef) upper 95%", "p"]
display_headers = ["Hazard Ratio", "95% CI (Lower)", "95% CI (Upper)", "P-Value"]

result = cph.summary[hazard_columns]
result_rounded = result.round(3)

# Plain-text dump first, then the same table drawn with box characters.
print(result_rounded.to_string())
print(tabulate(result_rounded, headers=display_headers, tablefmt="fancy_grid"))


In [None]:
# Run lifelines' assumption check for the fitted model on the training frame,
# using a 0.05 p-value threshold and with diagnostic plots enabled.
cph.check_assumptions(important_df, p_value_threshold=0.05, show_plots=True)

In [None]:
# One-row frame holding the column means of every covariate (the duration and
# event columns are removed first), and the survival curve predicted for it.
covariate_frame = important_df.drop(columns=['Games Played', 'Season-ending Injury'])
mean_profile = covariate_frame.mean().to_frame().T
avg_surv = cph.predict_survival_function(mean_profile)


In [None]:
import matplotlib.pyplot as plt

baseline_survival = cph.baseline_survival_

# Survival curve predicted for the mean covariate profile.
fig, ax = plt.subplots(figsize=(16, 10))
ax.plot(avg_surv.index.to_numpy(), avg_surv.values, label="Average All-Star")
ax.set_xlabel("Games Played")
ax.set_ylabel("Survival Probability")
ax.legend()
ax.set_title("Average All-Star Survival")
plt.show()

In [None]:
import matplotlib.pyplot as plt

baseline_survival = cph.baseline_survival_


def _average_player_with(overrides):
    """Return a one-row frame of important_df's column means with overrides applied.

    Parameters
    ----------
    overrides : dict
        Maps an existing column name of important_df to the value that should
        replace that column's mean.

    Raises
    ------
    KeyError
        If an override names a column that is not in important_df. Assigning
        to a misspelled name would otherwise just add a new column and leave
        the intended column at its mean.
    """
    player = important_df.mean().to_frame().T
    unknown = [column for column in overrides if column not in player.columns]
    if unknown:
        raise KeyError(f"Unknown column(s) in overrides: {unknown}")
    for column, value in overrides.items():
        player[column] = value
    return player


plt.figure(figsize=(16,10))
plt.plot(baseline_survival.index.to_numpy(), baseline_survival.values, label="Baseline Survival")

# Average profiles that differ only in 'Fouls Drawn'.
example_player = _average_player_with({"Fouls Drawn": 7.57})
example2_player = _average_player_with({"Fouls Drawn": 3.78})
example3_player = _average_player_with({"Fouls Drawn": 3.53})

# Average profile whose shots all come from 24+ ft. The column is named
# '24+ ft shots'; the earlier '>24 ft shots' spelling created an extra column
# and never changed the real one.
# NOTE(review): the hand-entered 2025 rows elsewhere in this notebook carry
# shot-distance values that look like percentages (0-100); confirm whether
# 1.0 here should be 100.0.
example4_player = _average_player_with({
    "<8 ft shots": 0.0,
    "8-16 ft shots": 0.0,
    "16-24 ft shots": 0.0,
    "24+ ft shots": 1.0,
})

survival_func = cph.predict_survival_function(example_player)
survival_func2 = cph.predict_survival_function(example2_player)
survival_func3 = cph.predict_survival_function(example3_player)
# Computed for the shot-profile player but not drawn on this figure.
survival_func4 = cph.predict_survival_function(example4_player)

# Plot the adjusted survival curves on top of the baseline.
plt.plot(survival_func.index.to_numpy(), survival_func.values, label="7.57 Fouls Drawn (Giannis) ")
plt.plot(survival_func2.index.to_numpy(), survival_func2.values, label="3.78 Fouls Drawn (Barnes)")
plt.plot(survival_func3.index.to_numpy(), survival_func3.values, label="3.53 Fouls Drawn (Curry)")
plt.xlabel("Games Played")
plt.ylabel("Survival Probability")
plt.legend()
plt.title("Baseline vs Adjusted Survival Curves")
plt.show()

In [None]:
# Hand-entered single-row frame for the "curry" test player, using the same
# column names and order as important_df.
# NOTE(review): Season is 2025, a season the preprocessing filter excludes
# from the fitted data, so predictions for this row extrapolate on 'Season'.
curry_test = pd.DataFrame([[74, 185, 2025, 36, 70, 32.18, 8.06, 17.97, 4.44, 11.2, 
                           4.43, 6.01, 2.86, 1.14, 0.43, 0.67, 1.36, 3.53, 24.54, 
                           23.0, 10.0, 5.0, 62.0, 33.627031113038626, 0, 1.0, 0.0, 0.0, 0.0, 0.0]], columns=['Height', 'Weight', 'Season', 
                                'Age', 'Games Played', 'Minutes Per Game', 'Field Goals Made', 
                                 'Field Goals Attempted', '3 Pointers Made', '3 Pointers Attempted', 'Rebounds', 'Assists', 'Turnovers', 
                                 'Steals', 'Blocks', 'Blocked Shots', 'Fouls', 'Fouls Drawn', 'Points', '<8 ft shots', '8-16 ft shots', 
                                 '16-24 ft shots', '24+ ft shots', 'Usage Rate', 'Season-ending Injury', 'PG', 'SG', 'SF', 'PF', 'C'])

In [None]:
# Hand-entered single-row frame for the "giannis" test player, using the same
# column names and order as important_df.
# NOTE(review): this row sets PF = 1.0, but the preprocessing loop only ever
# assigns PF = 0.5 (for Forward-Center labels); confirm this encoding is intended.
# NOTE(review): Season is 2025, a season the preprocessing filter excludes
# from the fitted data.
giannis_test = pd.DataFrame([[83, 243, 2025, 31, 67, 34.16, 11.84, 19.69, 0.21, 0.94, 11.91, 
                              6.46, 3.07, 0.87, 1.16, 1.13, 2.31, 7.57, 30.39, 68.0, 12.0, 15.0, 5.0, 37.09292555453789, 0, 0.0, 0.0, 0.0, 1.0, 0.0]], 
                                 columns=['Height', 'Weight', 'Season', 
                                'Age', 'Games Played', 'Minutes Per Game', 'Field Goals Made', 
                                 'Field Goals Attempted', '3 Pointers Made', '3 Pointers Attempted', 'Rebounds', 'Assists', 'Turnovers', 
                                 'Steals', 'Blocks', 'Blocked Shots', 'Fouls', 'Fouls Drawn', 'Points', '<8 ft shots', '8-16 ft shots', 
                                 '16-24 ft shots', '24+ ft shots', 'Usage Rate', 'Season-ending Injury', 'PG', 'SG', 'SF', 'PF', 'C'])

In [None]:
# Hand-entered single-row frame for the "barnes" test player, using the same
# column names and order as important_df.
# NOTE(review): Season is 2025, a season the preprocessing filter excludes
# from the fitted data.
barnes_test = pd.DataFrame([[79, 237, 2025, 24, 65, 32.83, 7.29, 16.35, 1.17, 4.31, 7.72, 5.82, 2.83, 1.43, 0.97,
                             0.62, 1.74, 3.78, 19.26, 42.0, 24.0, 8.0, 26.0, 29.991556520780684, 0, 0.0, 0.0, 1.0, 0.0, 0.0]], 
                           columns=['Height', 'Weight', 'Season', 
                                'Age', 'Games Played', 'Minutes Per Game', 'Field Goals Made', 
                                 'Field Goals Attempted', '3 Pointers Made', '3 Pointers Attempted', 'Rebounds', 'Assists', 'Turnovers', 
                                 'Steals', 'Blocks', 'Blocked Shots', 'Fouls', 'Fouls Drawn', 'Points', '<8 ft shots', '8-16 ft shots', 
                                 '16-24 ft shots', '24+ ft shots', 'Usage Rate', 'Season-ending Injury', 'PG', 'SG', 'SF', 'PF', 'C'])

In [None]:
import matplotlib.pyplot as plt

baseline_survival = cph.baseline_survival_

plt.figure(figsize=(16,10))
plt.plot(baseline_survival.index.to_numpy(), baseline_survival.values, label="Baseline Survival")

# Predicted survival curves for the three hand-entered 2025 player rows.
survival_func = cph.predict_survival_function(curry_test)
survival_func2 = cph.predict_survival_function(giannis_test)
survival_func3 = cph.predict_survival_function(barnes_test)
# Computed for the shot-profile example player but not drawn on this figure.
survival_func4 = cph.predict_survival_function(example4_player)

# Each curve is paired with the player it was predicted from. The legend
# previously rotated the names, so every curve carried another player's label.
labelled_curves = (
    (survival_func, "2025 Curry"),
    (survival_func2, "2025 Giannis"),
    (survival_func3, "2025 Barnes"),
)
for curve, curve_label in labelled_curves:
    plt.plot(curve.index.to_numpy(), curve.values, label=curve_label)

plt.xlabel("Games Played")
plt.ylabel("Survival Probability")
plt.legend()
plt.title("Baseline vs Adjusted Survival Curves")
plt.show()

In [None]:
# Display the survival-function frame most recently assigned to survival_func.
survival_func

In [None]:
# Hand-entered 2025 rows, one per fantasy pick, using the same column names and
# order as important_df. Rows are matched to names by position in fantasy_picks.
# NOTE(review): the sixth row has PG/SG/SF weights of 0.5/0.5/1.0, which sum to
# 2.0, while every other row's position weights sum to 1.0 - confirm the values.
# NOTE(review): some rows set SG = 1.0, which the preprocessing loop never
# produces (it only assigns SG = 0.5); confirm these encodings are intended.
# NOTE(review): Season is 2025 in every row, a season the preprocessing filter
# excludes from the fitted data.
fantasy_test = pd.DataFrame([[73, 164, 2025, 26, 76, 36.04, 7.45, 18.11, 2.87, 8.45, 3.11, 11.58, 4.67, 1.2, 0.16, 0.87, 1.91, 5.66, 24.22, 28.999999999999996, 19.0, 6.0, 46.0, 36.8675732646684, 0, 1.0, 0.0, 0.0, 0.0, 0.0], 
                               [76, 186, 2025, 22, 82, 32.89, 7.41, 17.52, 2.85, 8.06, 4.6, 3.44, 2.48, 0.87, 0.33, 1.29, 1.55, 3.02, 21.01, 34.0, 9.0, 11.0, 46.0, 27.918616896052473, 0, 0.0, 1.0, 0.0, 0.0, 0.0], 
                               [79, 240, 2025, 27, 74, 36.57, 6.61, 13.88, 2.31, 6.22, 4.84, 2.22, 1.38, 1.49, 0.88, 1.2, 2.32, 2.36, 18.0, 45.0, 7.000000000000001, 4.0, 45.0, 20.218090331823277, 0, 0.0, 0.0, 1.0, 0.0, 0.0], 
                               [83, 240, 2025, 36, 62, 36.53, 9.55, 18.13, 2.58, 6.0, 6.03, 4.24, 3.06, 0.81, 1.24, 0.52, 1.68, 5.34, 26.56, 20.0, 38.0, 9.0, 33.0, 29.43261175553662, 0, 0.0, 0.0, 0.5, 0.5, 0.0], 
                               [81, 243, 2025, 26, 82, 28.0, 5.51, 7.8, 0.0, 0.06, 9.73, 1.93, 1.18, 0.94, 0.89, 0.57, 1.55, 3.13, 13.45, 87.0, 12.0, 1.0, 1.0, 16.2970693406768, 0, 0.0, 0.0, 0.0, 0.0, 1.0], 
                               [78, 206, 2025, 28, 75, 37.27, 8.72, 18.93, 2.44, 7.35, 4.07, 7.05, 2.93, 0.89, 0.21, 0.83, 2.64, 5.39, 25.64, 23.0, 25.0, 12.0, 39.0, 33.54551895561792, 0, 0.5, 0.5, 1.0, 0.0, 0.0], 
                               [80, 210, 2025, 29, 57, 31.58, 6.23, 13.11, 2.79, 7.16, 4.33, 3.4, 1.74, 0.93, 0.44, 0.63, 1.82, 2.98, 18.77, 28.999999999999996, 10.0, 6.0, 55.00000000000001, 24.875828750690985, 0, 0.0, 1.0, 0.0, 0.0, 0.0], 
                               [75, 185, 2025, 27, 17, 34.0, 7.35, 16.47, 1.53, 5.59, 4.29, 6.82, 2.41, 1.47, 0.29, 0.65, 2.71, 3.29, 19.71, 34.0, 22.0, 11.0, 33.0, 31.034191108161572, 0, 1.0, 0.0, 0.0, 0.0, 0.0], 
                               [84, 260, 2025, 29, 57, 29.58, 6.37, 10.16, 0.02, 0.05, 9.6, 2.77, 1.95, 1.16, 1.25, 0.61, 3.09, 2.28, 14.46, 72.0, 26.0, 1.0, 0.0, 19.19915775377338, 0, 0.0, 0.0, 0.0, 0.0, 1.0], 
                               [84, 252, 2025, 26, 40, 30.14, 6.62, 11.7, 0.15, 0.8, 10.15, 1.6, 1.73, 0.78, 0.97, 0.38, 2.2, 1.48, 14.4, 56.99999999999999, 25.0, 11.0, 7.000000000000001, 18.875997098436162, 0, 0.0, 0.0, 0.0, 0.0, 1.0], 
                               [75, 200, 2025, 27, 65, 31.6, 6.34, 14.45, 2.14, 6.58, 3.77, 5.57, 2.45, 0.94, 0.58, 0.83, 2.43, 2.29, 17.18, 34.0, 15.0, 5.0, 45.0, 28.38777175403318, 0, 0.0, 0.5, 0.5, 0.0, 0.0],
                               [77, 200, 2025, 24, 64, 30.99, 6.16, 13.89, 2.47, 6.7, 3.98, 2.86, 1.38, 1.31, 0.52, 0.45, 1.69, 1.53, 16.27, 23.0, 20.0, 10.0, 48.0, 23.87432979748212, 0, 0.0, 0.5, 0.5, 0.0, 0.0]], 
                            columns=['Height', 'Weight', 'Season', 
                                'Age', 'Games Played', 'Minutes Per Game', 'Field Goals Made', 
                                 'Field Goals Attempted', '3 Pointers Made', '3 Pointers Attempted', 'Rebounds', 'Assists', 'Turnovers', 
                                 'Steals', 'Blocks', 'Blocked Shots', 'Fouls', 'Fouls Drawn', 'Points', '<8 ft shots', '8-16 ft shots', 
                                 '16-24 ft shots', '24+ ft shots', 'Usage Rate', 'Season-ending Injury', 'PG', 'SG', 'SF', 'PF', 'C'])

In [None]:
# Display the first fantasy_test row as a Series.
fantasy_test.iloc[0]

In [None]:
# Player names for the fantasy roster. The plotting loop pairs entry i of this
# list with row i of fantasy_test, so the two must stay in the same order.
fantasy_picks = ['Trae Young', 'Jalen Green', 'OG Anunoby', 'Kevin Durant', 'Jarrett Allen', 'Devin Booker', 'Cameron Johnson', "De'Aaron Fox", 'Jakob Poeltl', 'Deandre Ayton', 'Malik Monk', 'Devin Vassell']

In [None]:
import matplotlib.pyplot as plt

baseline_survival = cph.baseline_survival_

# Baseline curve first, then one predicted curve per fantasy pick.
fig, ax = plt.subplots(figsize=(16, 10))
ax.plot(baseline_survival.index.to_numpy(), baseline_survival.values, label="Baseline Survival")

for pick_number, player_name in enumerate(fantasy_picks):
    survival_func = cph.predict_survival_function(fantasy_test.iloc[pick_number])
    # NOTE(review): 77 is a positional row index into the survival frame, not a
    # game count; which duration it maps to depends on the fitted timeline.
    # Confirm it is the intended end-of-season row.
    print(player_name + ':', survival_func.iloc[77, 0])

    ax.plot(survival_func.index.to_numpy(), survival_func.values, label=f"2025 {player_name}")

ax.set_xlabel("Games Played")
ax.set_ylabel("Survival Probability")
ax.legend()
ax.set_title("Baseline vs Adjusted Survival Curves")
plt.show()

In [None]:
# Per-player survival probabilities, entered as literals.
# NOTE(review): presumably copied from the values printed by the fantasy-picks
# loop; they will not update if the model or inputs change - TODO confirm.
survival_rows = [
    ("Trae Young", 0.9067999733742572),
    ("Devin Booker", 0.8285557035650096),
    ("De'Aaron Fox", 0.922963414095947),
    ("Kevin Durant", 0.3607880423391926),
    ("Jarrett Allen", 0.982203993978336),
    ("Jalen Green", 0.7408110027028585),
    ("OG Anunoby", 0.8925162212703198),
    ("Jakob Poeltl", 0.9681631214091581),
    ("Malik Monk", 0.9262361584885017),
    ("Deandre Ayton", 0.9562617246288591),
    ("Devin Vassell", 0.9288135921418312),
    ("Cameron Johnson", 0.9287382565273111),
]

table_view = pd.DataFrame(survival_rows, columns=["Player", "Survival Likelihood"]).round(3)
# Convert the rounded probabilities to percentages and number rows from 1.
table_view["Survival Likelihood"] = table_view["Survival Likelihood"] * 100.0
table_view.index = table_view.index + 1
print(tabulate(table_view, headers=["Likelihood of surviving 82 games (%)"], tablefmt="fancy_grid"))

In [None]:
# Forest plot: one point per covariate at its hazard ratio, with horizontal
# bars spanning the 95% confidence interval, sorted by hazard ratio.
hr_columns = ["exp(coef)", "exp(coef) lower 95%", "exp(coef) upper 95%"]
summary = cph.summary[hr_columns].sort_values("exp(coef)")

hazard_ratio = summary["exp(coef)"]
# errorbar expects distances from the point, not absolute bounds.
distance_to_lower = hazard_ratio - summary["exp(coef) lower 95%"]
distance_to_upper = summary["exp(coef) upper 95%"] - hazard_ratio
y_positions = range(len(summary))

fig, ax = plt.subplots(figsize=(10, 7))
ax.set_xlim(0, 3)
ax.errorbar(
    hazard_ratio,
    y_positions,
    xerr=[distance_to_lower, distance_to_upper],
    fmt="o",
    color="black",
    ecolor="gray",
    capsize=3,
)

# Reference line at HR = 1.
ax.axvline(x=1, color="red", linestyle="--")

ax.set_yticks(y_positions)
ax.set_yticklabels(summary.index)
ax.set_xlabel("Hazard Ratio (HR)")
ax.set_title("Hazard Ratios with 95% Confidence Intervals")
ax.grid(axis="x", linestyle="--", alpha=0.6)

fig.tight_layout()
plt.show()