In [None]:
import pandas as pd
import numpy as np


In [None]:
# Read the raw CSV into `df`; the path is relative to the notebook's working directory.
csv_path = "data/NBA_freethrows.csv"
df = pd.read_csv(csv_path)



In [None]:
# Show the column labels of the raw frame so the fields used by later cells
# (scoreHome, scoreAway, period, ft_made, actionType, shotResult, clock,
# playerName) can be checked against what the CSV actually provides.
df.columns


In [None]:
# Absolute scoring margin on every row, regardless of which side leads.
df['score_diff'] = (df['scoreHome'] - df['scoreAway']).abs()


In [None]:
# 1 when the row is in period 4 and the margin is at most 5 points, else 0.
df['pressure'] = (df['period'].eq(4) & df['score_diff'].le(5)).astype(int)


In [None]:
# Eyeball the first 20 rows of the outcome next to the derived pressure inputs.
df.head(20)[['ft_made', 'pressure', 'score_diff', 'period']]


In [None]:
# Mean of ft_made within each pressure group, computed over ALL rows of `df`.
# NOTE(review): `df` has not been restricted to free-throw rows at this point — confirm
# that ft_made is missing (not 0) on non-free-throw rows before reading this as an FT%.
df['ft_made'].groupby(df['pressure']).mean()


In [None]:
# The 15 most frequent actionType values, to find the label used for free throws.
df['actionType'].value_counts().iloc[:15]


In [None]:
# Keep only free-throw rows that carry an explicit Made/Missed result.
# `.copy()` detaches the subset so later column assignments do not touch `df`.
is_free_throw = df['actionType'].eq('freethrow')
has_outcome = df['shotResult'].isin(['Made', 'Missed'])
df_ft = df.loc[is_free_throw & has_outcome].copy()


In [None]:
# Overall mean of ft_made across the filtered free-throw rows.
df_ft.loc[:, 'ft_made'].mean()


In [None]:
# Recompute the absolute scoring margin on the free-throw subset.
df_ft['score_diff'] = df_ft['scoreHome'].sub(df_ft['scoreAway']).abs()


In [None]:
# 1 when the margin is at most 5 points, else 0.
df_ft['close_game'] = df_ft['score_diff'].le(5).astype(int)


In [None]:
# 1 when the row is in period 4, else 0 (periods above 4 are not counted as late).
df_ft['late_game'] = df_ft['period'].eq(4).astype(int)


In [None]:
# Pressure = close game AND late game, stored as a 0/1 integer.
df_ft['pressure'] = (df_ft['close_game'].eq(1) & df_ft['late_game'].eq(1)).astype(int)


In [None]:
# Mean of ft_made for pressure == 0 versus pressure == 1, free-throw rows only.
df_ft['ft_made'].groupby(df_ft['pressure']).mean()


In [None]:
# Inspect the first 10 raw clock strings before parsing them.
df_ft['clock'].iloc[:10]


In [None]:
# Parse the game clock into seconds remaining in the period.
# Presumably the strings look like "PT11M34.00S" — confirm against the preview cell.

# Stripped clock text, kept under its original column name for downstream
# compatibility. Despite the name it holds text such as "11M34.00", not a number.
df_ft['clock_seconds'] = (
    df_ft['clock']
    .str.replace('PT', '', regex=False)
    .str.replace('S', '', regex=False)
)

# Extract minutes and seconds with an anchored pattern in which each part is
# optional. Splitting on 'M' would store the seconds of a minutes-less value
# (e.g. "PT34.00S") in min_rem and leave time_remaining_sec NaN, silently dropping
# exactly the late-clock rows the clutch flag depends on.
clock_parts = df_ft['clock'].str.extract(r'^PT(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?$')
minutes = pd.to_numeric(clock_parts[0], errors='coerce')
seconds = pd.to_numeric(clock_parts[1], errors='coerce')

# A missing part counts as zero only when the other part parsed; values that
# match neither part (malformed or missing clocks) stay NaN.
unparsed = minutes.isna() & seconds.isna()
df_ft['min_rem'] = minutes.fillna(0).mask(unparsed)
df_ft['sec_rem'] = seconds.fillna(0).mask(unparsed)

df_ft['time_remaining_sec'] = df_ft['min_rem'] * 60 + df_ft['sec_rem']


In [None]:
# Clutch = period 4, at most 120 seconds on the clock, margin of at most 5 points.
# Rows whose time_remaining_sec is NaN fail the comparison and are flagged 0.
in_fourth_period = df_ft['period'].eq(4)
inside_two_minutes = df_ft['time_remaining_sec'].le(120)
within_five_points = df_ft['score_diff'].le(5)

df_ft['clutch'] = (in_fourth_period & inside_two_minutes & within_five_points).astype(int)


In [None]:
# Mean of ft_made for clutch == 0 versus clutch == 1.
df_ft['ft_made'].groupby(df_ft['clutch']).mean()


In [None]:
# Number of rows in each clutch group, to see how small the clutch sample is.
df_ft.loc[:, 'clutch'].value_counts()

In [None]:
import statsmodels.api as sm

# Logistic regression of the make/miss outcome on the clutch flag alone.
y = df_ft['ft_made']

# add_constant prepends an intercept column so the clutch coefficient is read
# relative to the non-clutch baseline.
X = sm.add_constant(df_ft[['clutch']])

model = sm.Logit(y, X).fit()
print(model.summary())


In [None]:
# Per-player baseline: mean of ft_made over that player's clutch == 0 rows.
# NOTE(review): no minimum-attempt threshold is applied, so a player with a single
# non-clutch attempt gets a baseline of exactly 0 or 1 — confirm this is intended.
non_clutch_rows = df_ft.loc[df_ft['clutch'].eq(0)]
baseline = (
    non_clutch_rows
    .groupby('playerName')['ft_made']
    .mean()
    .rename('H_baseline')
)

# Attach each row's player baseline, then discard rows whose player has no
# non-clutch attempts (no baseline could be computed for them).
df_hs = (
    df_ft
    .assign(H_baseline=df_ft['playerName'].map(baseline))
    .dropna(subset=['H_baseline'])
)


In [None]:
# Model terms: D = clutch flag (drive / pressure), H = baseline make rate
# (habit strength), DH = their product (interaction).
df_hs = df_hs.assign(
    D=df_hs['clutch'],
    H=df_hs['H_baseline'],
    DH=lambda frame: frame['D'] * frame['H'],
)


In [None]:
# Split rows into four equal-count bins of baseline make rate, lowest to highest.
quartile_names = ['Low H','Mid-Low H','Mid-High H','High H']
df_hs['H_bin'] = pd.qcut(df_hs['H'], q=4, labels=quartile_names)

# Mean outcome per (bin, clutch) pair, pivoted so the clutch values 0 and 1
# become the columns.
rate_by_bin_and_clutch = df_hs.groupby(['H_bin','clutch'])['ft_made'].mean()
summary = rate_by_bin_and_clutch.unstack()

# Positive values mean the bin shoots worse in clutch rows than in non-clutch rows.
summary['drop_under_pressure'] = summary[0].sub(summary[1])
summary


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Full-sample feature matrix and target, kept under their original names.
# X is NOT used for fitting or scoring below: df_hs['H'] is each player's mean
# ft_made over all of their non-clutch rows, so it already contains the labels of
# rows that end up in the test split (target leakage, inflated AUC).
X = df_hs[['H','D','DH']]
y = df_hs['ft_made']

# Split the ROWS first (same size, seed and stratification as before), so the
# baseline can be rebuilt from training rows only.
train_rows, test_rows = train_test_split(
    df_hs, test_size=0.25, random_state=42, stratify=y
)

# Per-player baseline estimated from the training split's non-clutch rows only.
train_baseline = (
    train_rows.loc[train_rows['clutch'] == 0]
    .groupby('playerName')['ft_made']
    .mean()
)


def _leak_free_xy(rows):
    """Build the (H, D, DH) features and target for `rows` from the train-only baseline.

    Rows whose player has no non-clutch attempts in the training split get no
    baseline and are dropped, mirroring the earlier drop of baseline-less players.
    """
    habit = rows['playerName'].map(train_baseline)
    drive = rows['clutch']
    features = pd.DataFrame({'H': habit, 'D': drive, 'DH': drive * habit})
    keep = features.notna().all(axis=1).to_numpy()
    return features[keep], rows['ft_made'][keep]


X_train, y_train = _leak_free_xy(train_rows)
X_test, y_test = _leak_free_xy(test_rows)

# NOTE(review): a training row's H still includes that row's own outcome, so the
# fitted H coefficient is optimistic; the held-out AUC below no longer is.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

pred = clf.predict_proba(X_test)[:,1]
roc_auc_score(y_test, pred), dict(zip(X_train.columns, clf.coef_[0]))


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Mean outcome at each distinct baseline value, one line per clutch group.
# NOTE(review): H is the player's own non-clutch mean, so the clutch == 0 line is
# close to y = x by construction — read the gap between the lines with that in mind.
fig, ax = plt.subplots()
sns.lineplot(
    data=df_hs,
    x='H',
    y='ft_made',
    hue='clutch',
    estimator='mean',
    ax=ax,
)
ax.set_xlabel("Habit Strength (Baseline FT%)")
ax.set_ylabel("FT Make Probability")
ax.set_title("Pressure Effects Across Skill Levels")
plt.show()


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Quartile bins of baseline make rate, recomputed here so the cell stands alone.
quartile_names = ['Low H', 'Mid-Low H', 'Mid-High H', 'High H']
df_hs['H_bin'] = pd.qcut(df_hs['H'], q=4, labels=quartile_names)

# One row per (bin, clutch) pair holding the mean outcome.
bin_means = df_hs.groupby(['H_bin', 'clutch'])['ft_made'].mean()
plot_df = bin_means.reset_index()

# Point plot of the pre-aggregated means. Each point is backed by a single value,
# so there is no spread for `capsize` to draw error bars from.
fig, ax = plt.subplots()
sns.pointplot(
    data=plot_df,
    x='H_bin',
    y='ft_made',
    hue='clutch',
    dodge=True,
    capsize=0.1,
    ax=ax,
)

ax.set_ylim(0.6, 0.9)
ax.set_ylabel("FT Make Probability")
ax.set_xlabel("Habit Strength (Binned)")
ax.set_title("Pressure Effects Across Skill Levels")
plt.show()


In [None]:
# Show the column labels of the free-throw frame, including the derived columns
# added by the cells above (score_diff, close_game, late_game, pressure,
# clock_seconds, min_rem, sec_rem, time_remaining_sec, clutch).
df_ft.columns


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Tier names from lowest to highest baseline quartile; used both to label the
# bins and to fix the row order of the plotted table.
tier_labels = ["Low ability", "Mid-Low ability", "Mid-High ability", "High ability"]

# Work on a fresh frame with the outcome and the clutch flag coerced to numbers
# (anything non-numeric becomes NaN and is dropped below).
df_ft = df_ft.assign(
    ft_made=pd.to_numeric(df_ft["ft_made"], errors="coerce"),
    clutch=pd.to_numeric(df_ft["clutch"], errors="coerce"),
)

# H = each player's mean outcome over their clutch == 0 rows (baseline ability).
non_clutch_rows = df_ft.loc[df_ft["clutch"].eq(0)]
H_by_player = non_clutch_rows.groupby("playerName")["ft_made"].mean()

# Attach H to every row, then drop rows lacking a baseline, an outcome or a flag
# (e.g. players who only appear in clutch rows).
df_ft = (
    df_ft
    .assign(H=df_ft["playerName"].map(H_by_player))
    .dropna(subset=["H", "ft_made", "clutch"])
)

# Four equal-count bins of baseline ability.
df_ft["H_bin"] = pd.qcut(df_ft["H"], q=4, labels=tier_labels)

# Mean outcome per (tier, clutch) pair, then one row per tier with the clutch
# values 0 and 1 as columns.
tier_means = df_ft.groupby(["H_bin", "clutch"])["ft_made"].mean()
plot_df = tier_means.reset_index()
wide = (
    plot_df
    .pivot(index="H_bin", columns="clutch", values="ft_made")
    .reindex(tier_labels)
)

# One line per clutch group, with rates shown as percentages.
fig, ax = plt.subplots(figsize=(8, 5))

for flag, legend_text in ((0, "Non-clutch (0)"), (1, "Clutch (1)")):
    ax.plot(wide.index, wide[flag] * 100, marker="o", label=legend_text)

ax.set_title("Pressure Effects on Free-Throw % Across Skill Levels")
ax.set_xlabel("Baseline Free-Throw Ability (grouped by skill tiers)")
ax.set_ylabel("Free-Throw %")
ax.set_ylim(60, 90)
ax.legend(title="Clutch Indicator")

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Tier names from lowest to highest baseline quartile; used both to label the
# bins and to fix the row order of the plotted table.
tier_labels = ["Weak", "Moderately Weak", "Moderately Strong", "Strong"]

# Work on a fresh frame with the outcome and the clutch flag coerced to numbers
# (anything non-numeric becomes NaN and is dropped below).
df_ft = df_ft.assign(
    ft_made=pd.to_numeric(df_ft["ft_made"], errors="coerce"),
    clutch=pd.to_numeric(df_ft["clutch"], errors="coerce"),
)

# H = each player's mean outcome over their clutch == 0 rows (baseline ability).
non_clutch_rows = df_ft.loc[df_ft["clutch"].eq(0)]
H_by_player = non_clutch_rows.groupby("playerName")["ft_made"].mean()

# Attach H to every row, then drop rows lacking a baseline, an outcome or a flag
# (e.g. players who only appear in clutch rows).
df_ft = (
    df_ft
    .assign(H=df_ft["playerName"].map(H_by_player))
    .dropna(subset=["H", "ft_made", "clutch"])
)

# Four equal-count bins of baseline ability.
df_ft["H_bin"] = pd.qcut(df_ft["H"], q=4, labels=tier_labels)

# Mean outcome per (tier, clutch) pair, then one row per tier with the clutch
# values 0 and 1 as columns.
tier_means = df_ft.groupby(["H_bin", "clutch"])["ft_made"].mean()
plot_df = tier_means.reset_index()
wide = (
    plot_df
    .pivot(index="H_bin", columns="clutch", values="ft_made")
    .reindex(tier_labels)
)

# One line per clutch group, with rates shown as percentages.
fig, ax = plt.subplots(figsize=(8, 5))

for flag, legend_text in ((0, "Non-clutch (0)"), (1, "Clutch (1)")):
    ax.plot(wide.index, wide[flag] * 100, marker="o", label=legend_text)

ax.set_title("Pressure Effects on Free-Throw Percentage Across Skill Levels")
ax.set_xlabel("Baseline Free-Throw Ability (grouped by skill tiers)", fontsize=12)
ax.set_ylabel("Free-Throw Percentage", fontsize=13)
ax.set_ylim(60, 90)
ax.legend(title="Clutch Indicator")

plt.tight_layout()
plt.show()