In [None]:
!pip install polars 
!pip install seaborn 
!pip install scikit-learn


In [1]:
import numpy as np
import polars as pl
import seaborn as sns
from sklearn.linear_model import LinearRegression

In [3]:
# Seed the global NumPy RNG so every draw below is reproducible.
# NOTE: the draws happen in a fixed order (team ids -> team effects -> Date
# -> noise); reordering them changes the simulated data.
np.random.seed(42)

# True parameters of the data-generating process
beta0 = 3       # Intercept
beta1 = 2       # Effect of CoachChange
beta2 = 1.5     # Effect of Date (e.g., time in season)
n = 1000        # Number of simulated observations
noise_sd = 2    # Standard deviation of the Gaussian noise on Points

# Assign each observation to one of 20 teams, uniformly at random
n_teams = 20
team_ids = np.random.choice(range(n_teams), size=n)

# One random additive effect per team, drawn from N(0, 1)
true_team_effects = np.random.normal(0, 1, size=n_teams)

# Observation-level covariates: a uniform Date in [1, 38) and the team id
sim_data = pl.DataFrame({
    "Date": np.random.uniform(1, 38, size=n),
    "Team": team_ids,
})

# CoachChange is a deterministic function of Date: 1 when Date > 19, else 0
sim_data = sim_data.with_columns(
    (pl.col("Date") > 19).cast(pl.Int64).alias("CoachChange")
)

# Look up each row's team effect with a single vectorised NumPy index
# instead of calling a Python lambda once per row.
team_effect_per_row = true_team_effects[team_ids]

# Observation noise; drawn after Date so the RNG stream order is preserved.
noise = np.random.normal(0, noise_sd, size=n)

# Points = intercept + treatment effect + date trend + team effect + noise
sim_data = sim_data.with_columns(
    (
        beta0
        + beta1 * pl.col("CoachChange")
        + beta2 * pl.col("Date")
        + team_effect_per_row
        + noise
    ).alias("Points")
)

# Base predictors for the regression: treatment indicator and date
base_predictors = sim_data.select(["CoachChange", "Date"]).to_numpy()

# One-hot encode Team, dropping team 0 as the reference category to avoid
# perfect multicollinearity with the intercept. Column j-1 flags team j
# (j = 1 .. n_teams-1); rows belonging to team 0 are all zeros.
# Broadcasting (n, 1) against (n_teams-1,) builds the whole matrix at once.
team_one_hot = (team_ids[:, None] == np.arange(1, n_teams)).astype(float)

# Full design matrix [CoachChange, Date, team dummies] and the response
X = np.hstack([base_predictors, team_one_hot])
y = sim_data["Points"].to_numpy()

# Ordinary least squares on [CoachChange, Date, team dummies] with an intercept
model = LinearRegression(fit_intercept=True).fit(X, y)

# The first two coefficients belong to CoachChange and Date; the remaining
# ones are the team dummies.
intercept_hat = model.intercept_
leading_coefs = model.coef_[:2]

# Report estimates next to the true simulation parameters.
# Because team 0 is the omitted reference category, the fitted intercept
# estimates beta0 + true_team_effects[0], not beta0 on its own.
print(f"Estimated Intercept: {intercept_hat:.2f}")
print(f"Estimated Coefficients for [CoachChange, Date]: {leading_coefs}")
print(f"True Coefficients: [CoachChange: {beta1}, Date: {beta2}]")


Estimated Intercept: 1.97
Estimated Coefficients for [CoachChange, Date]: [2.24278747 1.48301682]
True Coefficients: [CoachChange: 2, Date: 1.5]
