In [None]:
!pip install polars 
!pip install seaborn 
!pip install scikit-learn


In [1]:
import numpy as np
import polars as pl
import seaborn as sns
from sklearn.linear_model import LinearRegression

In [None]:
# Seed NumPy's global generator so every draw below is reproducible.
np.random.seed(42)

# Ground-truth parameters of the data-generating process:
#   beta0 = intercept, beta1 = CoachChange, beta2 = Team_Strength,
#   beta3 = Opponent_Strength, beta4 = Match_Location.
beta0, beta1, beta2, beta3, beta4 = 3, 2, 5, -3, 1
n, noise_sd = 1000, 2  # sample size and standard deviation of the noise term

# Draw the predictors, then the noise. The order of these calls fixes which
# random numbers each variable receives, so it must not be rearranged.
coach_change = np.random.choice([0, 1], size=n)
team_strength = np.random.uniform(50, 100, size=n)
opponent_strength = np.random.uniform(50, 100, size=n)
match_location = np.random.choice([0, 1], size=n)
noise = np.random.normal(0, noise_sd, size=n)

predictor_cols = ["CoachChange", "Team_Strength", "Opponent_Strength", "Match_Location"]
predictors = pl.DataFrame({
    "CoachChange": coach_change,
    "Team_Strength": team_strength,
    "Opponent_Strength": opponent_strength,
    "Match_Location": match_location,
})

# Outcome: linear combination of the predictors plus Gaussian noise.
points_expr = (
    beta0
    + beta1 * pl.col("CoachChange")
    + beta2 * pl.col("Team_Strength")
    + beta3 * pl.col("Opponent_Strength")
    + beta4 * pl.col("Match_Location")
    + noise
)
sim_data = predictors.with_columns(points_expr.alias("Points"))

# Design matrix (columns in `predictor_cols` order) and response vector.
X = sim_data.select(predictor_cols).to_numpy()
y = sim_data.get_column("Points").to_numpy()

# Ordinary least squares with an intercept term; `fit` returns the estimator.
model = LinearRegression(fit_intercept=True).fit(X, y)

# Report the estimates next to the values used to generate the data.
print(f"Intercept: {model.intercept_}")
print(f"Coefficients: {model.coef_}")
print(f"True Intercept: {beta0}")
print(f"True Coefficients: {beta1}, {beta2}, {beta3}, {beta4}")


Intercept: 3.8713185154581424
Coefficients: [ 1.84126369  4.99143831 -3.00177498  0.9777615 ]
True Intercept: 3
True Coefficients: 2, 5, -3, 1
