## Simulation

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

# Synthetic data set: y = beta1 * x + beta2 * x^2 + noise.
np.random.seed(42)  # fixed seed so every run draws the same sample
n = 150
beta1, beta2 = 2, 0.16  # coefficients of the linear and quadratic terms

# The draw order (covariate first, then noise) determines the generated values.
x = np.random.uniform(low=0, high=5, size=n)
epsilon = np.random.normal(loc=0, scale=1, size=n)

y = beta1 * x + beta2 * x**2 + epsilon

# Single-column 2-D view of x, used below as the feature matrix.
x_reshape = np.reshape(x, (-1, 1))

# Hold out 20% of the observations as a test set (fixed split seed).
X_train, X_test, y_train, y_test = train_test_split(
    x_reshape,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear-only model (x as the single feature) on the training rows.
model = LinearRegression().fit(X_train, y_train)

# Mean squared error of the linear-only fit on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"MSE: {mse:.4f}")


# Quadratic design matrices: columns [x, x^2] for the training and test rows.
x_train_quad = np.hstack([X_train, X_train**2])
x_test_quad = np.hstack([X_test, X_test**2])

# Fit the model that includes the squared term and score it on the same test rows.
model_a2 = LinearRegression()
model_a2.fit(x_train_quad, y_train)
y_pred_a2 = model_a2.predict(x_test_quad)
mse2 = mean_squared_error(y_test, y_pred_a2)
print(f"MSE2: {mse2:.4f}")


MSE: 0.5724
MSE2: 0.4743


## Competition

In [None]:
from sklearn.utils import resample

# Competition data: same generating process, y = beta1 * x + beta2 * x^2 + noise.
np.random.seed(42)  # reseed so this cell draws the same sample on every run
n = 150
beta1, beta2 = 2, 0.16  # linear and quadratic coefficients

# Covariate is drawn before the noise; swapping the draws would change the values.
x = np.random.uniform(low=0, high=5, size=n)
epsilon = np.random.normal(loc=0, scale=1, size=n)

y = beta1 * x + beta2 * x**2 + epsilon

# Single-column 2-D view of x, used below as the feature matrix.
x_reshape = np.reshape(x, (-1, 1))

# Hold out 20% of the observations as a fixed test set.
X_train, X_test, y_train, y_test = train_test_split(x_reshape, y, test_size=0.2, random_state=42)

# Bootstrap: refit on resampled training sets and score every fit on the same test set.
n_bootstraps = 1000
bootstrap_scores = []

# Instantiate the estimator here rather than reusing whatever `model` happens to be
# bound to in the kernel, so the result does not depend on the state left by other cells.
model = LinearRegression()

for _ in range(n_bootstraps):
    # resample() is called without random_state, so it draws from NumPy's global RNG;
    # the np.random.seed call at the top of this cell is what makes the loop reproducible.
    X_resampled, y_resampled = resample(X_train, y_train)
    model.fit(X_resampled, y_resampled)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    bootstrap_scores.append(mse)

# Average test MSE over the bootstrap fits and its spread (population std, ddof=0).
mean_mse = np.mean(bootstrap_scores)
std_mse = np.std(bootstrap_scores)
print(f"Bootstrap MSE: {mean_mse:.4f} ± {std_mse:.4f}")


## Real World

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.utils import resample

# "Real world" data: same generating process, y = beta1 * x + beta2 * x^2 + noise.
np.random.seed(42)  # reseed so this cell draws the same sample on every run
n = 150
beta1, beta2 = 2, 0.16  # linear and quadratic coefficients

# Covariate is drawn before the noise; swapping the draws would change the values.
x = np.random.uniform(low=0, high=5, size=n)
epsilon = np.random.normal(loc=0, scale=1, size=n)

y = beta1 * x + beta2 * x**2 + epsilon

# Single-column 2-D view of x (shape (n, 1)).
x_reshape = np.reshape(x, (-1, 1))

# Bootstrap: refit on resampled copies of the full sample.
n_bootstraps = 1000
bootstrap_scores = []

# Instantiate the estimator here rather than reusing whatever `model` happens to be
# bound to in the kernel, so the result does not depend on the state left by other cells.
model = LinearRegression()

for _ in range(n_bootstraps):
    # The feature matrix built in this cell is `x_reshape`; the name `X` was never
    # defined and raised a NameError on a fresh kernel.
    X_resampled, y_resampled = resample(x_reshape, y)
    model.fit(X_resampled, y_resampled)
    # NOTE: each fit is scored on the full sample, which includes the rows it was
    # trained on, so this estimate is optimistic; the cross-validation below scores
    # on held-out folds only.
    y_pred = model.predict(x_reshape)
    mse = mean_squared_error(y, y_pred)
    bootstrap_scores.append(mse)

# Average MSE over the bootstrap fits and its spread (population std, ddof=0).
mean_mse = np.mean(bootstrap_scores)
std_mse = np.std(bootstrap_scores)
print(f"Bootstrap MSE: {mean_mse:.4f} ± {std_mse:.4f}")

# 5-fold cross-validation. The scorer returns negated MSE, so the mean is negated
# back; the standard deviation is unaffected by the sign.
cv_scores = cross_val_score(model, x_reshape, y, cv=5, scoring='neg_mean_squared_error')
mean_cv_mse = -np.mean(cv_scores)
std_cv_mse = np.std(cv_scores)
print(f"Cross-Validation MSE: {mean_cv_mse:.4f} ± {std_cv_mse:.4f}")
