In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


In [3]:
def k_fold_cv(model, X, y, k, metric=mean_squared_error, random_state=None):
    """Estimate a model's prediction error with shuffled k-fold cross-validation.

    The sample indices are shuffled once and split into ``k`` folds whose
    sizes differ by at most one, so every sample is used for validation
    exactly once even when ``len(y)`` is not divisible by ``k``.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refit in place
        on every fold and is left fitted on the last training split.
    X, y : array-like
        Features and targets. Both must support indexing with an integer
        index array (e.g. NumPy arrays).
    k : int
        Number of folds, with ``2 <= k <= len(y)``.
    metric : callable, default=mean_squared_error
        Called as ``metric(y_true, y_pred)`` on each validation fold.
    random_state : int or None, default=None
        Seed for the shuffle. ``None`` draws from NumPy's global random
        state, so ``np.random.seed`` still controls the result.

    Returns
    -------
    float
        Unweighted mean of the ``k`` per-fold metric values.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 2 or larger than the number of samples.
    """
    n = len(y)
    if not 2 <= k <= n:
        raise ValueError(
            f"k must satisfy 2 <= k <= n_samples ({n}), got k={k}"
        )

    # The ``np.random`` module and ``RandomState`` expose the same ``shuffle``
    # method, so either can serve as the source of randomness here.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    indices = np.arange(n)
    rng.shuffle(indices)

    scores = []
    # array_split hands the n % k leftover samples to the first folds instead
    # of silently excluding them from validation.
    for val_idx in np.array_split(indices, k):
        # Boolean mask -> sorted training indices without a per-fold sort.
        is_train = np.ones(n, dtype=bool)
        is_train[val_idx] = False
        train_idx = np.flatnonzero(is_train)

        model.fit(X[train_idx], y[train_idx])
        predictions = model.predict(X[val_idx])
        scores.append(metric(y[val_idx], predictions))

    return np.mean(scores)


In [4]:
def bootstrap(model, X, y, B, metric=mean_squared_error, random_state=None):
    """Estimate a model's prediction error with the out-of-bag bootstrap.

    For each of ``B`` rounds, ``len(y)`` samples are drawn with replacement to
    form the training set; the model is fit on them and scored on the samples
    that were not drawn (the out-of-bag set). Rounds whose out-of-bag set is
    empty are skipped.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refit in place
        on every scored round.
    X, y : array-like
        Features and targets. Both must support indexing with an integer
        index array (e.g. NumPy arrays).
    B : int
        Number of bootstrap rounds, at least 1.
    metric : callable, default=mean_squared_error
        Called as ``metric(y_true, y_pred)`` on each out-of-bag set.
    random_state : int or None, default=None
        Seed for the resampling. ``None`` draws from NumPy's global random
        state, so ``np.random.seed`` still controls the result.

    Returns
    -------
    float
        Mean of the metric over all rounds that had out-of-bag samples.

    Raises
    ------
    ValueError
        If ``B`` is smaller than 1, or if no round produced any out-of-bag
        sample (previously this returned NaN with only a RuntimeWarning).
    """
    if B < 1:
        raise ValueError(f"B must be at least 1, got B={B}")

    n = len(y)
    # The ``np.random`` module and ``RandomState`` expose the same ``choice``
    # method, so either can serve as the source of randomness here.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    all_indices = np.arange(n)  # loop-invariant, built once
    scores = []

    for _ in range(B):
        indices = rng.choice(all_indices, size=n, replace=True)
        out_of_bag = np.setdiff1d(all_indices, indices)

        # Nothing to evaluate on this round: skip before paying for a fit.
        if len(out_of_bag) == 0:
            continue

        model.fit(X[indices], y[indices])
        predictions = model.predict(X[out_of_bag])
        scores.append(metric(y[out_of_bag], predictions))

    if not scores:
        raise ValueError(
            "no bootstrap round produced out-of-bag samples; "
            "use more data or a larger B"
        )

    return np.mean(scores)


In [5]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

# Both estimators draw from NumPy's global random state (fold shuffling and
# bootstrap resampling), so seed it here to make the printed errors
# reproducible under Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

# Synthetic, nearly noise-free linear problem: 100 samples, 5 features.
X, y = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=SEED)

# The same estimator instance is reused; each evaluation refits it.
model = LinearRegression()

k = 5
cv_score = k_fold_cv(model, X, y, k)
print(f"Average k-Fold Cross-Validation Error (MSE): {cv_score}")

B = 10
bootstrap_score = bootstrap(model, X, y, B)
print(f"Average Bootstrapping Error (MSE): {bootstrap_score}")


Average k-Fold Cross-Validation Error (MSE): 0.010101531424416669
Average Bootstrapping Error (MSE): 0.00998579537023119
