In [1]:
import numpy as np
import scipy.stats as stats
from sklearn.utils import resample
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
def bootstrap_metrics(model, X_train, y_train, X_test, y_test, n_iterations=200, random_state=42):
    """Estimate 95% bootstrap confidence intervals for test-set MSE and R².

    On every iteration the training data is resampled with replacement, the
    model is refit on that sample, and the refit model is scored against the
    fixed test set. The intervals are the 2.5th and 97.5th percentiles of the
    collected scores.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is refit in
        place on every iteration, so on return it holds the fit from the last
        bootstrap sample.
    X_train, y_train
        Training features and targets; resampled together on each iteration.
    X_test, y_test
        Held-out features and targets used to score every refit model.
    n_iterations : int, default 200
        Number of bootstrap samples to draw.
    random_state : int, numpy.random.RandomState or None, default 42
        Seed (or generator) controlling the resampling. One generator is
        shared by all iterations so each iteration draws a different sample
        while the whole run stays reproducible.

    Returns
    -------
    tuple
        ``((mse_lower, mse_upper), (r2_lower, r2_upper))``.

    Raises
    ------
    ValueError
        If ``n_iterations`` is smaller than 1 (no scores to take percentiles of).
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1")

    # Build the generator once, outside the loop. Passing the same integer
    # seed to resample() on every iteration would reproduce the identical
    # sample each time and collapse the interval to a single point.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    mse_scores = []
    r2_scores = []

    for _ in range(n_iterations):
        # Generate a bootstrap sample (the generator state advances each call)
        X_resampled, y_resampled = resample(X_train, y_train, random_state=rng)

        # Fit the model on the generated sample
        model.fit(X_resampled, y_resampled)

        # Predict on the fixed test set
        y_pred_resampled = model.predict(X_test)

        # Record the metrics for this bootstrap replicate
        mse_scores.append(mean_squared_error(y_test, y_pred_resampled))
        r2_scores.append(r2_score(y_test, y_pred_resampled))

    # 95% percentile confidence intervals
    mse_lower = np.percentile(mse_scores, 2.5)
    mse_upper = np.percentile(mse_scores, 97.5)

    r2_lower = np.percentile(r2_scores, 2.5)
    r2_upper = np.percentile(r2_scores, 97.5)

    return (mse_lower, mse_upper), (r2_lower, r2_upper)

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_bootstrap_distribution(mse_scores, r2_scores):
    """Show side-by-side histograms of bootstrap MSE and R² scores.

    Each panel is a 30-bin seaborn histogram with a KDE curve overlaid: MSE on
    the left in blue, R² on the right in green. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    # One row, two columns: a panel per metric
    _, axes = plt.subplots(1, 2, figsize=(14, 6))

    # (scores, bar colour, panel title, x-axis label) for each panel, left to right
    panel_specs = (
        (mse_scores, "blue", 'Bootstrap Distribution of MSE', 'Mean Squared Error (MSE)'),
        (r2_scores, "green", 'Bootstrap Distribution of R²', 'R-squared (R²)'),
    )

    for position, (scores, colour, title, x_label) in enumerate(panel_specs):
        panel = axes[position]
        sns.histplot(scores, kde=True, ax=panel, color=colour, bins=30)
        panel.set_title(title)
        panel.set_xlabel(x_label)
        panel.set_ylabel('Frequency')

    # Render the figure
    plt.tight_layout()
    plt.show()

In [11]:
import preprocessing
import my_model
import bootstrap
# Load and preprocess the data
X_train, X_test, y_train, y_test = preprocessing.load_and_preprocess_data('../data/DataTransformed.csv')

# NOTE(review): `model` is never bound in this cell, and the recorded
# AttributeError ("module 'model' has no attribute 'fit'") shows the name
# resolved to a module when the cell ran. Bind `model` to an estimator
# instance exposing fit/predict before this loop (presumably one built from
# `my_model` -- confirm which constructor to use).

# Compute the bootstrap metrics.
# One seeded generator is shared by every iteration so each bootstrap sample
# differs while the run stays reproducible; passing random_state=42 to each
# resample() call would draw the identical sample 200 times and collapse the
# plotted distributions to a single value.
rng = np.random.RandomState(42)
mse_scores = []
r2_scores = []
for _ in range(200):
    # Draw a bootstrap sample, refit, and score on the fixed test set
    X_resampled, y_resampled = resample(X_train, y_train, random_state=rng)
    model.fit(X_resampled, y_resampled)
    y_pred_resampled = model.predict(X_test)
    mse_scores.append(mean_squared_error(y_test, y_pred_resampled))
    r2_scores.append(r2_score(y_test, y_pred_resampled))

# Visualize the distributions
plot_bootstrap_distribution(mse_scores, r2_scores)

AttributeError: module 'model' has no attribute 'fit'