In [3]:
import pandas as pd
data = pd.read_csv('data.txt', delim_whitespace=True)
y_data = pd.read_csv('Y.txt', delim_whitespace=True)  # assuming Y.txt has a similar format
print(data.head())
print(y_data.head())



   1.3911941e+000  2.4867570e-005  9.4800498e-001  2.0621103e+000  \
0        1.111800        0.000020        0.931299        1.847535   
1        0.888517        0.000016        0.916051        1.665100   
2        0.710075        0.000016        0.901771        1.508772   
3        0.567471        0.000016        0.888154        1.373805   
4        0.453505        0.000016        0.875008        1.256457   

   9.1807675e-002  2.5920308e-002  1.2164798e+000  8.3430861e+000  
0        0.090284        0.017671        1.181045        8.253957  
1        0.088796        0.012010        1.146325        8.165857  
2        0.087342        0.008140        1.112309        8.078732  
3        0.085918        0.005503        1.078984        7.992552  
4        0.084523        0.003712        1.046337        7.907297  
   1.9539000e+000
0          1.9256
1          1.8972
2          1.8689
3          1.8406
4          1.8406


In [4]:
# Check the length of both datasets to ensure they align
print("Length of simulation data:", len(data))
print("Length of observation data:", len(y_data))


calibration_data = data.iloc[:3000, :]
evaluation_data = data.iloc[3000:, :]
calibration_obs = y_data.iloc[:3000, :]
evaluation_obs = y_data.iloc[3000:, :]


Length of simulation data: 13149
Length of observation data: 13149


In [5]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Function to calculate RMSE for each model
def calculate_rmse(model_outputs, observed_data):
    rmse_scores = []
    for column in model_outputs.columns:
        rmse = sqrt(mean_squared_error(observed_data, model_outputs[column]))
        rmse_scores.append(rmse)
    return rmse_scores

# Calculate RMSE for calibration and evaluation phases
rmse_cal = calculate_rmse(calibration_data, calibration_obs)
rmse_eval = calculate_rmse(evaluation_data, evaluation_obs)

print("Calibration RMSE for each model:", rmse_cal)
print("Evaluation RMSE for each model:", rmse_eval)


Calibration RMSE for each model: [31.405220753998154, 19.018637623421213, 18.83273796299311, 17.498699539304706, 26.0298864318581, 20.01389052179046, 19.030473465568054, 16.273959980856745]
Evaluation RMSE for each model: [50.39485633879129, 25.26210165919862, 28.721305647598417, 27.481983615786966, 42.17904136428525, 32.87489142938715, 31.011413522564016, 21.95681607952398]


<p><span style="font-size: 18pt;"><strong>#2.4 Equal Weights Averaging (EWA)</strong></span></p>

In [6]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt

calibration_data = data.iloc[:3000, :]
evaluation_data = data.iloc[3000:, :]
calibration_obs = y_data.iloc[:3000]
evaluation_obs = y_data.iloc[3000:]

# Compute the averaged model output (Equal Weights Averaging)
average_model_output_cal = calibration_data.mean(axis=1)
average_model_output_eval = evaluation_data.mean(axis=1)

# Calculate RMSE for the averaged model output
calibration_rmse = sqrt(mean_squared_error(calibration_obs, average_model_output_cal))
evaluation_rmse = sqrt(mean_squared_error(evaluation_obs, average_model_output_eval))

print("Calibration RMSE for EWA:", calibration_rmse)
print("Evaluation RMSE for EWA:", evaluation_rmse)


Calibration RMSE for EWA: 17.902309918682075
Evaluation RMSE for EWA: 26.789392484119954


In [7]:
############# 2.4 #############

<p><span style="font-size: 18pt;"><strong>#2.4 Bates-Granger model averaging method</strong></span></p>

In [8]:
#2.4 Bates-Granger model averaging method
import numpy as np

# Calculate variance of residuals for each model
variances = []
for i in range(calibration_data.shape[1]):  # Iterate over each model column
    residuals = calibration_data.iloc[:, i] - calibration_obs.iloc[:, 0]
    variance = np.mean(residuals**2)
    variances.append(variance)

# Print calculated variances for verification
print("Variances:", variances)


Variances: [986.2878906073563, 361.7085770510129, 354.6720191827619, 306.2044855668628, 677.5549876554303, 400.555813818214, 362.15892032368976, 264.8417734585269]


In [9]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Initialize lists to store RMSEs
rmse_cal_list = []
rmse_eval_list = []

# Calculate RMSE for each model
for i in range(calibration_data.shape[1]):
    model_cal_rmse = sqrt(mean_squared_error(calibration_obs, calibration_data.iloc[:, i]))
    model_eval_rmse = sqrt(mean_squared_error(evaluation_obs, evaluation_data.iloc[:, i]))
    rmse_cal_list.append(model_cal_rmse)
    rmse_eval_list.append(model_eval_rmse)

# Print the RMSE values for each model
print("Calibration RMSEs:", rmse_cal_list)
print("Evaluation RMSEs:", rmse_eval_list)


Calibration RMSEs: [31.405220753998154, 19.018637623421213, 18.83273796299311, 17.498699539304706, 26.0298864318581, 20.01389052179046, 19.030473465568054, 16.273959980856745]
Evaluation RMSEs: [50.39485633879129, 25.26210165919862, 28.721305647598417, 27.481983615786966, 42.17904136428525, 32.87489142938715, 31.011413522564016, 21.95681607952398]


In [10]:
# Calculate Bates-Granger weights
weights = [1 / variance for variance in variances]
normalized_weights = [weight / sum(weights) for weight in weights]

# Print normalized weights
print("Bates-Granger Weights:", normalized_weights)


Bates-Granger Weights: [0.04976614950782335, 0.13569971445492973, 0.13839194514081088, 0.1602972945705052, 0.07244246078324626, 0.12253910423578898, 0.13553097236387668, 0.18533235894301892]


In [11]:
# Weighted average of model outputs
weighted_average_cal = np.sum(calibration_data * normalized_weights, axis=1)
weighted_average_eval = np.sum(evaluation_data * normalized_weights, axis=1)

from sklearn.metrics import mean_squared_error
from math import sqrt

# Calculate RMSE for the weighted averages
rmse_cal = sqrt(mean_squared_error(calibration_obs, weighted_average_cal))
rmse_eval = sqrt(mean_squared_error(evaluation_obs, weighted_average_eval))

# Print RMSE results
print(" BGA Calibration RMSE:", rmse_cal)
print(" BGA Evaluation RMSE:", rmse_eval)



 BGA Calibration RMSE: 16.966986617810438
 BGA Evaluation RMSE: 24.96931610731972


<p><span style="font-size: 18pt;"><strong>#2.4 Information Criterion Averaging (ICA)<br /><br /></strong></span></p>

In [12]:


import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
from math import sqrt


# Split the data for calibration and evaluation
calibration_data = data.iloc[:3000, :]
evaluation_data = data.iloc[3000:, :]
calibration_obs = y_data.iloc[:3000]
evaluation_obs = y_data.iloc[3000:]

# Lists to store models, AICs, BICs, and predictions
models = []
AICs = []
BICs = []
calibration_predictions = []
evaluation_predictions = []

# 
for col in calibration_data.columns:
    # Add constant to include intercept
    X_cal = sm.add_constant(calibration_data[col])
    X_eval = sm.add_constant(evaluation_data[col])

    # Fit model on calibration data
    model = sm.OLS(calibration_obs, X_cal).fit()
    models.append(model)
    AICs.append(model.aic)
    BICs.append(model.bic)

    # Predict on calibration and evaluation data
    calibration_predictions.append(model.predict(X_cal))
    evaluation_predictions.append(model.predict(X_eval))

# Convert AICs and BICs to weights
min_aic = min(AICs)
min_bic = min(BICs)
weights_aic = np.exp(-(np.array(AICs) - min_aic) / 2)
weights_bic = np.exp(-(np.array(BICs) - min_bic) / 2)
weights_aic /= weights_aic.sum()
weights_bic /= weights_bic.sum()

# Compute weighted averages of predictions
weighted_avg_cal_aic = np.average(calibration_predictions, weights=weights_aic, axis=0)
weighted_avg_eval_aic = np.average(evaluation_predictions, weights=weights_aic, axis=0)
weighted_avg_cal_bic = np.average(calibration_predictions, weights=weights_bic, axis=0)
weighted_avg_eval_bic = np.average(evaluation_predictions, weights=weights_bic, axis=0)

# Calculate RMSE for calibration and evaluation
rmse_cal_aic = sqrt(mean_squared_error(calibration_obs, weighted_avg_cal_aic))
rmse_eval_aic = sqrt(mean_squared_error(evaluation_obs, weighted_avg_eval_aic))
rmse_cal_bic = sqrt(mean_squared_error(calibration_obs, weighted_avg_cal_bic))
rmse_eval_bic = sqrt(mean_squared_error(evaluation_obs, weighted_avg_eval_bic))

# Output results
print("Calibration RMSE AIC:", rmse_cal_aic)
print("Evaluation RMSE AIC:", rmse_eval_aic)
print("Calibration RMSE BIC:", rmse_cal_bic)
print("Evaluation RMSE BIC:", rmse_eval_bic)
print(weights_aic)


Calibration RMSE AIC: 16.199021015538243
Evaluation RMSE AIC: 21.729613891266805
Calibration RMSE BIC: 16.199021015538243
Evaluation RMSE BIC: 21.729613891266805
[0.00000000e+000 1.62860541e-197 1.32843374e-188 1.31739391e-098
 0.00000000e+000 2.37747933e-272 2.70478861e-206 1.00000000e+000]


In [13]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
from math import sqrt


# Fit models for the entire dataset, one per column
models = []
AICs = []
for col in data.columns:
    X = sm.add_constant(data[[col]])  
    model = sm.OLS(y_data, X).fit()
    models.append(model)
    AICs.append(model.aic)

# Calculate weights from AIC
min_aic = min(AICs)
weights_aic = np.exp(-(np.array(AICs) - min_aic) / 2)
weights_aic /= weights_aic.sum()

# Output the AIC weights
print("AIC Weights (Betas):")
for idx, weight in enumerate(weights_aic, 1):
    print(f"Beta_{idx}: {weight:.4f}")

# Prepare predictions using the correct subsets of data
full_predictions = [m.predict(sm.add_constant(data[[data.columns[i]]])) for i, m in enumerate(models)]

# Now handle the calibration and evaluation data separately
calibration_predictions = np.column_stack([p[:3000] for p in full_predictions])
evaluation_predictions = np.column_stack([p[3000:] for p in full_predictions])

# Compute weighted averages
weighted_avg_cal = np.average(calibration_predictions, weights=weights_aic, axis=1)
weighted_avg_eval = np.average(evaluation_predictions, weights=weights_aic, axis=1)

# Calculate RMSE
rmse_cal = sqrt(mean_squared_error(y_data.iloc[:3000], weighted_avg_cal))
rmse_eval = sqrt(mean_squared_error(y_data.iloc[3000:], weighted_avg_eval))

# Print RMSE results
print("\nCalibration RMSE:", rmse_cal)
print("Evaluation RMSE:", rmse_eval)


AIC Weights (Betas):
Beta_1: 0.0000
Beta_2: 0.0000
Beta_3: 0.0000
Beta_4: 0.0000
Beta_5: 0.0000
Beta_6: 0.0000
Beta_7: 0.0000
Beta_8: 1.0000

Calibration RMSE: 16.315458052956362
Evaluation RMSE: 21.594831189824628


In [14]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
from math import sqrt



# Fit models for the entire dataset, one per column
models = []
BICs = []
for col in data.columns:
    X = sm.add_constant(data[[col]])  
    model = sm.OLS(y_data, X).fit()
    models.append(model)
    BICs.append(model.bic)

# Calculate weights from BIC
min_bic = min(BICs)
weights_bic = np.exp(-(np.array(BICs) - min_bic) / 2)
weights_bic /= weights_bic.sum()

# Output the BIC weights
print("BIC Weights (Betas):")
for idx, weight in enumerate(weights_bic, 1):
    print(f"Beta_{idx}: {weight:.4f}")

# Prepare predictions using the correct subsets of data
full_predictions = [m.predict(sm.add_constant(data[[data.columns[i]]])) for i, m in enumerate(models)]

# Now handle the calibration and evaluation data separately
calibration_predictions = np.column_stack([p[:3000] for p in full_predictions])
evaluation_predictions = np.column_stack([p[3000:] for p in full_predictions])

# Compute weighted averages
weighted_avg_cal = np.average(calibration_predictions, weights=weights_bic, axis=1)
weighted_avg_eval = np.average(evaluation_predictions, weights=weights_bic, axis=1)

# Calculate RMSE
rmse_cal = sqrt(mean_squared_error(y_data.iloc[:3000], weighted_avg_cal))
rmse_eval = sqrt(mean_squared_error(y_data.iloc[3000:], weighted_avg_eval))

# Print RMSE results
print("\nCalibration RMSE:", rmse_cal)
print("Evaluation RMSE:", rmse_eval)


BIC Weights (Betas):
Beta_1: 0.0000
Beta_2: 0.0000
Beta_3: 0.0000
Beta_4: 0.0000
Beta_5: 0.0000
Beta_6: 0.0000
Beta_7: 0.0000
Beta_8: 1.0000

Calibration RMSE: 16.315458052956362
Evaluation RMSE: 21.594831189824628


<p><span style="font-size: 18pt;"><strong>Granger-Ramanathan averaging</strong></span></p>

In [15]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error
from math import sqrt


# Fit models for the entire dataset, one per column
models = []
predictions = []

for col in data.columns:
    X = sm.add_constant(data[[col]])  
    model = sm.OLS(y_data, X).fit()
    models.append(model)
    predictions.append(model.predict(X))

# Convert predictions to a matrix (Y)
Y = np.column_stack(predictions)

# Fit the OLS model to find betas (weights)
X = sm.add_constant(Y)
ols_model = sm.OLS(y_data, X).fit()
betas = ols_model.params[1:]  # exclude the intercept

# Print the GRA weights (betas)
print("GRA Weights (Betas):")
for idx, beta in enumerate(betas, 1):
    print(f"Beta_{idx}: {beta:.4f}")

# Prepare weighted predictions using the betas
weighted_predictions = np.dot(Y, betas)

# Split the data for calibration and evaluation
calibration_data = data.iloc[:3000, :]
evaluation_data = data.iloc[3000:, :]
calibration_obs = y_data.iloc[:3000]
evaluation_obs = y_data.iloc[3000:]

# Compute weighted averages for calibration and evaluation
calibration_predictions = weighted_predictions[:3000]
evaluation_predictions = weighted_predictions[3000:]

# Calculate RMSE for calibration and evaluation
rmse_cal = sqrt(mean_squared_error(calibration_obs, calibration_predictions))
rmse_eval = sqrt(mean_squared_error(evaluation_obs, evaluation_predictions))

# Print RMSE results
print("\nCalibration RMSE:", rmse_cal)
print("Evaluation RMSE:", rmse_eval)


GRA Weights (Betas):
Beta_1: -0.0367
Beta_2: 0.2473
Beta_3: 0.0959
Beta_4: 0.2764
Beta_5: -0.0953
Beta_6: -0.1930
Beta_7: -0.0269
Beta_8: 0.7093

Calibration RMSE: 15.708349089078702
Evaluation RMSE: 20.29853756594686


<p><span style="font-size: 18pt;"><strong>Mallows Model Averaging (MMA)</strong></span></p>

In [17]:

import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error
from math import sqrt



# Fit models for the entire dataset, one per column
models = []
predictions = []
p_k = []

for col in data.columns:
    X = sm.add_constant(data[[col]])  
    model = sm.OLS(y_data, X).fit()
    models.append(model)
    predictions.append(model.predict(X))
    p_k.append(len(model.params))  # Number of parameters for each model

# Convert predictions to a matrix (Y in the formula)
Y = np.column_stack(predictions)

# Ensure y_data is a numpy array
y_data_np = y_data.to_numpy()

# Define the objective function C_K(beta) as described
def CK(beta, Y, y, p_k, S2):
    residuals = y - np.dot(Y, beta)
    penalty = 2 * S2 * np.dot(beta, p_k)
    return np.sum(residuals**2) + penalty

# Estimate S2 (variance of the forecast error) using the model with the most parameters
S2 = np.var(y_data_np - np.dot(Y, np.ones(len(models)) / len(models)))

# Define the constraint that the weights sum to one
constraints = ({'type': 'eq', 'fun': lambda beta: np.sum(beta) - 1})

# Define the bounds to be between 0 and 1 for each beta
bounds = [(0, 1) for _ in range(len(models))]

# Set the initial guess for the weights to random values that sum to 1
np.random.seed(0)  # For reproducibility
initial_guess = np.random.rand(len(models))
initial_guess /= initial_guess.sum()

# Perform the optimization to find the optimal weights using a different method
result = minimize(CK, initial_guess, args=(Y, y_data_np, p_k, S2), constraints=constraints, bounds=bounds, method='trust-constr')

# Get the optimal weights (betas)
betas = result.x

# Print the MMA weights (betas)
print("MMA Weights (Betas):")
for idx, beta in enumerate(betas, 1):
    print(f"Beta_{idx}: {beta:.4f}")

# Prepare weighted predictions using the betas
weighted_predictions = np.dot(Y, betas)

# Split the data for calibration and evaluation
calibration_obs = y_data_np[:3000]
evaluation_obs = y_data_np[3000:]

# Compute weighted averages for calibration and evaluation
calibration_predictions = weighted_predictions[:3000]
evaluation_predictions = weighted_predictions[3000:]

# Calculate RMSE for calibration and evaluation
rmse_cal = sqrt(mean_squared_error(calibration_obs, calibration_predictions))
rmse_eval = sqrt(mean_squared_error(evaluation_obs, evaluation_predictions))

# Print RMSE results
print("\nCalibration RMSE:", rmse_cal)
print("Evaluation RMSE:", rmse_eval)


  self.H.update(delta_x, delta_g)
  self.H.update(self.x - self.x_prev, self.g - self.g_prev)


MMA Weights (Betas):
Beta_1: 0.6353
Beta_2: 0.0000
Beta_3: 0.0000
Beta_4: 0.0000
Beta_5: 0.3647
Beta_6: 0.0000
Beta_7: 0.0000
Beta_8: 0.0000

Calibration RMSE: 26.362207397556055
Evaluation RMSE: 39.84748976888232


In [18]:
import statsmodels.api as sm
from scipy.optimize import minimize
from sklearn.metrics import mean_squared_error
from math import sqrt


models = []
predictions = []
p_k = []

for col in data.columns:
    X = sm.add_constant(data[[col]])  
    model = sm.OLS(y_data, X).fit()
    models.append(model)
    predictions.append(model.predict(X))
    p_k.append(len(model.params))  # Number of parameters for each model

# Convert predictions to a matrix (Y in the formula)
Y = np.column_stack(predictions)

# Ensure y_data is a numpy array
y_data_np = y_data.to_numpy()

# Define the objective function C_K(beta) as described
def CK(beta, Y, y, p_k, S2):
    residuals = y - np.dot(Y, beta)
    penalty = 2 * S2 * np.dot(beta, p_k)
    return np.sum(residuals**2) + penalty

# Estimate S2 (variance of the forecast error) using the model with the most parameters
S2 = np.var(y_data_np - np.dot(Y, np.ones(len(models)) / len(models)))

# Define the constraint that the weights sum to one
constraints = ({'type': 'eq', 'fun': lambda beta: np.sum(beta) - 1})

# Define the bounds to be between 0 and 1 for each beta
bounds = [(0, 1) for _ in range(len(models))]

# Set the initial guess for the weights to random values that sum to 1
np.random.seed(0)  # For reproducibility
initial_guess = np.random.rand(len(models))
initial_guess /= initial_guess.sum()

# Perform the optimization to find the optimal weights using a different method
result = minimize(CK, initial_guess, args=(Y, y_data_np, p_k, S2), constraints=constraints, bounds=bounds, method='trust-constr')

# Get the optimal weights (betas)
betas = result.x

# Print the MMA weights (betas)
print("MMA Weights (Betas) with S^K=1:")
for idx, beta in enumerate(betas, 1):
    print(f"Beta_{idx}: {beta:.4f}")

# Prepare weighted predictions using the betas
weighted_predictions = np.dot(Y, betas)

# Split the data for calibration and evaluation
calibration_obs = y_data_np[:3000]
evaluation_obs = y_data_np[3000:]

# Compute weighted averages for calibration and evaluation
calibration_predictions = weighted_predictions[:3000]
evaluation_predictions = weighted_predictions[3000:]

# Calculate RMSE for calibration and evaluation
rmse_cal = sqrt(mean_squared_error(calibration_obs, calibration_predictions))
rmse_eval = sqrt(mean_squared_error(evaluation_obs, evaluation_predictions))

# Print RMSE results
print("\nCalibration RMSE:", rmse_cal)
print("Evaluation RMSE:", rmse_eval)


  self.H.update(delta_x, delta_g)
  self.H.update(self.x - self.x_prev, self.g - self.g_prev)


MMA Weights (Betas) with S^K=1:
Beta_1: 0.6353
Beta_2: 0.0000
Beta_3: 0.0000
Beta_4: 0.0000
Beta_5: 0.3647
Beta_6: 0.0000
Beta_7: 0.0000
Beta_8: 0.0000

Calibration RMSE: 26.362207397556055
Evaluation RMSE: 39.84748976888232


In [19]:
# Perform the optimization to find the optimal weights without the constraint
result_no_constraint = minimize(CK, initial_guess, args=(Y, y_data_np, p_k, S2), method='trust-constr')

# Get the optimal weights (betas) without constraints
betas_no_constraint = result_no_constraint.x

# Print the MMA weights (betas)
print("\nMMA Weights (Betas) with S^K=0:")
for idx, beta in enumerate(betas_no_constraint, 1):
    print(f"Beta_{idx}: {beta:.4f}")

# Prepare weighted predictions using the betas without constraints
weighted_predictions_no_constraint = np.dot(Y, betas_no_constraint)

# Compute weighted averages for calibration and evaluation without constraints
calibration_predictions_no_constraint = weighted_predictions_no_constraint[:3000]
evaluation_predictions_no_constraint = weighted_predictions_no_constraint[3000:]

# Calculate RMSE for calibration and evaluation without constraints
rmse_cal_no_constraint = sqrt(mean_squared_error(calibration_obs, calibration_predictions_no_constraint))
rmse_eval_no_constraint = sqrt(mean_squared_error(evaluation_obs, evaluation_predictions_no_constraint))

# Print RMSE results without constraints
print("\nCalibration RMSE without constraints:", rmse_cal_no_constraint)
print("Evaluation RMSE without constraints:", rmse_eval_no_constraint)



MMA Weights (Betas) with S^K=0:
Beta_1: 0.3209
Beta_2: 0.3146
Beta_3: -0.1832
Beta_4: -0.1744
Beta_5: 0.4205
Beta_6: -0.1441
Beta_7: 0.3207
Beta_8: -0.4843

Calibration RMSE without constraints: 39.31996445912326
Evaluation RMSE without constraints: 64.8128086266838


<p><span style="font-size: 18pt;"><strong>Basyn Model Avergaing&nbsp;</strong></span></p>

In [20]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.optimize import differential_evolution
from scipy.stats import norm
from sklearn.metrics import mean_squared_error
from math import sqrt
# Log liklihood 
def log_likelihood(theta, Y, y):
    K = Y.shape[1]
    betas = theta[:K]
    sigmas = theta[K:2*K]
    n = len(y)
    
    # Compute  log-likelihood
    log_lik = 0
    for t in range(n):
        pred_density = 0
        for k in range(K):
            pred_density += betas[k] * norm.pdf(y[t], loc=Y[t, k], scale=sigmas[k])
        log_lik += np.log(pred_density)
    
    return -log_lik  # Return negative log-likelihood for minimization


In [21]:
# Fit models for the entire dataset, one per column
models = []
predictions = []
p_k = []

for col in data.columns:
    X = sm.add_constant(data[[col]])  # Use double brackets to ensure DataFrame format
    model = sm.OLS(y_data, X).fit()
    models.append(model)
    predictions.append(model.predict(X))
    p_k.append(len(model.params))  

# Convert predictions to a matrix (Y in the formula)
Y = np.column_stack(predictions)

# Ensure y_data is a numpy array
y_data_np = y_data.to_numpy()

# Set the initial values for theta
K = len(models)
initial_betas = np.ones(K) / K
initial_sigmas = np.std(Y, axis=0) * 5
initial_theta = np.concatenate([initial_betas, initial_sigmas])


In [22]:
# Define the bounds for DE-MC
bounds = [(0, 1)] * K + [(0, 5 * np.std(y_data_np))] * K

# Normalize function
def normalize_theta(theta):
    K = len(theta) // 2
    betas = theta[:K]
    normalized_betas = betas / np.sum(betas)
    return np.concatenate([normalized_betas, theta[K:]])

# DE-MC optimization
def demc_optimization(Y, y, bounds, iterations=10000, popsize=15):
    # Initialize population
    population = []
    for _ in range(popsize):
        theta = np.random.uniform(low=[b[0] for b in bounds], high=[b[1] for b in bounds])
        theta = normalize_theta(theta)
        population.append(theta)
    
    population = np.array(population)
    
    # Main DE-MC loop
    for iter in range(iterations):
        new_population = []
        for i in range(popsize):
            indices = np.random.choice(range(popsize), 3, replace=False)
            a, b, c = population[indices]
            mutation = a + 0.8 * (b - c)
            mutation = np.clip(mutation, [b[0] for b in bounds], [b[1] for b in bounds])
            mutation = normalize_theta(mutation)
            new_population.append(mutation)
        
        population = np.array(new_population)
    
    # Find the best solution
    best_theta = min(population, key=lambda theta: log_likelihood(theta, Y, y))
    return best_theta

# Run DE-MC optimization
best_theta = demc_optimization(Y, y_data_np, bounds)
betas_bma = best_theta[:K]
sigmas_bma = best_theta[K:]


  x = np.asarray((x - loc)/scale, dtype=dtyp)


In [23]:
# Print the BMA weights (betas)
print("BMA Weights (Betas):")
for idx, beta in enumerate(betas_bma, 1):
    print(f"Beta_{idx}: {beta:.4f}")

# Prepare weighted predictions using the betas
weighted_predictions_bma = np.dot(Y, betas_bma)

# Split the data for calibration and evaluation
calibration_obs = y_data_np[:3000]
evaluation_obs = y_data_np[3000:]

# Compute weighted averages for calibration and evaluation
calibration_predictions_bma = weighted_predictions_bma[:3000]
evaluation_predictions_bma = weighted_predictions_bma[3000:]

# Calculate RMSE for calibration and evaluation
rmse_cal_bma = sqrt(mean_squared_error(calibration_obs, calibration_predictions_bma))
rmse_eval_bma = sqrt(mean_squared_error(evaluation_obs, evaluation_predictions_bma))

# Print RMSE results
print("\nCalibration RMSE:", rmse_cal_bma)
print("Evaluation RMSE:", rmse_eval_bma)


BMA Weights (Betas):
Beta_1: 0.0695
Beta_2: 0.0253
Beta_3: 0.0000
Beta_4: 0.0212
Beta_5: 0.0628
Beta_6: 0.4570
Beta_7: 0.0008
Beta_8: 0.3633

Calibration RMSE: 17.57596825950873
Evaluation RMSE: 26.63719861156392
