This Jupyter notebook serves as an archive for code that was used during the work on this thesis but is no longer part of the main code. In particular, it contains the definitions of the functions that create pie charts and plot the efficient frontier, as well as the hyperparameter tuning code.

##### Functions

In [None]:
def _weighted_path_stats(cum_returns_portfolio, weights):
    """Return the weighted cumulative-return series and its standard deviation.

    Each column of ``cum_returns_portfolio`` is multiplied by the matching
    entry of ``weights`` and the columns are then summed row by row.
    """
    path = (cum_returns_portfolio * weights).sum(axis=1)
    return path, path.std()


def plot_efficient_frontier(mu, S, cum_returns_portfolio, title, *, n_samples=10000):
    """Plot the efficient frontier with the max-Sharpe and min-volatility portfolios.

    The function draws the frontier for ``mu``/``S``, marks the two optimised
    portfolios, overlays a cloud of random long-only portfolios coloured by
    their return/volatility ratio, prints a weights table and a performance
    table, and finally shows the figure.

    Args:
        mu: Expected returns handed to ``EfficientFrontier``.
        S: Covariance matrix handed to ``EfficientFrontier`` (array-like or
            DataFrame).
        cum_returns_portfolio: DataFrame with one column per asset; it is
            weighted with the optimised weights to obtain the portfolio path.
            NOTE(review): presumably cumulative returns in percent, since the
            values are divided by 100 before being formatted as percentages --
            confirm against the caller.
        title: Title of the plot.
        n_samples: Number of random portfolios drawn for the scatter cloud.

    Returns:
        None. The figure is shown and the tables are printed.
    """
    ef = EfficientFrontier(mu, S)
    # The weight of the first asset is forced to zero.
    # NOTE(review): presumably the first column is the benchmark index -- confirm.
    ef.add_constraint(lambda w: w[0] == 0.00)
    _, ax = plt.subplots()

    # Every optimisation runs on its own copy of the constrained optimizer.
    # Both copies use the optimizer's own ``deepcopy`` so they are created the
    # same way and no generic deep copy of the solver objects is needed.
    ef_max_sharpe = ef.deepcopy()
    ef_min_vol = ef.deepcopy()
    plotting.plot_efficient_frontier(ef, ax=ax, show_assets=False)

    # Tangency portfolio (maximum Sharpe ratio).
    # NOTE(review): the markers are placed at (std of the weighted path, last
    # value of the weighted path), i.e. in the units of cum_returns_portfolio;
    # verify that these units match the axes of the plotted frontier.
    ef_max_sharpe.max_sharpe()
    weights_max = ef_max_sharpe.clean_weights()
    ret_max, std_max = _weighted_path_stats(cum_returns_portfolio, weights_max)
    ax.scatter(std_max, ret_max.iloc[-1], marker="*", s=100, c="r", label="Max Sharpe")

    # Minimum volatility portfolio.
    ef_min_vol.min_volatility()
    weights_min = ef_min_vol.clean_weights()
    ret_min, std_min = _weighted_path_stats(cum_returns_portfolio, weights_min)
    ax.scatter(std_min, ret_min.iloc[-1], marker="*", s=100, c="g", label="Min Volatility")

    # Random long-only portfolios (Dirichlet weights sum to one).
    # NOTE(review): these samples do not respect the w[0] == 0 constraint above.
    w = np.random.dirichlet(np.ones(ef.n_assets), n_samples)
    rets = w.dot(ef.expected_returns)
    # Row-wise quadratic form w_i' S w_i; this avoids materialising the full
    # n_samples x n_samples matrix only to read its diagonal.
    cov = np.asarray(S)
    stds = np.sqrt(((w @ cov) * w).sum(axis=1))
    sharpes = rets / stds
    ax.scatter(stds, rets, marker=".", c=sharpes, cmap="viridis_r")

    # One row per portfolio, one column per asset.
    weights = pd.DataFrame(
        [weights_max, weights_min], index=["Max Sharpe", "Min Volatility"]
    )
    # Drop the assets (columns) that have zero weight in both portfolios.
    weights = weights.loc[:, (weights != 0).any(axis=0)].copy()
    # Add a column with the sum of the weights of each portfolio.
    weights['Sum'] = weights.sum(axis=1)

    # Display the clean table
    print("Weights:\n", weights.to_string(index=True, float_format='{:.2%}'.format))

    # Final value and standard deviation of both portfolio paths.
    performance = pd.DataFrame({"Return": [ret_max.iloc[-1]/100, ret_min.iloc[-1]/100],
                                "Volatility": [std_max/100, std_min/100]},
                               index=["Max Sharpe", "Min Volatility"])

    # Display the performance table
    print("\nPerformance:\n", performance.to_string(float_format='{:.2%}'.format))

    # Print table as LaTeX
    #print("\nWeights (LaTeX):\n", weights.transpose().to_latex(index=True, float_format='{:.2%}'.format))
    #print("\nPerformance (LaTeX):\n", performance.transpose().to_latex(float_format='{:.2%}'.format))

    # Output
    ax.set_title(title)
    ax.legend()
    plt.tight_layout()
    plt.show()

In [None]:
def generate_pie_chart(weights, title):
    """Show a pie chart of all portfolio weights larger than 1e-5.

    Args:
        weights: Mapping from asset name to weight (anything with ``.items()``).
        title: Title of the figure.

    Returns:
        None. The figure is shown.
    """
    # Keep only weights above the 1e-5 threshold; zero, negligible and
    # negative weights are left out of the chart.
    cleaned_weights = {k: v for k, v in weights.items() if v > 1e-5}
    # Materialise the dict views so plain lists are handed to matplotlib.
    names = list(cleaned_weights)
    shares = list(cleaned_weights.values())

    plt.figure(figsize=(10, 5))
    # Generate the pie chart without labels inside the slices
    wedges, _, _ = plt.pie(shares, labels=[''] * len(shares), autopct='%1.2f%%', startangle=90)

    # Create a legend with the cleaned weights keys
    plt.legend(wedges, names, loc='upper left')

    # Equal aspect ratio ensures that pie is drawn as a circle.
    plt.axis('equal')
    plt.title(title)
    plt.show()

##### Linear Regression

In [None]:
"""# the only hyperparameter for the linear regression model is the number of lags
hyperparameter_linear = {
    "lags": list(range(1, 336))
}

# the gridsearch function returns the best model with regard to the metric, which does not necessarily have to be the best model in terms of performance

tuning_linear = LinearRegressionModel.gridsearch(
    parameters= hyperparameter_linear,
    series= train, # The target series used as input and target for training.
    val_series= test,
    start= 0.0, # represents the starting point in the time index of series from which predictions will be made to evaluate the model
    #metric= , # function that takes actual and prediction, and returns a float error value
    n_jobs= -1, # setting the parameter to -1 means using all the available cores
    verbose= True # whether to print progress
)

best_model, best_params = tuning_linear[0], tuning_linear[1]
best_model.model_params

# linear regression model with 316 lags of STOXX Europe 600
linear_model_316 = LinearRegressionModel(lags=316, output_chunk_length=1)
linear_model_316.fit(train)
linear_forecast_316 = linear_model_316.predict(len(test))

# linear regression model with 317 lags of STOXX Europe 600
linear_model_317 = LinearRegressionModel(lags=317, output_chunk_length=1)
linear_model_317.fit(train)
linear_forecast_317 = linear_model_317.predict(len(test))

# linear regression model with 335 lags of STOXX Europe 600
linear_model_335 = LinearRegressionModel(lags=335, output_chunk_length=1)
linear_model_335.fit(train) # ['STOXX Europe 600']
linear_forecast_335 = linear_model_335.predict(len(test))

%store linear_forecast_1
%store linear_forecast_316
%store linear_forecast_317
%store linear_forecast_335

# plot linear forecast with 1, 316 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
linear_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, linear_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_1), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_1), 2)) + '%')
linear_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, linear_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_316), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_316), 2)) + '%')
linear_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, linear_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_317), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_317), 2)) + '%')
linear_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, linear_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_335), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_335), 2)) + '%')
# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')
# show the legend outside the plot 
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

# second linear regression model with 316 lags
hf2_linear_regression = linear_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third linear regression model with 317 lags
hf3_linear_regression = linear_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# fourth linear regression model with 335 lags
hf4_linear_regression = linear_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

hf2_linear_regression['STOXX Europe 600'].plot(label='Linear Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_linear_regression, cum_returns), r2_score(hf2_linear_regression, cum_returns), mse(hf2_linear_regression, cum_returns)))
hf3_linear_regression['STOXX Europe 600'].plot(label='Linear Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_linear_regression, cum_returns), r2_score(hf3_linear_regression, cum_returns), mse(hf3_linear_regression, cum_returns)))
hf4_linear_regression['STOXX Europe 600'].plot(label='Linear Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%, MSE: {:.2f})'.format(mape(hf4_linear_regression, cum_returns), r2_score(hf4_linear_regression, cum_returns), mse(hf4_linear_regression, cum_returns)))

linear_forecast_316 = linear_forecast_316.pd_dataframe()

mu_linear_regression_316 = expected_returns.mean_historical_return(linear_forecast_316)
S_linear_regression_316 = risk_models.sample_cov(linear_forecast_316)


ef_linear_regression_316 = EfficientFrontier(mu_linear_regression_316, S_linear_regression_316)
ef_linear_regression_316.add_constraint(lambda w: w[0] == 0)
weights_lr_316 = ef_linear_regression_316.max_sharpe()

# show the weights of the optimal portfolio cleaned
cleaned_weights_lr_1 = ef_linear_regression_1.clean_weights()
# print 
print('The weights of the optimal portfolio with 1 lag are: ' + str(cleaned_weights_lr_1))

lr_returns_316 = (cum_returns_portfolio * weights_lr_316).sum(axis=1)
portfolio_returns['linear_regression_316'] = lr_returns_316

#show the last value of each portfolio in percent and round two 2 decimals
print('Last value of the STOXX Europe 600: ' + str(round(cum_returns_portfolio['STOXX Europe 600'].iloc[-1], 2)) + '%')
print('Last value of the equal weighted portfolio: ' + str(round(portfolio_returns['equal_weighted'].iloc[-1], 2)) + '%')
print('Last value of the historical average portfolio: ' + str(round(portfolio_returns['historical_average'].iloc[-1], 2)) + '%')
print('Last value of the linear regression portfolio with 1 lag: ' + str(round(portfolio_returns['linear_regression_1'].iloc[-1], 2)) + '%')
print('Last value of the linear regression portfolio with 316 lags: ' + str(round(portfolio_returns['linear_regression_316'].iloc[-1], 2)) + '%')"""

##### Ridge Regression

In [None]:
"""hyperparameter_ridge = {
    "lags": list(range(1,336,2)),
    'model': [
        sklearn.linear_model.Ridge(alpha=a) for a in np.arange(0, 1.1, 0.1)
    ]
}

ridge_regression_tune = RegressionModel(
    lags= hyperparameter_ridge['lags'][0],
    model=sklearn.linear_model.Ridge(0)
)

tuning_ridge = ridge_regression_tune.gridsearch(
    parameters= hyperparameter_ridge,
    series= train,
    val_series= test,
    start= 0.1,
    #metric= mse,
    n_jobs= -1,
    verbose= True
)

best_model, best_params = tuning_ridge[0], tuning_ridge[1]
best_model.model_params

# ridge regression model with 316 lags of STOXX Europe 600
ridge_model_316 =RegressionModel(lags=316, model=sklearn.linear_model.Ridge(alpha=1))
ridge_model_316.fit(train)
ridge_forecast_316 = ridge_model_316.predict(len(test))

# ridge regression model with 317 lags of STOXX Europe 600
ridge_model_317 = RegressionModel(lags=317, model=sklearn.linear_model.Ridge(alpha=1))
ridge_model_317.fit(train)
ridge_forecast_317 = ridge_model_317.predict(len(test))

# ridge regression model with 335 lags of STOXX Europe 600
ridge_model_335 = RegressionModel(lags=335, model=sklearn.linear_model.Ridge(alpha=1))
ridge_model_335.fit(train)
ridge_forecast_335 = ridge_model_335.predict(len(test))

# plot the ridge forecast with 1, 316, 317 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
ridge_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, ridge_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_1), 2)) + '%')
ridge_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, ridge_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_316), 2)) + '%')
ridge_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, ridge_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_317), 2)) + '%')
ridge_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, ridge_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_335), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')

# second ridge regression model with 316 lags
#hf2_ridge_regression = ridge_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third ridge regression model with 317 lags
#hf3_ridge_regression = ridge_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# fourth ridge regression model with 335 lags
#hf4_ridge_regression = ridge_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

#hf2_ridge_regression['STOXX Europe 600'].plot(label='Ridge Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_ridge_regression, cum_returns), r2_score(hf2_ridge_regression, cum_returns), mse(hf2_ridge_regression, cum_returns)))
#hf3_ridge_regression['STOXX Europe 600'].plot(label='Ridge Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_ridge_regression, cum_returns), r2_score(hf3_ridge_regression, cum_returns), mse(hf3_ridge_regression, cum_returns)))
#hf4_ridge_regression['STOXX Europe 600'].plot(label='Ridge Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_ridge_regression, cum_returns), r2_score(hf4_ridge_regression, cum_returns), mse(hf4_ridge_regression, cum_returns)))

ridge_forecast_316 = ridge_forecast_316.pd_dataframe()
mu_ridge_regression_316 = expected_returns.mean_historical_return(ridge_forecast_316)
S_ridge_regression_316 = risk_models.sample_cov(ridge_forecast_316)

ef_ridge_regression_316 = EfficientFrontier(mu_ridge_regression_316, S_ridge_regression_316)
ef_ridge_regression_316.add_constraint(lambda w: w[0] == 0)
weights_rr_316 = ef_ridge_regression_316.max_sharpe()

print('Weights for ridge regression with 316 lags: ' + str(np.round(pd.Series(weights_rr_316), 3)))

rr_returns_316 = (cum_returns_portfolio * weights_rr_316).sum(axis=1)
portfolio_returns['ridge_regression_316'] = rr_returns_316

# plot the cumulative returns of the benchmark and the ridge regression portfolio
plt.figure(figsize=(15,10))
plt.plot(cum_returns_portfolio['STOXX Europe 600'], label='STOXX Europe 600')
plt.plot(portfolio_returns['equal_weighted'], label='Equal Weighted Portfolio')
plt.plot(portfolio_returns['historical_average'], label='Historical Average Portfolio')
plt.plot(portfolio_returns['ridge_regression_1'], label='Ridge Regression Portfolio with 1 lag')
plt.plot(portfolio_returns['ridge_regression_316'], label='Ridge Regression Portfolio with 316 lags')
plt.legend()
plt.show();"""

##### LASSO Regression

In [None]:
"""hyperparameter_lasso = {
    "lags": list(range(1,2)),
    'model': [
        sklearn.linear_model.Lasso(alpha=a) for a in np.arange(0, 1.1, 0.01)
    ]
}

lasso_regression_tune = RegressionModel(
    lags= hyperparameter_lasso['lags'][0],
    model=sklearn.linear_model.Lasso(0)
)

tuning_lasso = lasso_regression_tune.gridsearch(
    parameters= hyperparameter_lasso,
    series= train,
    val_series= test,
    start= 0.1,
    #metric= mape,
    n_jobs= -1,
    verbose= True
)

best_model, best_params = tuning_lasso[0], tuning_lasso[1]
best_model.model_params


# lasso regression model with 316 lags of STOXX Europe 600
lasso_model_316 = RegressionModel(lags=316, model=sklearn.linear_model.Lasso(alpha=0))
lasso_model_316.fit(train)
lasso_forecast_316 = lasso_model_316.predict(len(test))
# lasso regression model with 317 lags of STOXX Europe 600
lasso_model_317 = RegressionModel(lags=317, model=sklearn.linear_model.Lasso(alpha=0))
lasso_model_317.fit(train)
lasso_forecast_317 = lasso_model_317.predict(len(test))
# lasso regression model with 335 lags of STOXX Europe 600
lasso_model_335 = RegressionModel(lags=335, model=sklearn.linear_model.Lasso(alpha=0))
lasso_model_335.fit(train)
lasso_forecast_335 = lasso_model_335.predict(len(test))

# second lasso regression model with 316 lags
hf2_lasso_regression = lasso_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third lasso regression model with 317 lags
hf3_lasso_regression = lasso_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# fourth lasso regression model with 335 lags
hf4_lasso_regression = lasso_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

#hf2_lasso_regression['STOXX Europe 600'].plot(label='Lasso Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_lasso_regression, cum_returns), r2_score(hf2_lasso_regression, cum_returns), mse(hf2_lasso_regression, cum_returns)))
#hf3_lasso_regression['STOXX Europe 600'].plot(label='Lasso Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_lasso_regression, cum_returns), r2_score(hf3_lasso_regression, cum_returns), mse(hf3_lasso_regression, cum_returns)))
#hf4_lasso_regression['STOXX Europe 600'].plot(label='Lasso Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_lasso_regression, cum_returns), r2_score(hf4_lasso_regression, cum_returns), mse(hf4_lasso_regression, cum_returns)))

lasso_forecast_316 = lasso_forecast_316.pd_dataframe()

mu_lasso_regression_316 = expected_returns.mean_historical_return(lasso_forecast_316)
S_lasso_regression_316 = risk_models.sample_cov(lasso_forecast_316)

ef_lasso_regression_316 = EfficientFrontier(mu_lasso_regression_316, S_lasso_regression_316)
ef_lasso_regression_316.add_constraint(lambda w: w[0] == 0)
weights_lar_316 = ef_lasso_regression_316.max_sharpe()

print('Weights for lasso regression with 316 lags: ' + str(np.round(pd.Series(weights_lar_316), 3)))

# plot the cumulative returns of the benchmark and the lasso regression portfolio
plt.figure(figsize=(15,10))
plt.plot(cum_returns_portfolio['STOXX Europe 600'], label='STOXX Europe 600')
plt.plot(portfolio_returns['equal_weighted'], label='Equal Weighted Portfolio')
plt.plot(portfolio_returns['historical_average'], label='Historical Average Portfolio')
plt.plot(portfolio_returns['lasso_regression_1'], label='Lasso Regression with 1 lag')
plt.plot(portfolio_returns['lasso_regression_316'], label='Lasso Regression with 316 lags')
plt.legend(loc='upper left', fontsize=12)
plt.show();

lar_returns_316 = (cum_returns_portfolio * weights_lar_316).sum(axis=1)
portfolio_returns['lasso_regression_316'] = lar_returns_316"""

##### Elastic Net

In [1]:
"""hyperparameter_elastic = {
    "lags": list(range(1,2)),
    'model': [
        sklearn.linear_model.ElasticNet(alpha=a) for a in np.arange(0, 1.1, 0.01)
    ]
}"""

"""elastic_net_tune = RegressionModel(
    lags= hyperparameter_elastic['lags'][0],
    model=sklearn.linear_model.ElasticNet(0)
)"""

"""tuning_elastic_net = elastic_net_tune.gridsearch(
    parameters= hyperparameter_elastic,
    series= train,
    val_series= test,
    start= 0.1,
   # metric= mape,
    n_jobs= -1,
    verbose= True
)"""

"""best_model, best_params = tuning_elastic_net[0], tuning_elastic_net[1]
# print the best model parameters
best_model.model_params"""

"""# I have tested the model with different lags, which have shown the best results for mape and mse
# elastic net regression model with 1 lag of STOXX Europe 600
elastic_model_1 = RegressionModel(lags=1, model=sklearn.linear_model.ElasticNet(alpha=0))
elastic_model_1.fit(train)
elastic_forecast_1 = elastic_model_1.predict(len(test))

# elastic net regression model with 316 lags of STOXX Europe 600
elastic_model_316 = RegressionModel(lags=316, model=sklearn.linear_model.ElasticNet(alpha=0))
elastic_model_316.fit(train)
elastic_forecast_316 = elastic_model_316.predict(len(test))

# elastic net regression model with 317 lags of STOXX Europe 600
elastic_model_317 = RegressionModel(lags=317, model=sklearn.linear_model.ElasticNet(alpha=0))
elastic_model_317.fit(train)
elastic_forecast_317 = elastic_model_317.predict(len(test))

# elastic net regression model with 335 lags of STOXX Europe 600
elastic_model_335 = RegressionModel(lags=335, model=sklearn.linear_model.ElasticNet(alpha=0))
elastic_model_335.fit(train)
elastic_forecast_335 = elastic_model_335.predict(len(test))"""

"""# plot the elastic net forecast with 1, 316, 317 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
elastic_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, elastic_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, elastic_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, elastic_forecast_1), 2)) + '%')
elastic_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, elastic_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, elastic_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, elastic_forecast_316), 2)) + '%')
elastic_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, elastic_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, elastic_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, elastic_forecast_317), 2)) + '%')
elastic_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, elastic_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, elastic_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, elastic_forecast_335), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')"""

"""# first elastic net regression model with 1 lag
hf1_elastic_net_regression = elastic_model_1.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

# second elastic net regression model with 316 lags
hf2_elastic_net_regression = elastic_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

# third elastic net regression model with 317 lags
#hf3_elastic_net_regression = elastic_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

# fourth elastic net regression model with 335 lags
#hf4_elastic_net_regression = elastic_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)"""

"""# compute historical forecasts that would have been obtained by this model
cum_returns['STOXX Europe 600'].plot(label='STOXX Europe 600')
hf1_elastic_net_regression['STOXX Europe 600'].plot(label='Elastic Net Regression with 1 lag (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf1_elastic_net_regression, cum_returns), r2_score(hf1_elastic_net_regression, cum_returns), mse(hf1_elastic_net_regression, cum_returns)))
hf2_elastic_net_regression['STOXX Europe 600'].plot(label='Elastic Net Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_elastic_net_regression, cum_returns), r2_score(hf2_elastic_net_regression, cum_returns), mse(hf2_elastic_net_regression, cum_returns)))
#hf3_elastic_net_regression['STOXX Europe 600'].plot(label='Elastic Net Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_elastic_net_regression, cum_returns), r2_score(hf3_elastic_net_regression, cum_returns), mse(hf3_elastic_net_regression, cum_returns)))
#hf4_elastic_net_regression['STOXX Europe 600'].plot(label='Elastic Net Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_elastic_net_regression, cum_returns), r2_score(hf4_elastic_net_regression, cum_returns), mse(hf4_elastic_net_regression, cum_returns)))"""

"""# change type of the elastic net forecasts to a pandas dataframe
elastic_forecast_1 = elastic_forecast_1.pd_dataframe()
elastic_forecast_316 = elastic_forecast_316.pd_dataframe()"""

"""# perform mean-variance optimization
mu_elastic_net_regression_1 = expected_returns.mean_historical_return(elastic_forecast_1)
S_elastic_net_regression_1 = risk_models.sample_cov(elastic_forecast_1)

mu_elastic_net_regression_316 = expected_returns.mean_historical_return(elastic_forecast_316)
S_elastic_net_regression_316 = risk_models.sample_cov(elastic_forecast_316)"""

"""# optimize for maximal Sharpe ratio
ef_elastic_net_regression_1 = EfficientFrontier(mu_elastic_net_regression_1, S_elastic_net_regression_1)
ef_elastic_net_regression_1.add_constraint(lambda w: w[0] == 0)
weights_enr_1 = ef_elastic_net_regression_1.max_sharpe()

ef_elastic_net_regression_316 = EfficientFrontier(mu_elastic_net_regression_316, S_elastic_net_regression_316)
ef_elastic_net_regression_316.add_constraint(lambda w: w[0] == 0)
weights_enr_316 = ef_elastic_net_regression_316.max_sharpe()"""

"""# print the weights in a table and round them to 4 decimals
print('Weights for Elastic Net regression with 1 lag: ' + str(np.round(pd.Series(weights_enr_1), 3)))
print('Weights for Elastic Net regression with 316 lags: ' + str(np.round(pd.Series(weights_enr_316), 3)))"""

"""# compute the returns of elastic net regression portfolio
enr_returns_1 = (cum_returns_portfolio * weights_enr_1).sum(axis=1)
enr_returns_316 = (cum_returns_portfolio * weights_enr_316).sum(axis=1)

portfolio_returns['Elastic_Net_1'] = enr_returns_1
portfolio_returns['Elastic_Net_316'] = enr_returns_316"""

"""# plot the cumulative returns of the benchmark and the elastic net regression portfolio
plt.figure(figsize=(15,10))
plt.plot(cum_returns_portfolio['STOXX Europe 600'], label='STOXX Europe 600')
plt.plot(portfolio_returns['equal_weighted'], label='Equal Weighted Portfolio')
plt.plot(portfolio_returns['historical_average'], label='Historical Average Portfolio')
plt.plot(portfolio_returns['Elastic_Net_1'], label='Elastic Net Regression with 1 lag')
plt.plot(portfolio_returns['Elastic_Net_316'], label='Elastic Net Regression with 316 lags')
plt.legend(loc='upper left')
plt.show();"""

"""# show the last value of each portfolio and round it to 2 decimal places
print('Last value of the STOXX Europe 600: ' + str(np.round(cum_returns_portfolio['STOXX Europe 600'].iloc[-1], 2)) + '%')
print('Last value of the equal weighted portfolio: ' + str(np.round(portfolio_returns['equal_weighted'].iloc[-1], 2)) + '%')
print('Last value of the historical average portfolio: ' + str(np.round(portfolio_returns['historical_average'].iloc[-1], 2)) + '%')
print('Last value of the ridge regression portfolio with 1 lag: ' + str(np.round(portfolio_returns['ridge_regression_1'].iloc[-1], 12)) + '%')
print('Last value of the ridge regression portfolio with 316 lags: ' + str(np.round(portfolio_returns['ridge_regression_316'].iloc[-1], 5)) + '%')
# also for the linear regression portfolio
print('Last value of the linear regression portfolio: ' + str(np.round(portfolio_returns['linear_regression_1'].iloc[-1], 12)) + '%')
print('Last value of the linear regression portfolio: ' + str(np.round(portfolio_returns['linear_regression_316'].iloc[-1], 5)) + '%')
# also for the lasso regression portfolio
print('Last value of the lasso regression portfolio: ' + str(np.round(portfolio_returns['lasso_regression_1'].iloc[-1], 2)) + '%')
print('Last value of the lasso regression portfolio: ' + str(np.round(portfolio_returns['lasso_regression_316'].iloc[-1], 2)) + '%')
# also for the elastic net regression portfolio
print('Last value of the elastic net regression portfolio with 1 lag: ' + str(np.round(portfolio_returns['Elastic_Net_1'].iloc[-1], 2)) + '%')
print('Last value of the elastic net regression portfolio with 316 lags: ' + str(np.round(portfolio_returns['Elastic_Net_316'].iloc[-1], 2)) + '%')"""

'best_model, best_params = tuning_elastic_net[0], tuning_elastic_net[1]\n# print the best model parameters\nbest_model.model_params'

##### Bridge Regression

In [None]:
"""hyperparameter_bridge = {
    "lags": list(range(1,336, 10)),
    'model': [
        sklearn.linear_model.LassoLarsCV(cv=c, n_jobs=-1) 
        for c in np.arange(2, 13, 1)
    ]
}"""

"""bridge_regression_tune = RegressionModel(
    lags= hyperparameter_bridge['lags'][0],
    model=sklearn.linear_model.LassoLarsCV(cv=0, n_jobs=-1)
)"""

"""tuning_bridge = bridge_regression_tune.gridsearch(
    parameters= hyperparameter_bridge,
    series= train,
    val_series= test,
    start= 0.1,
    #metric= mape,
    n_jobs= -1,
    verbose= True
)"""

"""best_model, best_params = tuning_bridge[0], tuning_bridge[1]
# print the best model parameters
best_model.model_params"""

"""# I have tested the model with different lags, which have shown the best results for mape and mse
# bridge regression model with 1 lag of STOXX Europe 600
bridge_regression_1 = RegressionModel(lags= 1, model=sklearn.linear_model.LassoLarsCV(cv=2, n_jobs=-1))
bridge_regression_1.fit(train)
bridge_forecast_1 = bridge_regression_1.predict(len(test))

# bridge regression model with 316 lags of STOXX Europe 600
bridge_regression_316 = RegressionModel(lags= 316, model=sklearn.linear_model.LassoLarsCV(cv=2, n_jobs=-1))
bridge_regression_316.fit(train)
bridge_forecast_316 = bridge_regression_316.predict(len(test))

# bridge regression model with 317 lags of STOXX Europe 600
bridge_regression_317 = RegressionModel(lags= 317, model=sklearn.linear_model.LassoLarsCV(cv=2, n_jobs=-1))
bridge_regression_317.fit(train)
bridge_forecast_317 = bridge_regression_317.predict(len(test))"""

"""# plot the bridge forecast with 1, 316, 317  lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
bridge_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, bridge_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, bridge_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, bridge_forecast_1), 2)) + '%')
bridge_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, bridge_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, bridge_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, bridge_forecast_316), 2)) + '%')
bridge_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, bridge_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, bridge_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, bridge_forecast_317), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')"""

"""# first bridge regression model with 1 lag
hf1_bridge_regression = bridge_regression_1.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# second bridge regression model with 316 lags
#hf2_bridge_regression = bridge_regression_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third bridge regression model with 317 lags
#hf3_bridge_regression = bridge_regression_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)"""

"""# compute historical forecasts that would have been obtained by this model
cum_returns['STOXX Europe 600'].plot(label='STOXX Europe 600')
hf1_bridge_regression['STOXX Europe 600'].plot(label='Bridge Regression with 1 lag (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf1_bridge_regression, cum_returns), r2_score(hf1_bridge_regression, cum_returns), mse(hf1_bridge_regression, cum_returns)))
#hf2_bridge_regression['STOXX Europe 600'].plot(label='Bridge Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_bridge_regression, cum_returns), r2_score(hf2_bridge_regression, cum_returns), mse(hf2_bridge_regression, cum_returns)))
#hf3_bridge_regression['STOXX Europe 600'].plot(label='Bridge Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_bridge_regression, cum_returns), r2_score(hf3_bridge_regression, cum_returns), mse(hf3_bridge_regression, cum_returns)))
#hf4_bridge_regression['STOXX Europe 600'].plot(label='Bridge Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_bridge_regression, cum_returns), r2_score(hf4_bridge_regression, cum_returns), mse(hf4_bridge_regression, cum_returns)))"""

"""# change the type of the bridge forecast to pandas dataframe
bridge_forecast_1 = bridge_forecast_1.pd_dataframe()"""

"""# perform mean-variance optimization
mu_bridge_regression_1 = expected_returns.mean_historical_return(bridge_forecast_1)
S_bridge_regression_1 = risk_models.sample_cov(bridge_forecast_1)"""

"""# optimize for maximal Sharpe ratio
ef_bridge_regression_1 = EfficientFrontier(mu_bridge_regression_1, S_bridge_regression_1)
ef_bridge_regression_1.add_constraint(lambda w: w[0] == 0)
weights_br_1 = ef_bridge_regression_1.max_sharpe()"""

"""# print the weights in a table and round them to 4 decimals
print('Weights for Bridge regression with 1 lag: ' + str(np.round(pd.Series(weights_br_1), 3)))"""

"""# compute the returns of bridge regression portfolio
br_returns_1 = (cum_returns_portfolio * weights_br_1).sum(axis=1)

portfolio_returns['bridge_regression_1'] = br_returns_1"""

"""# plot the cumulative returns of the benchmark and the bridge regression portfolio
plt.figure(figsize=(15,10))
plt.plot(cum_returns_portfolio['STOXX Europe 600'], label='STOXX Europe 600')
plt.plot(portfolio_returns['equal_weighted'], label='Equal Weighted Portfolio')
plt.plot(portfolio_returns['historical_average'], label='Historical Average Portfolio')
plt.plot(portfolio_returns['bridge_regression_1'], label='Bridge Regression with 1 lag')
plt.legend(loc='upper left')
plt.show();"""

### PLS Regression

In [None]:
"""hyperparameter_pls = {
    "lags": list(range(317,318,1)),
    'model': [
        sklearn.cross_decomposition.PLSRegression(n_components=n) for n in np.arange(1, 15, 1)
    ]
}"""

"""pls_regression_tune = RegressionModel(
    lags= hyperparameter_pls['lags'][0],
    model=sklearn.cross_decomposition.PLSRegression(n_components=0)
)"""

"""tuning_pls = pls_regression_tune.gridsearch(
    parameters= hyperparameter_pls,
    series= train,
    val_series= test,
    start= 0.1,
    metric= mse,
    n_jobs= -1,
    verbose= True
)"""

"""# print the best model parameters
best_model, best_params = tuning_pls[0], tuning_pls[1]
best_model.model_params"""

"""
# pls regression model with 316 lags
pls_model_316 = RegressionModel(lags=316, model=sklearn.cross_decomposition.PLSRegression(n_components=1))
pls_model_316.fit(train)
pls_forecast_316 = pls_model_316.predict(len(test))

# pls regression model with 317 lags
pls_model_317 = RegressionModel(lags=317, model=sklearn.cross_decomposition.PLSRegression(n_components=1))
pls_model_317.fit(train)
pls_forecast_317 = pls_model_317.predict(len(test))

# 335 lags was not possible to run

# plot the pls forecast with 1, 316, 317  and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
pls_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, pls_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, pls_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, pls_forecast_1), 2)) + '%')
pls_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, pls_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, pls_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, pls_forecast_316), 2)) + '%')
pls_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, pls_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, pls_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, pls_forecast_317), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')


# second pls regression model with 316 lags
hf2_pls_regression = pls_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

# third pls regression model with 317 lags
#hf3_pls_regression = pls_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

#hf2_pls_regression['STOXX Europe 600'].plot(label='PLS Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_pls_regression, cum_returns), r2_score(hf2_pls_regression, cum_returns), mse(hf2_pls_regression, cum_returns)))
#hf3_pls_regression['STOXX Europe 600'].plot(label='PLS Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_pls_regression, cum_returns), r2_score(hf3_pls_regression, cum_returns), mse(hf3_pls_regression, cum_returns)))


pls_forecast_316 = pls_forecast_316.pd_dataframe()


mu_pls_regression_316 = expected_returns.mean_historical_return(pls_forecast_316)
S_pls_regression_316 = risk_models.sample_cov(pls_forecast_316)


ef_pls_regression_316 = EfficientFrontier(mu_pls_regression_316, S_pls_regression_316)
weights_pls_316 = ef_pls_regression_316.max_sharpe()

print('Weights for PLS regression with 316 lags: ' + str(np.round(pd.Series(weights_pls_316), 3)))

pls_returns_316 = (cum_returns_portfolio * weights_pls_316).sum(axis=1)
portfolio_returns['pls_regression_316'] = pls_returns_316

# show the last value of each portfolio in percent and round to 2 decimals
print('Last value of the STOXX Europe 600: ' + str(round(cum_returns_portfolio['STOXX Europe 600'].iloc[-1], 2)) + '%')
print('Last value of the equal weighted portfolio: ' + str(round(portfolio_returns['equal_weighted'].iloc[-1], 2)) + '%')
print('Last value of the historical average portfolio: ' + str(round(portfolio_returns['historical_average'].iloc[-1], 2)) + '%')
print('Last value of the pls regression portfolio with 1 lag: ' + str(round(portfolio_returns['pls_regression_1'].iloc[-1], 2)) + '%')
print('Last value of the pls regression portfolio with 316 lags: ' + str(round(portfolio_returns['pls_regression_316'].iloc[-1], 2)) + '%')
"""

### Canonical PLS

In [None]:
"""hyperparameter_canonical = {
    "lags": list(range(316,317,1)),
    'model': [
        sklearn.cross_decomposition.PLSCanonical(n_components=beta) for beta in np.arange(1, 15,1)
    ]
}"""

"""# pls canonical regression model
pls_canonical_tune = RegressionModel(
    lags=hyperparameter_canonical['lags'][0], 
    model= sklearn.cross_decomposition.PLSCanonical(n_components=0)
    )"""

"""tuning_canonical = pls_canonical_tune.gridsearch(
    parameters= hyperparameter_canonical,
    series= train,
    val_series= test,
    start= 0.1,
    metric= mse,
    n_jobs= -1,
    verbose= True
)"""

"""best_model, best_params = tuning_canonical[0], tuning_canonical[1]
best_model.model_params"""

"""
# canonical pls regression model with 316 lags
canonical_pls_model_316 = RegressionModel(lags=316, model=sklearn.cross_decomposition.PLSCanonical(n_components=2))
canonical_pls_model_316.fit(train)
canonical_pls_forecast_316 = canonical_pls_model_316.predict(len(test))

# canonical pls regression model with 317 lags
canonical_pls_model_317 = RegressionModel(lags=317, model=sklearn.cross_decomposition.PLSCanonical(n_components=14))
canonical_pls_model_317.fit(train)
canonical_pls_forecast_317 = canonical_pls_model_317.predict(len(test))

# canonical pls regression model with 335 lags
#canonical_pls_forecast_335 = RegressionModel(lags=335, model=sklearn.cross_decomposition.PLSCanonical(n_components=1))

# plot the canonical pls forecast with 1, 316, 317 and 335 and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
canonical_pls_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, canonical_pls_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, canonical_pls_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, canonical_pls_forecast_1), 2)) + '%')
#canonical_pls_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, canonical_pls_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, canonical_pls_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, canonical_pls_forecast_316), 2)) + '%')
#canonical_pls_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, canonical_pls_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, canonical_pls_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, canonical_pls_forecast_317), 2)) + '%')
#canonical_pls_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, canonical_pls_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, canonical_pls_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, canonical_pls_forecast_335), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')


# second canonical pls model with 316 lags
#hf2_canonical_pls = canonical_pls_model_316.historical_forecasts(series= cum_returns, start= 0.7, forecast_horizon= 1, verbose= True)

#hf2_canonical_pls['STOXX Europe 600'].plot(label='PLS Canonical Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_canonical_pls, cum_returns), r2_score(hf2_canonical_pls, cum_returns), mse(hf2_canonical_pls, cum_returns)))
"""

### CCA

In [None]:
"""hyperparameter_cca = {
    "lags": list(range(1,2, 1)),
    'model': [
        sklearn.cross_decomposition.CCA(n_components=beta) for beta in np.arange(1, 15,1)
    ]
}"""

"""cca_tune = RegressionModel(
    lags= hyperparameter_cca['lags'][0],
    model=sklearn.cross_decomposition.CCA(n_components=0)
)"""

"""tuning_cca = cca_tune.gridsearch(
    parameters= hyperparameter_cca,
    series= train,
    val_series= test,
    start= 0.3,
    #metric= mape,
    n_jobs= -1,
    verbose= True
)"""

"""# print the best model parameters
best_model, best_params = tuning_cca[0], tuning_cca[1]
best_model.model_params"""

"""
# cca regression model with 316 lags
cca_model_316 = RegressionModel(lags=316, model=sklearn.cross_decomposition.CCA(n_components=1))
cca_model_316.fit(train)
cca_forecast_316 = cca_model_316.predict(len(test))

# cca regression model with 317 lags
cca_model_317 = RegressionModel(lags=317, model=sklearn.cross_decomposition.CCA(n_components=1))
cca_model_317.fit(train)
cca_forecast_317 = cca_model_317.predict(len(test))

# plot the cca forecast with 1, 316 and 317 and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
cca_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, cca_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, cca_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, cca_forecast_1), 2)) + '%')
cca_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, cca_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, cca_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, cca_forecast_316), 2)) + '%')
cca_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, cca_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, cca_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, cca_forecast_317), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')


# second cca model with 316 lags
#hf2_cca = cca_model_316.historical_forecasts(series= cum_returns, start= 0.7, forecast_horizon= 1, verbose= True)


#hf2_cca['STOXX Europe 600'].plot(label='CCA Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_cca, cum_returns), r2_score(hf2_cca, cum_returns), mse(hf2_cca, cum_returns)))
"""

### Random Forest

In [None]:
"""hyperparameter_randomforest = {
    "lags": [1],
    'n_estimators': list(range(90, 150, 1)),
    'max_depth': [300],
    'random_state': [0]
}"""

"""random_forest_tune = RandomForest(
    lags=hyperparameter_randomforest['lags'][0], 
   n_estimators=hyperparameter_randomforest['n_estimators'][0],
    max_depth=hyperparameter_randomforest['max_depth'][0],
    random_state = 0
    )"""

"""tuning_randomforest = random_forest_tune.gridsearch(
    parameters= hyperparameter_randomforest,
    series= train,
    val_series= test,
    start= 0.1,
    #metric= mse,
    n_jobs= -1,
    verbose= True
)"""

"""# print the best model parameters
best_model, best_params = tuning_randomforest[0], tuning_randomforest[1]
best_model.model_params"""


# random forest regression model with 316 lags
#randomforest_model_316 = RegressionModel(lags=316, model=sklearn.ensemble.RandomForestRegressor(n_estimators=100, max_depth=10000))
#randomforest_model_316.fit(train)
#randomforest_forecast_316 = randomforest_model_316.predict(len(test))

# random forest regression model with 317 lags
#randomforest_model_317 = RegressionModel(lags=317, model=sklearn.ensemble.RandomForestRegressor(n_estimators=100, max_depth=10))
#randomforest_model_317.fit(train)
#randomforest_forecast_317 = randomforest_model_317.predict(len(test))

# random forest regression model with 335 lags
#randomforest_model_335 = RegressionModel(lags=335, model=sklearn.ensemble.RandomForestRegressor(n_estimators=100, max_depth=1))
#randomforest_model_335.fit(train)
#randomforest_forecast_335 = randomforest_model_335.predict(len(test))

# plot the random forest forecast with 1, 316, 317 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
#randomforest_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, randomforest_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, randomforest_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, randomforest_forecast_1), 2)) + '%')
#randomforest_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, randomforest_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, randomforest_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, randomforest_forecast_316), 2)) + '%')
#randomforest_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, randomforest_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, randomforest_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, randomforest_forecast_317), 2)) + '%')
#randomforest_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, randomforest_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, randomforest_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, randomforest_forecast_335), 2)) + '%')

# rewrite to see only one line
#cum_returns['STOXX Europe 600'].plot(label='actual')


# second random forest model with 316 lags
#hf2_random_forest = randomforest_model_316.historical_forecasts(series= cum_returns, start= 0.7, forecast_horizon= 1, verbose= True)

# third random forest model with 317 lags
#hf3_random_forest = randomforest_model_317.historical_forecasts(series= cum_returns, start= 0.7, forecast_horizon= 1, verbose= True)

# fourth random forest model with 335 lags
#hf4_random_forest = randomforest_model_335.historical_forecasts(series= cum_returns, start= 0.7, forecast_horizon= 1, verbose= True)

#hf2_random_forest['STOXX Europe 600'].plot(label='Random Forest with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_random_forest, cum_returns), r2_score(hf2_random_forest, cum_returns), mse(hf2_random_forest, cum_returns)))
#hf3_random_forest['STOXX Europe 600'].plot(label='Random Forest with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_random_forest, cum_returns), r2_score(hf3_random_forest, cum_returns), mse(hf3_random_forest, cum_returns)))
#hf4_random_forest['STOXX Europe 600'].plot(label='Random Forest with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_random_forest, cum_returns), r2_score(hf4_random_forest, cum_returns), mse(hf4_random_forest, cum_returns)))

### Gradient Boosting Regressor

In [None]:
"""hyperparameter_gradient_boosting = {
    "lags": [1], #1, 316, 317, 335
    'model': [
        GradientBoostingRegressor(n_estimators=100,learning_rate=l, max_depth=m, random_state=0)   
        #for n in range(1, 100, 5)
        for m in range(1, 501, 10)
        for l in np.arange(0.1, 1, 0.2) 
        
    ]
}"""

"""gradient_boosting_tune = RegressionModel(
    lags= hyperparameter_gradient_boosting['lags'][0],
    model=GradientBoostingRegressor(n_estimators=0, learning_rate=0, max_depth=0, random_state=0)
)"""

"""tuning_gradient_boosting = gradient_boosting_tune.gridsearch(
    parameters= hyperparameter_gradient_boosting,
    series= train,
    val_series= test,
    start= 0.1,
    metric= mse,
    n_jobs= -1,
    verbose= True
)"""

"""# print the best parameters
best_model, best_params = tuning_gradient_boosting[0], tuning_gradient_boosting[1]
best_model.model_params"""


# gradient boosting regression model with 316 lags
#gradient_boosting_model_316 = RegressionModel(lags=316, model=sklearn.ensemble.GradientBoostingRegressor(n_estimators=100, max_depth=400, learning_rate=0.5))
#gradient_boosting_model_316.fit(train)
#gradient_boosting_forecast_316 = gradient_boosting_model_316.predict(len(test))

# gradient boosting regression model with 317 lags
#gradient_boosting_model_317 = RegressionModel(lags=317, model=sklearn.ensemble.GradientBoostingRegressor(n_estimators=100, max_depth=400, learning_rate=0.5))
#gradient_boosting_model_317.fit(train)
#gradient_boosting_forecast_317 = gradient_boosting_model_317.predict(len(test))

# gradient boosting regression model with 335 lags
#gradient_boosting_model_335 = RegressionModel(lags=335, model=sklearn.ensemble.GradientBoostingRegressor(n_estimators=1000, max_depth=800, learning_rate=0.9))
#gradient_boosting_model_335.fit(train)
#gradient_boosting_forecast_335 = gradient_boosting_model_335.predict(len(test))

# plot the gradient boosting forecast with 1, 316, 317 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
#gradient_boosting_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, gradient_boosting_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, gradient_boosting_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, gradient_boosting_forecast_1), 2)) + '%')
#gradient_boosting_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, gradient_boosting_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, gradient_boosting_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, gradient_boosting_forecast_316), 2)) + '%')
#gradient_boosting_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, gradient_boosting_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, gradient_boosting_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, gradient_boosting_forecast_317), 2)) + '%')
#gradient_boosting_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, gradient_boosting_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, gradient_boosting_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, gradient_boosting_forecast_335), 2)) + '%')

# rewrite to see only one line
#cum_returns['STOXX Europe 600'].plot(label='actual')


# first gradient boosting model with 316 lags
#hf316_gradient_boosting = gradient_boosting_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

# second gradient boosting model with 317 lags
#hf317_gradient_boosting = gradient_boosting_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

#hf316_gradient_boosting['STOXX Europe 600'].plot(label='Gradient Boosting with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf316_gradient_boosting, cum_returns), r2_score(hf316_gradient_boosting, cum_returns), mse(hf316_gradient_boosting, cum_returns)))
#hf317_gradient_boosting['STOXX Europe 600'].plot(label='Gradient Boosting with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf317_gradient_boosting, cum_returns), r2_score(hf317_gradient_boosting, cum_returns), mse(hf317_gradient_boosting, cum_returns)))

# The maximum portfolio return that can possibly be reached is 8.93%. Therefore, we only need to identify the configuration with the best performance measures.

# Result notes (presumably MAPE, R2, MSE, in the order used by the plot labels):
# n_estimators=100, max_depth=300, learning_rate=0.5 - 19.79, 0.87, 0.19
# n_estimators=100, max_depth=31, learning_rate=0.3 - 18.95, 0.88, 0.18

# 8.93% - gradient_boosting_model_1 = RegressionModel(lags=1, model=sklearn.ensemble.GradientBoostingRegressor(n_estimators=100, max_depth=300, learning_rate=0.5)) # same with l=0.9

# 4.78% - gradient_boosting_model_1 = RegressionModel(lags=1, model=sklearn.ensemble.GradientBoostingRegressor(n_estimators=147, max_depth=300, learning_rate=0.5))

### AdaBoost

In [None]:
"""hyperparameter_ada_boosting = {
    "lags": [1],
    'model': [
        sklearn.ensemble.AdaBoostRegressor(n_estimators=n, learning_rate=0.5, loss=x, random_state=0) 
        for n in np.arange(1, 500, 25)
        for l in np.arange(0.1, 1, 0.2)
        for x in ['linear', 'square', 'exponential']
    ]
}"""

"""ada_boosting_tune = RegressionModel(
    lags= hyperparameter_ada_boosting['lags'][0],
    model=sklearn.ensemble.AdaBoostRegressor(n_estimators=0, random_state=0)
)"""

"""tuning_ada_boosting = ada_boosting_tune.gridsearch(
    parameters= hyperparameter_ada_boosting,
    series= train,
    val_series= test,
    start= 0.1,
    metric= mape,
    n_jobs= -1,
    verbose= True
)"""

"""# print the best model parameters
best_model, best_params = tuning_ada_boosting[0], tuning_ada_boosting[1]
best_model.model_params"""

# plot the forecast of the ada boosting model with 1 lag
#ada_boosting_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, ada_boosting_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, ada_boosting_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, ada_boosting_forecast_1), 2)) + '%')
# plot cum returns
#cum_returns['STOXX Europe 600'].plot(label='actual')

# n_estimators=50, learning_rate=0.5, loss='exponential' 
# 29.92; 0.90; 0.16

# n_estimators=176, learning_rate=0.5, loss='exponential'
# 21.7; 0.9; 0.16

# n_estimators=50, learning_rate=0.3, random_state=0, loss='exponential'
# 23.28; 0.89; 0.16

### Extremely Randomized Trees

In [None]:
"""hyperparameter_extra_trees = {
    "lags": [1],
    'model': [
        ExtraTreesRegressor(n_estimators=n, max_depth = 5, min_samples_split=15, random_state=0) 
        for n in np.arange(50, 200, 1)
        #for m in np.arange(1, 500, 5)
        #for s in np.arange(2, 20, 2)
        ]
}"""

"""extra_trees_tune = RegressionModel(
    lags= hyperparameter_ada_boosting['lags'][0],
    model=ExtraTreesRegressor(n_estimators=0,  random_state=0)
)"""

"""tuning_extra_trees = extra_trees_tune.gridsearch(
    parameters= hyperparameter_extra_trees,
    series= train,
    val_series= test,
    start= 0.1,
    metric= mape,
    n_jobs= -1,
    verbose= True
)"""

"""# print the best model parameters
best_model, best_params = tuning_extra_trees[0], tuning_extra_trees[1]
best_model.model_params"""

# 13.42% - n_estimators=62, max_depth= 5, min_samples_split= 15
# 16.70; 0.73; 0.36

# 7.35% - n_estimators=50, max_depth= 5
# 16.08; 0.77; 0.32

# 3.06% - n_estimators=26, max_depth= 26
# 12.21; 0.88; 0.16

# 0.41% - n_estimators=100, max_depth= 26, min_samples_split= 6
# 12.30; 0.87; 0.18

###  $\varepsilon$ Support Vector Regressor

In [None]:
# 20.64% - kernel='rbf', C=2, gamma=0.094, epsilon=0.069
# 13.49; 0.83; 0.50

# 20.16% - kernel='rbf', C=2, gamma=0.09, epsilon=0.07
# 13.49; 0.83; 0.49

# 19.58% - kernel='rbf', C=2, gamma=0.09, epsilon= 0.06
# 13.23; 0.84; 0.49

# 17.76% - kernel='rbf', C=2, gamma=0.09, epsilon= 0.05
# 15.15; 0.84; 0.49

# 17.29% - kernel='rbf', C=2, gamma=0.1, epsilon=0.04
# 14.78; 0.84; 0.49

# 16.79% - kernel='rbf', C=2, gamma=0.09, epsilon=.1
# 13.53; 0.83; 0.49

# 7.65% - kernel='rbf', C=1, gamma=0.1, epsilon=.1
# 15.54; 0.71; 0.93

# 5.04% - kernel='rbf', C=100, gamma=0.1, epsilon=.1
# 14.54; 0.85; 0.24

### $\nu$ Support Vector Regressor


In [None]:
# 18.02% - kernel='rbf', C=13, gamma=0.04, nu=0.55
# 14.10; 0.91; 0.15

# 12.33% - kernel='rbf', C=20, gamma=0.02, nu=0.55
# 12.51; 0.92; 0.13

### Multilayer perceptron with 3 hidden layers

In [None]:
"""hyperparameter_mlp = {
    "lags": [1],
    'model': [
        sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(n,20,20), activation='relu', solver='adam', random_state=0)
        for n in np.arange(1, 101, 1)
        #for m in np.arange(1, 51, 3)
        #for p in np.arange(1, 51, 3)
    ]
}"""

"""mlp_tune = RegressionModel(
    lags= hyperparameter_mlp['lags'][0],
    model=sklearn.neural_network.MLPRegressor(hidden_layer_sizes=(0,0,0), activation='relu', solver='adam', random_state=0)
)"""

"""tuning_mlp = mlp_tune.gridsearch(
    parameters= hyperparameter_mlp,
    series= train,
    val_series= test,
    start= 0.1,
    metric= mse,
    n_jobs= -1,
    verbose= True
)"""

"""# best model parameters
best_model, best_params = tuning_mlp[0], tuning_mlp[1]
best_model.model_params"""

# 20.21% - hidden_layer_sizes=(40, 20, 20), max_iter=200, activation='relu', random_state= 0
# 16.43; 0.91; 0.12

# 17.51% - (40, 30, 30)
# 15.93% - (40, 20, 20)
# 13.06% - (6, 26, 31)
# 9.41% - (14, 2, 2)
# 7.09% - (12, 5, 5)
# 6.44% - (10, 6, 4)
# 5.72% - (10, 5, 2)

