This Jupyter notebook serves as a place to collect all code that was used over the course of this thesis but is no longer used in the main code. In particular, this includes the definitions of functions to create pie charts and the hyperparameter tuning.

##### Functions

In [None]:
def plot_efficient_frontier(mu, S, cum_returns_portfolio, title):
    """Plot the efficient frontier with the max-Sharpe and min-volatility portfolios.

    Builds an ``EfficientFrontier`` from ``mu`` and ``S`` with the weight of the
    first asset constrained to zero, draws the frontier, marks both optimal
    portfolios, overlays randomly sampled portfolios and prints a weight table
    and a performance table.

    Args:
        mu: Expected returns handed to ``EfficientFrontier``.
        S: Covariance matrix handed to ``EfficientFrontier``; also used for the
            volatility of the random portfolios.
        cum_returns_portfolio: Per-asset return paths. They are multiplied by
            the cleaned weights and summed across columns to obtain each
            portfolio's path. Presumably cumulative returns in percent, since
            the values are divided by 100 before being formatted as
            percentages -- confirm against the caller.
        title: Title of the plot.

    Returns:
        None. The figure is shown and both tables are printed.
    """
    ef = EfficientFrontier(mu, S)
    # Force the weight of the first asset to zero in every optimised portfolio.
    ef.add_constraint(lambda w: w[0] == 0.00)
    _, ax = plt.subplots()

    # Optimise on copies; `ef` itself is only handed to the frontier plot.
    # Both copies use the optimizer's own deepcopy() for consistency.
    ef_max_sharpe = ef.deepcopy()
    ef_min_vol = ef.deepcopy()
    plotting.plot_efficient_frontier(ef, ax=ax, show_assets=False)

    # NOTE(review): the two starred points below are located by the standard
    # deviation and the last value of the portfolio's return path, whereas the
    # frontier and the random cloud are computed from mu and S -- confirm that
    # both are on a comparable scale.

    # Tangency portfolio with the maximum Sharpe ratio.
    ef_max_sharpe.max_sharpe()
    weights_max = ef_max_sharpe.clean_weights()
    ret_max = (cum_returns_portfolio * weights_max).sum(axis=1)
    std_max = ret_max.std()
    ax.scatter(std_max, ret_max.iloc[-1], marker="*", s=100, c="r", label="Max Sharpe")

    # Minimum volatility portfolio.
    ef_min_vol.min_volatility()
    weights_min = ef_min_vol.clean_weights()
    ret_min = (cum_returns_portfolio * weights_min).sum(axis=1)
    std_min = ret_min.std()
    ax.scatter(std_min, ret_min.iloc[-1], marker="*", s=100, c="g", label="Min Volatility")

    # TODO: compute the efficient frontier mathematically with the same
    # optimization objective as the EfficientFrontier class, without
    # pyportfolioopt.
    # TODO: save the return and volatility of max Sharpe and min volatility in
    # two lists to a csv file, naming them after mu.

    # Random portfolios. The samples cover all assets, i.e. they do not apply
    # the zero-weight constraint on the first asset.
    n_samples = 10000
    w = np.random.dirichlet(np.ones(ef.n_assets), n_samples)
    rets = w.dot(ef.expected_returns)
    # Row-wise quadratic form w_i' S w_i. Equivalent to diag(w @ S @ w.T) but
    # without materialising the n_samples x n_samples matrix.
    cov = np.asarray(S)
    stds = np.sqrt(((w @ cov) * w).sum(axis=1))
    sharpes = rets / stds
    ax.scatter(stds, rets, marker=".", c=sharpes, cmap="viridis_r")

    # One-row frames per portfolio (row label = portfolio, columns = assets).
    weights_max = pd.DataFrame.from_dict(weights_max, orient='index')
    weights_max.columns = ['Max Sharpe']
    weights_max = weights_max.T
    weights_min = pd.DataFrame.from_dict(weights_min, orient='index')
    weights_min.columns = ['Min Volatility']
    weights_min = weights_min.T

    # Table with the weights of both portfolios.
    weights = pd.concat([weights_max, weights_min])
    # Drop the assets (columns) whose weight is 0 in both portfolios.
    weights = weights.loc[:, (weights != 0).any(axis=0)]
    # Add a column that sums the weights per row.
    weights['Sum'] = weights.sum(axis=1)

    print("Weights:\n", weights.to_string(index=True, float_format='{:.2%}'.format))

    # Final return and volatility of both portfolios, scaled by 1/100 so the
    # percent formatter prints them correctly.
    performance = pd.DataFrame(
        {
            "Return": [ret_max.iloc[-1] / 100, ret_min.iloc[-1] / 100],
            "Volatility": [std_max / 100, std_min / 100],
        },
        index=["Max Sharpe", "Min Volatility"],
    )

    print("\nPerformance:\n", performance.to_string(float_format='{:.2%}'.format))

    # Print table as LaTeX
    #print("\nWeights (LaTeX):\n", weights.transpose().to_latex(index=True, float_format='{:.2%}'.format))
    #print("\nPerformance (LaTeX):\n", performance.transpose().to_latex(float_format='{:.2%}'.format))

    # Output
    ax.set_title(title)
    ax.legend()
    plt.tight_layout()
    plt.show()

In [None]:
def generate_pie_chart(weights, title):
    """Show a pie chart of all portfolio weights greater than 1e-5.

    Args:
        weights: Mapping from asset name to weight.
        title: Title of the chart.

    Returns:
        None. The figure is shown.
    """
    # Keep only positions above the 1e-5 threshold.
    cleaned_weights = {k: v for k, v in weights.items() if v > 1e-5}

    plt.figure(figsize=(10, 5))
    # The slices carry only their percentage; the asset names go into the
    # legend. Values are passed as a list rather than a dict view so that the
    # conversion to a numeric array is well defined.
    wedges, _, _ = plt.pie(
        list(cleaned_weights.values()),
        labels=[''] * len(cleaned_weights),
        autopct='%1.2f%%',
        startangle=90,
    )

    # Legend with the names of the remaining assets (created once).
    plt.legend(wedges, list(cleaned_weights.keys()), loc='upper left')

    # Equal aspect ratio ensures that the pie is drawn as a circle.
    plt.axis('equal')
    plt.title(title)
    plt.show()

##### Linear Regression

In [None]:
# Disabled cell: everything below is wrapped in one triple-quoted string, so it
# is evaluated as an unused string literal and none of the code runs.
# Contents: grid search over 1..335 lags for LinearRegressionModel, fits with
# 316/317/335 lags, forecast and historical-forecast plots, and a max-Sharpe
# portfolio built from the 316-lag forecast.
# NOTE(review): before re-enabling, be aware that the text uses names it never
# defines (e.g. linear_forecast_1, ef_linear_regression_1), contains IPython
# `%store` magics, and repeats the MAPE entry in the forecast plot labels.
"""# the only hyperparameter for the linear regression model is the number of lags
hyperparameter_linear = {
    "lags": list(range(1, 336))
}

# the gridsearch function returns the best model with regard to the metric, which does not necessarily have to be the best model in terms of performance

tuning_linear = LinearRegressionModel.gridsearch(
    parameters= hyperparameter_linear,
    series= train, # The target series used as input and target for training.
    val_series= test,
    start= 0.0, # represents the starting point in the time index of series from which predictions will be made to evaluate the model
    #metric= , # function that takes actual and prediction, and returns a float error value
    n_jobs= -1, # setting the parameter to -1 means using all the available cores
    verbose= True # whether to print progress
)

best_model, best_params = tuning_linear[0], tuning_linear[1]
best_model.model_params

# linear regression model with 316 lags of STOXX Europe 600
linear_model_316 = LinearRegressionModel(lags=316, output_chunk_length=1)
linear_model_316.fit(train)
linear_forecast_316 = linear_model_316.predict(len(test))

# linear regression model with 317 lags of STOXX Europe 600
linear_model_317 = LinearRegressionModel(lags=317, output_chunk_length=1)
linear_model_317.fit(train)
linear_forecast_317 = linear_model_317.predict(len(test))

# linear regression model with 335 lags of STOXX Europe 600
linear_model_335 = LinearRegressionModel(lags=335, output_chunk_length=1)
linear_model_335.fit(train) # ['STOXX Europe 600']
linear_forecast_335 = linear_model_335.predict(len(test))

%store linear_forecast_1
%store linear_forecast_316
%store linear_forecast_317
%store linear_forecast_335

# plot linear forecast with 1, 316 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
linear_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, linear_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_1), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_1), 2)) + '%')
linear_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, linear_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_316), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_316), 2)) + '%')
linear_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, linear_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_317), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_317), 2)) + '%')
linear_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, linear_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, linear_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, linear_forecast_335), 2)) + '%, MAPE: ' + str(round(mape(test, linear_forecast_335), 2)) + '%')
# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')
# show the legend outside the plot 
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

# second linear regression model with 316 lags
hf2_linear_regression = linear_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third linear regression model with 317 lags
hf3_linear_regression = linear_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# fourth linear regression model with 335 lags
hf4_linear_regression = linear_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

hf2_linear_regression['STOXX Europe 600'].plot(label='Linear Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_linear_regression, cum_returns), r2_score(hf2_linear_regression, cum_returns), mse(hf2_linear_regression, cum_returns)))
hf3_linear_regression['STOXX Europe 600'].plot(label='Linear Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_linear_regression, cum_returns), r2_score(hf3_linear_regression, cum_returns), mse(hf3_linear_regression, cum_returns)))
hf4_linear_regression['STOXX Europe 600'].plot(label='Linear Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%, MSE: {:.2f})'.format(mape(hf4_linear_regression, cum_returns), r2_score(hf4_linear_regression, cum_returns), mse(hf4_linear_regression, cum_returns)))

linear_forecast_316 = linear_forecast_316.pd_dataframe()

mu_linear_regression_316 = expected_returns.mean_historical_return(linear_forecast_316)
S_linear_regression_316 = risk_models.sample_cov(linear_forecast_316)


ef_linear_regression_316 = EfficientFrontier(mu_linear_regression_316, S_linear_regression_316)
ef_linear_regression_316.add_constraint(lambda w: w[0] == 0)
weights_lr_316 = ef_linear_regression_316.max_sharpe()

# show the weights of the optimal portfolio cleaned
cleaned_weights_lr_1 = ef_linear_regression_1.clean_weights()
# print 
print('The weights of the optimal portfolio with 1 lag are: ' + str(cleaned_weights_lr_1))

lr_returns_316 = (cum_returns_portfolio * weights_lr_316).sum(axis=1)
portfolio_returns['linear_regression_316'] = lr_returns_316

#show the last value of each portfolio in percent and round two 2 decimals
print('Last value of the STOXX Europe 600: ' + str(round(cum_returns_portfolio['STOXX Europe 600'].iloc[-1], 2)) + '%')
print('Last value of the equal weighted portfolio: ' + str(round(portfolio_returns['equal_weighted'].iloc[-1], 2)) + '%')
print('Last value of the historical average portfolio: ' + str(round(portfolio_returns['historical_average'].iloc[-1], 2)) + '%')
print('Last value of the linear regression portfolio with 1 lag: ' + str(round(portfolio_returns['linear_regression_1'].iloc[-1], 2)) + '%')
print('Last value of the linear regression portfolio with 316 lags: ' + str(round(portfolio_returns['linear_regression_316'].iloc[-1], 2)) + '%')"""

##### Ridge Regression

In [None]:
# Disabled cell: everything below is wrapped in one triple-quoted string, so it
# is evaluated as an unused string literal and none of the code runs.
# Contents: grid search over the odd lags 1..335 and Ridge alphas taken from
# np.arange(0, 1.1, 0.1), fits with 316/317/335 lags (alpha=1), forecast plots,
# and a max-Sharpe portfolio built from the 316-lag forecast.
# NOTE(review): ridge_forecast_1 and portfolio_returns['ridge_regression_1']
# are used but not created in this text; the historical-forecast lines are
# additionally commented out with '#'.
"""hyperparameter_ridge = {
    "lags": list(range(1,336,2)),
    'model': [
        sklearn.linear_model.Ridge(alpha=a) for a in np.arange(0, 1.1, 0.1)
    ]
}

ridge_regression_tune = RegressionModel(
    lags= hyperparameter_ridge['lags'][0],
    model=sklearn.linear_model.Ridge(0)
)

tuning_ridge = ridge_regression_tune.gridsearch(
    parameters= hyperparameter_ridge,
    series= train,
    val_series= test,
    start= 0.1,
    #metric= mse,
    n_jobs= -1,
    verbose= True
)

best_model, best_params = tuning_ridge[0], tuning_ridge[1]
best_model.model_params

# ridge regression model with 316 lags of STOXX Europe 600
ridge_model_316 =RegressionModel(lags=316, model=sklearn.linear_model.Ridge(alpha=1))
ridge_model_316.fit(train)
ridge_forecast_316 = ridge_model_316.predict(len(test))

# ridge regression model with 317 lags of STOXX Europe 600
ridge_model_317 = RegressionModel(lags=317, model=sklearn.linear_model.Ridge(alpha=1))
ridge_model_317.fit(train)
ridge_forecast_317 = ridge_model_317.predict(len(test))

# ridge regression model with 335 lags of STOXX Europe 600
ridge_model_335 = RegressionModel(lags=335, model=sklearn.linear_model.Ridge(alpha=1))
ridge_model_335.fit(train)
ridge_forecast_335 = ridge_model_335.predict(len(test))

# plot the ridge forecast with 1, 316, 317 and 335 lags and label with MAPE rounded to 2 decimals and R2 score rounded to 2 decimals and MSE rounded to 2 decimals
ridge_forecast_1['STOXX Europe 600'].plot(label='1 lag, MAPE: ' + str(round(mape(test, ridge_forecast_1), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_1), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_1), 2)) + '%')
ridge_forecast_316['STOXX Europe 600'].plot(label='316 lags, MAPE: ' + str(round(mape(test, ridge_forecast_316), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_316), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_316), 2)) + '%')
ridge_forecast_317['STOXX Europe 600'].plot(label='317 lags, MAPE: ' + str(round(mape(test, ridge_forecast_317), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_317), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_317), 2)) + '%')
ridge_forecast_335['STOXX Europe 600'].plot(label='335 lags, MAPE: ' + str(round(mape(test, ridge_forecast_335), 2)) + '%, R2 score: ' + str(round(r2_score(test, ridge_forecast_335), 2)) + '%, MSE: ' + str(round(mse(test, ridge_forecast_335), 2)) + '%')

# rewrite to see only one line
cum_returns['STOXX Europe 600'].plot(label='actual')

# second ridge regression model with 316 lags
#hf2_ridge_regression = ridge_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third ridge regression model with 317 lags
#hf3_ridge_regression = ridge_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# fourth ridge regression model with 335 lags
#hf4_ridge_regression = ridge_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

#hf2_ridge_regression['STOXX Europe 600'].plot(label='Ridge Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_ridge_regression, cum_returns), r2_score(hf2_ridge_regression, cum_returns), mse(hf2_ridge_regression, cum_returns)))
#hf3_ridge_regression['STOXX Europe 600'].plot(label='Ridge Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_ridge_regression, cum_returns), r2_score(hf3_ridge_regression, cum_returns), mse(hf3_ridge_regression, cum_returns)))
#hf4_ridge_regression['STOXX Europe 600'].plot(label='Ridge Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_ridge_regression, cum_returns), r2_score(hf4_ridge_regression, cum_returns), mse(hf4_ridge_regression, cum_returns)))

ridge_forecast_316 = ridge_forecast_316.pd_dataframe()
mu_ridge_regression_316 = expected_returns.mean_historical_return(ridge_forecast_316)
S_ridge_regression_316 = risk_models.sample_cov(ridge_forecast_316)

ef_ridge_regression_316 = EfficientFrontier(mu_ridge_regression_316, S_ridge_regression_316)
ef_ridge_regression_316.add_constraint(lambda w: w[0] == 0)
weights_rr_316 = ef_ridge_regression_316.max_sharpe()

print('Weights for ridge regression with 316 lags: ' + str(np.round(pd.Series(weights_rr_316), 3)))

rr_returns_316 = (cum_returns_portfolio * weights_rr_316).sum(axis=1)
portfolio_returns['ridge_regression_316'] = rr_returns_316

# plot the cumulative returns of the benchmark and the ridge regression portfolio
plt.figure(figsize=(15,10))
plt.plot(cum_returns_portfolio['STOXX Europe 600'], label='STOXX Europe 600')
plt.plot(portfolio_returns['equal_weighted'], label='Equal Weighted Portfolio')
plt.plot(portfolio_returns['historical_average'], label='Historical Average Portfolio')
plt.plot(portfolio_returns['ridge_regression_1'], label='Ridge Regression Portfolio with 1 lag')
plt.plot(portfolio_returns['ridge_regression_316'], label='Ridge Regression Portfolio with 316 lags')
plt.legend()
plt.show();"""

#### LASSO Regression

In [None]:
# Disabled cell: everything below is wrapped in one triple-quoted string, so it
# is evaluated as an unused string literal and none of the code runs.
# Contents: grid search over Lasso alphas taken from np.arange(0, 1.1, 0.01)
# with a single lag value (1), fits with 316/317/335 lags (alpha=0), historical
# forecasts, and a max-Sharpe portfolio built from the 316-lag forecast.
# NOTE(review): portfolio_returns['lasso_regression_316'] is plotted before the
# last two lines of the text assign it, and
# portfolio_returns['lasso_regression_1'] is not created in this text.
"""hyperparameter_lasso = {
    "lags": list(range(1,2)),
    'model': [
        sklearn.linear_model.Lasso(alpha=a) for a in np.arange(0, 1.1, 0.01)
    ]
}

lasso_regression_tune = RegressionModel(
    lags= hyperparameter_lasso['lags'][0],
    model=sklearn.linear_model.Lasso(0)
)

tuning_lasso = lasso_regression_tune.gridsearch(
    parameters= hyperparameter_lasso,
    series= train,
    val_series= test,
    start= 0.1,
    #metric= mape,
    n_jobs= -1,
    verbose= True
)

best_model, best_params = tuning_lasso[0], tuning_lasso[1]
best_model.model_params


# lasso regression model with 316 lags of STOXX Europe 600
lasso_model_316 = RegressionModel(lags=316, model=sklearn.linear_model.Lasso(alpha=0))
lasso_model_316.fit(train)
lasso_forecast_316 = lasso_model_316.predict(len(test))
# lasso regression model with 317 lags of STOXX Europe 600
lasso_model_317 = RegressionModel(lags=317, model=sklearn.linear_model.Lasso(alpha=0))
lasso_model_317.fit(train)
lasso_forecast_317 = lasso_model_317.predict(len(test))
# lasso regression model with 335 lags of STOXX Europe 600
lasso_model_335 = RegressionModel(lags=335, model=sklearn.linear_model.Lasso(alpha=0))
lasso_model_335.fit(train)
lasso_forecast_335 = lasso_model_335.predict(len(test))

# second lasso regression model with 316 lags
hf2_lasso_regression = lasso_model_316.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# third lasso regression model with 317 lags
hf3_lasso_regression = lasso_model_317.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)
# fourth lasso regression model with 335 lags
hf4_lasso_regression = lasso_model_335.historical_forecasts(series= cum_returns, start= 0.6, forecast_horizon= 1, verbose= True)

#hf2_lasso_regression['STOXX Europe 600'].plot(label='Lasso Regression with 316 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf2_lasso_regression, cum_returns), r2_score(hf2_lasso_regression, cum_returns), mse(hf2_lasso_regression, cum_returns)))
#hf3_lasso_regression['STOXX Europe 600'].plot(label='Lasso Regression with 317 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf3_lasso_regression, cum_returns), r2_score(hf3_lasso_regression, cum_returns), mse(hf3_lasso_regression, cum_returns)))
#hf4_lasso_regression['STOXX Europe 600'].plot(label='Lasso Regression with 335 lags (MAPE: {:.2f}%, R2: {:.2f}%), MSE: {:.2f}'.format(mape(hf4_lasso_regression, cum_returns), r2_score(hf4_lasso_regression, cum_returns), mse(hf4_lasso_regression, cum_returns)))

lasso_forecast_316 = lasso_forecast_316.pd_dataframe()

mu_lasso_regression_316 = expected_returns.mean_historical_return(lasso_forecast_316)
S_lasso_regression_316 = risk_models.sample_cov(lasso_forecast_316)

ef_lasso_regression_316 = EfficientFrontier(mu_lasso_regression_316, S_lasso_regression_316)
ef_lasso_regression_316.add_constraint(lambda w: w[0] == 0)
weights_lar_316 = ef_lasso_regression_316.max_sharpe()

print('Weights for lasso regression with 316 lags: ' + str(np.round(pd.Series(weights_lar_316), 3)))

# plot the cumulative returns of the benchmark and the lasso regression portfolio
plt.figure(figsize=(15,10))
plt.plot(cum_returns_portfolio['STOXX Europe 600'], label='STOXX Europe 600')
plt.plot(portfolio_returns['equal_weighted'], label='Equal Weighted Portfolio')
plt.plot(portfolio_returns['historical_average'], label='Historical Average Portfolio')
plt.plot(portfolio_returns['lasso_regression_1'], label='Lasso Regression with 1 lag')
plt.plot(portfolio_returns['lasso_regression_316'], label='Lasso Regression with 316 lags')
plt.legend(loc='upper left', fontsize=12)
plt.show();

lar_returns_316 = (cum_returns_portfolio * weights_lar_316).sum(axis=1)
portfolio_returns['lasso_regression_316'] = lar_returns_316"""

#### Elastic Net