In [220]:
# Python
import pandas as pd
import numpy as np
#
from prophet import Prophet
import plotly.graph_objects as go
#
# Python
#
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
#
import importlib
import utilities.variables as variables
import utilities.utility as utility
import utilities.plots as plots
importlib.reload(variables)
importlib.reload(utility)
importlib.reload(plots)
#
import logging
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)

In [49]:
df = pd.read_csv('../../../data/df_monthly_returns_complete_percentage.csv', index_col='Date')
df_pct = pd.read_csv('../../../data/df_monthly_returns_complete_percentage.csv', index_col='Date')

### Train & Plot Methods

In [50]:
# actual values
df_train = df.head(int(variables.ALL_YEARS_NR - 1) * 12)
df_test = df.tail(1 * 12)
# Percentage based values
df_pct_train = df_pct.head(int(variables.ALL_YEARS_NR - 1) * 12)
df_pct_test = df_pct.tail(1 * 12)

In [234]:
import cvxpy as cp

def train(dataframe, months=12):
    df_train_long = dataframe.reset_index().melt(id_vars=['Date'], var_name='ticker', value_name='y')
    df_train_long.rename(columns={'Date': 'ds'}, inplace=True)
    
    # model
    models = {}
    forecasts = {}
    
    for ticker, data in df_train_long.groupby('ticker'):
        model = Prophet()
        model.fit(data[['ds', 'y']])  # Train model
    
        future = model.make_future_dataframe(periods=months, freq='ME')  # Forecast next 12 months
        forecast = model.predict(future)
    
        models[ticker] = model
        forecasts[ticker] = forecast
        
    return forecasts

def forecast_to_df(dataframe, forecasts, months=12):
    # Allocate the last 5 years of data for testing
    min_date = pd.to_datetime(dataframe.index[-1]).replace(day=1) - pd.DateOffset(months=12)
    min_datestr = min_date.strftime('%Y-%m-%d')

    # Collect 'ds' (date) and 'yhat' from each forecast
    forecast_dfs = [item[['ds', 'yhat']].rename(columns={'yhat': stock}) for stock, item in forecasts.items()]

    # Merge all forecasts on 'ds' (date)
    merged_forecast = forecast_dfs[0]
    for df in forecast_dfs[1:]:
        merged_forecast = merged_forecast.merge(df, on='ds', how='outer')

    # Compute the mean 'yhat' per time point
    y_pred = merged_forecast.tail(months)
    y_true = dataframe.loc[dataframe.index >= min_datestr].head(months)

    return y_pred, y_true

def plot(dataframe, forecasts, months=12):
    # Allocate the last 5 years of data for testing
    min_date = pd.to_datetime(dataframe.index[-1]).replace(day=1) - pd.DateOffset(months=12)
    min_datestr = min_date.strftime('%Y-%m-%d')
    
    X_train = dataframe.loc[dataframe.index < min_datestr]
    # df_test = dataframe.loc[dataframe.index >= min_datestr]

    # Collect 'ds' (date) and 'yhat' from each forecast
    forecast_dfs = [item[['ds', 'yhat']].rename(columns={'yhat': stock}) for stock, item in forecasts.items()]

    # Merge all forecasts on 'ds' (date)
    merged_forecast = forecast_dfs[0]
    for df in forecast_dfs[1:]:
        merged_forecast = merged_forecast.merge(df, on='ds', how='outer')

    # Compute the mean 'yhat' per time point
    y_pred = merged_forecast.iloc[:, 1:].mean(axis=1)
    y_true = dataframe.mean(axis=1)

    #
    train_true_list = y_pred[:len(X_train)]
    test_true_list = y_pred[len(X_train):]
    
    # Create the plot
    fig = go.Figure()

    # Add the timeseries line
    fig.add_trace(go.Scatter(y=y_true, x=dataframe.index.tolist(), mode='lines', name='Actual returns',
                             line=dict(color='#5c839f', width=2)))  #, line=dict(color='red'))
    # Add the training plot in red
    fig.add_trace(go.Scatter(y=train_true_list, x=dataframe.index.tolist()[:len(train_true_list)],
                             mode='lines', name='Train returns',
                             line=dict(color='red', width=2)))  #, line=dict(color='red')

    # Add the testing plot in green
    fig.add_trace(go.Scatter(y=test_true_list, x=dataframe.index.tolist()[len(train_true_list):],
                             mode='lines', name='Test returns',
                             line=dict(color='green', width=2)))  # , line=dict(color='green')

    fig.add_vline(x=min_datestr, line_color='red', line_dash='dash', line_width=1)

    # Update layout with labels
    fig.update_layout(
        title='1 Year Prediction vs Actual Plot',
        xaxis=dict(
            title='Date'
        ),
        yaxis=dict(
            title='Day closing return (%)',
            tickformat='.0%',
            range=[0.75, 1.6]
        ),
        legend=dict(title="Legend"),
        template="plotly_white"
    )
    
    fig.show()

def get_portfolio_performance(forecasts, file_name = "weights.csv", min_avg_return=variables.MIN_AVG_RETURN, months=12):
    # Create DataFrame of forecasted prices
    # Collect 'ds' (date) and 'yhat' from each forecast
    forecast_dfs = [item[['ds', 'yhat']].rename(columns={'yhat': stock}) for stock, item in forecasts.items()]

    # Merge all forecasts on 'ds' (date)
    merged_forecast = forecast_dfs[0]
    for df in forecast_dfs[1:]:
        merged_forecast = merged_forecast.merge(df, on='ds', how='outer')

    merged_forecast = merged_forecast.set_index('ds')

    # Calculate expected returns and sample covariance
    mu_0 = expected_returns.mean_historical_return(merged_forecast)

    # Get only tickers with a mean historical return of at least 5% 
    optimal_tickers = mu_0[mu_0 > min_avg_return].index
    df_optimal = merged_forecast[optimal_tickers].tail(months)

    mu = expected_returns.mean_historical_return(df_optimal)
    S = risk_models.CovarianceShrinkage(df_optimal).ledoit_wolf()
    
    # Optimize for maximal Sharpe ratio
    ef = EfficientFrontier(mu, S, solver=cp.CLARABEL)
    # ef_new = EfficientFrontier(mu, S, solver=cp.CLARABEL)

    raw_weights = ef.max_sharpe()
    cleaned_weights = ef.clean_weights()
    # volatility = ef.min_volatility()
    ef.save_weights_to_file(file_name)  # saves to file
    #
    p_mu, p_sigma, p_sharpe = ef.portfolio_performance(verbose=True)

    return df_optimal, cleaned_weights, mu, S, p_sigma, p_sharpe

def create_discrete_allocation(df, raw_weights, total_portfolio_value = 10000):
    latest_prices = get_latest_prices(df)

    da = DiscreteAllocation(raw_weights, latest_prices, total_portfolio_value=total_portfolio_value)
    allocation, leftover = da.greedy_portfolio()
    print("Discrete allocation:", allocation)
    print("Funds remaining: €{:.2f}".format(leftover))

### 1 Month Actual vs Prediction

In [83]:
forecasts_1m = train(dataframe=df_train, months=1)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x2c56857d0>


In [188]:
plot(dataframe=df, forecasts=forecasts_1m, months=1)
# print('dummy')

2023-09-01 2024-09-01 00:00:00 <-1 * DateOffset: months=1>
287 1


#### Overview table - 1 Month

In [149]:
df_pred_1m, raw_weights_1m, mu_1m, S_1m, sigma_1m, sharpe_1m = get_portfolio_performance(forecasts_1m, "prophet_weights_1m.csv", min_avg_return=-0.5, months=1)
create_discrete_allocation(df_pred_1m, raw_weights_1m)

# Create overvi
df_view_1m = pd.DataFrame.from_dict(raw_weights_1m, orient='index', columns=['max_sharpe_weight'])
# Extract volatilities (square root of diagonal elements)
df_view_1m['avg_annual_volatility'] = pd.Series(np.sqrt(np.diag(S_1m)), index=S_1m.columns).values
# Set annual returns
df_view_1m['avg_annual_return'] = mu_1m.values
#
df_view_1m['return_last_period(1m)'] = round(df_pct_test.head(1).prod() - 1, 2)
df_view_1m

Expected annual return: 33.2%
Annual volatility: 49.5%
Sharpe Ratio: 0.63
Discrete allocation: {'7564.T': 230, 'IVT': 260, 'BIRD': 195, 'RWS.L': 114, 'RENT': 137, 'TWKS': 125, 'HOV': 123, 'GME': 82, 'STAA': 90, 'PMTS': 123, 'SMCI': 83, 'ARLO': 80, '6951.T': 50, 'SSP': 39, 'ITI': 65, 'TPX': 55, 'RGL.L': 69, '8111.T': 70, 'AIV': 57, 'FGP.L': 61, 'MVIS': 70, '9107.T': 54, 'NFG.L': 53, '7936.T': 60, 'LABS.L': 57, 'TEG.DE': 54, 'BSL.DE': 49, 'RCH.L': 46, '6702.T': 49, 'FN': 53, 'WOOF': 45, 'BRKR': 39, 'FOUR.L': 54, '7508.T': 52, 'BBW': 48, 'DJCO': 47, 'NDX1.DE': 40, 'EVC': 38, 'PINE.L': 49, '8803.T': 42, 'CVNA': 50, 'OHI': 44, '6287.T': 45, 'MOD': 38, 'CRCT': 48, 'DOCS.L': 42, '6701.T': 41, 'ONL': 40, 'NXST': 36, 'LAUR': 42, 'SVC': 33, '2767.T': 38, 'HLX': 28, 'DXLG': 35, '2715.T': 38, 'APLD': 38, '8141.T': 41, 'CLMB': 34, 'RTO.L': 38, 'SNX': 36, '2395.T': 41, 'HUBS': 36, 'COUR': 40, 'ONT.L': 35, 'GT': 29, 'TBCG.L': 36, 'ARCB': 32, 'MGRC': 31, 'PATH': 32, 'RHP': 31, '5101.T': 35, 'NBPE.L': 

Unnamed: 0,max_sharpe_weight,avg_annual_volatility,avg_annual_return,return_last_year
1435.T,0.00000,2.482093,0.069226,-0.09
1911.T,0.00152,2.468919,0.036460,-0.07
1925.T,0.00105,2.487015,0.164947,0.04
1928.T,0.00198,2.476667,0.040160,-0.01
1973.T,0.00000,2.467761,-0.032044,0.05
...,...,...,...,...
ZM,0.00000,2.528258,0.006081,-0.14
ZS,0.00000,2.499623,0.004087,0.02
ZTS,0.00000,2.450344,0.036335,-0.10
ZUMZ,0.00000,2.528348,0.225132,-0.08


### 6 Months Actual vs Prediction

In [179]:
forecasts_6m = train(dataframe=df_train, months=6)

In [187]:
plot(dataframe=df, forecasts=forecasts_6m, months=6)


2023-09-01 2024-09-01 00:00:00 <-1 * DateOffset: months=6>
287 6


In [164]:
df_pred_6m, raw_weights_6m, mu_6m, S_6m, sigma_6m, sharpe_6m = get_portfolio_performance(forecasts_6m, "prophet_weights_6m.csv", min_avg_return=-0.5, months=6)
create_discrete_allocation(df_pred_6m, raw_weights_6m)

Expected annual return: 43340.1%
Annual volatility: 65.7%
Sharpe Ratio: 659.43
finished
Discrete allocation: {'GME': 975, 'ACCO': 658, 'IVAC': 558, 'CENT': 477, 'MCG.L': 233, 'SBS.DE': 196, '2792.T': 150, 'AMC': 120, '6460.T': 127, 'FUBO': 114, 'CRTO': 103, '8173.T': 105, 'PEN': 89, 'CLVT': 92, '3668.T': 97, '8057.T': 78, 'TTSH': 77, 'BIG': 63, 'BOOT': 70, '7844.T': 84, 'GROW.L': 68, '9470.T': 70, 'IQ': 67, 'YETI': 64, '8008.T': 60, '8150.T': 58, '7599.T': 51, '2767.T': 51, 'SAIA': 56, 'BC8.DE': 56, 'ENPH': 46, '7846.T': 46, 'DAVA': 49, 'YUMC': 46, '9413.T': 46, 'SUP': 45, 'CPRI': 47, '7984.T': 44, 'SOHO.L': 45, 'ADNT': 45, '2471.T': 52, 'AOF.DE': 64, 'IIPR': 43, '7947.T': 42, 'PETQ': 37, 'SFM': 36, 'RH': 38, 'RESI.L': 36, '2124.T': 71, 'ETSY': 36, 'SCT.L': 33, 'SREI.L': 31, 'OMCL': 35, 'TM17.L': 36, '8079.T': 34, 'VEEV': 33, '3421.T': 32, 'IOT': 34, '7451.T': 34, '2475.T': 36, 'CWH': 31, '9882.T': 32, '7250.T': 33, '2168.T': 31, 'RM': 33, 'BCYC': 32, 'BCPT.L': 30, 'DBX': 30, '1973.T':

In [224]:

df_view_6m = pd.DataFrame.from_dict(raw_weights_6m, orient='index', columns=['max_sharpe_weight'])
# Extract volatilities (square root of diagonal elements)
df_view_6m['avg_annual_volatility'] = pd.Series(np.sqrt(np.diag(S_6m)), index=S_6m.columns).values
# Set annual returns
df_view_6m['avg_annual_return'] = mu_6m.values
#
df_view_6m['return_last_period(6m)'] = round(df_pct_test.head(6).prod() - 1, 2)
df_view_6m

Unnamed: 0,max_sharpe_weight,avg_annual_volatility,avg_annual_return,return_last_period(6m)
1435.T,0.00000,3.188391,3.414011,-0.19
1911.T,0.00000,3.161535,-0.434779,0.32
1925.T,0.00000,3.241360,0.204087,0.13
1928.T,0.00000,3.182174,1.979223,0.20
1973.T,0.00299,3.142809,-0.811765,0.30
...,...,...,...,...
ZM,0.00142,3.165392,-0.898285,-0.07
ZS,0.00113,3.164307,2.832100,0.24
ZTS,0.00000,3.132432,0.082133,-0.03
ZUMZ,0.00000,3.423219,5.954555,-0.15


### 12 months Actual vs Prediction

In [243]:
# TODO change to 12 instead of 11
forecasts_12m = train(dataframe=df_train, months=11)

In [244]:
plot(dataframe=df, forecasts=forecasts_12m, months=11)

In [245]:
# @TODO Check issue with 12 months, maybe it gets out of range, 11 Months works fine
df_pred_12m, raw_weights_12m, mu_12m, S_12m, sigma_12m, sharpe_12m = get_portfolio_performance(forecasts_12m, "prophet_weights_12m.csv", min_avg_return=0.07, months=11)
create_discrete_allocation(df_pred_12m, raw_weights_12m)

df_view_12m = pd.DataFrame.from_dict(raw_weights_12m, orient='index', columns=['max_sharpe_weight'])
# Extract volatilities (square root of diagonal elements)
df_view_12m['avg_annual_volatility'] = pd.Series(np.sqrt(np.diag(S_12m)), index=S_12m.columns).values
# Set annual returns
df_view_12m['avg_annual_return'] = mu_12m.values
#
df_view_12m['return_last_period(1y)'] = round(df_pct_test.head(12).prod() - 1, 2)
df_view_12m

Expected annual return: 86251.6%
Annual volatility: 300.1%
Sharpe Ratio: 287.40
Discrete allocation: {'2792.T': 6245, 'CPRX': 1545, 'MCG.L': 755, 'ILM1.DE': 422, 'XPP.L': 437, 'LAUR': 67, 'IVT': 1, 'PMTS': 1}
Funds remaining: €0.58


Unnamed: 0,max_sharpe_weight,avg_annual_volatility,avg_annual_return,return_last_period(1y)
2154.T,0.0,4.868636,4.463809,0.15
2395.T,0.0,3.858559,1.362946,-0.38
2760.T,0.0,4.215461,-0.851072,0.01
2792.T,0.66444,4.597486,1223.012239,0.06
4751.T,0.0,4.232147,-0.985117,0.33
7564.T,0.0,4.397553,-1.0,-0.03
9766.T,0.0,4.013118,3.394916,0.77
ANET,0.0,3.875831,0.055964,0.76
BIRD,0.0,5.28925,-0.999996,-0.5
CPRX,0.17909,3.866867,258.86685,0.62


In [246]:
S_12m.max()

2154.T      23.703615
2395.T      14.888475
2760.T      17.770114
2792.T      21.136876
4751.T      17.911066
7564.T      19.338473
9766.T      16.105114
ANET        15.022062
BIRD        27.976163
CPRX        14.952657
CVNA        14.917670
DLR         16.693498
DXLG        28.167298
FLGT        16.437160
FN          18.186169
ILM1.DE    111.386866
ITI         17.786958
IVT         15.309659
LAUR        15.260633
LZB         18.341497
MCG.L      112.102949
NEM.DE      17.508268
OMI         17.800479
PINE.L      16.633885
PMTS        14.976817
PSTG        16.590014
RENT        31.668702
RGL.L       22.044481
SHYF        17.677360
STAA        16.824904
STVG.L      18.425946
SUP         54.214820
TLYS        17.129953
TWI         28.882990
XPP.L       14.985651
dtype: float64