In [66]:
# Python
import pandas as pd
import numpy as np
#
from prophet import Prophet
import matplotlib.pyplot as plt
import plotly.graph_objects as go
#
# Python
from prophet.serialize import model_to_json, model_from_json
#
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

#
import logging

# Silence cmdstanpy (the Stan backend Prophet drives): only CRITICAL records pass,
# nothing bubbles up to the root logger, and a NullHandler swallows the rest.
logger = logging.getLogger('cmdstanpy')
logger.setLevel(logging.CRITICAL)
logger.propagate = False
logger.addHandler(logging.NullHandler())

In [67]:
# Wide table indexed by the 'Date' column; every other column is consumed
# downstream as one series per ticker (see train()).
RETURNS_CSV = '../../../data/df_monthly_returns_complete_percentage.csv'
df = pd.read_csv(RETURNS_CSV, index_col='Date')

### Train & Plot Methods

In [68]:
def train(dataframe, months=12):
    """Fit one Prophet model per ticker and forecast `months` periods ahead.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Wide frame whose index is named ``Date``; every other column is
        treated as the series of one ticker.
    months : int, default 12
        Number of future month-end periods (``freq='ME'``) appended after the
        last history date before predicting.

    Returns
    -------
    dict
        Maps ticker -> the frame returned by ``Prophet.predict`` for the
        dataframe built by ``make_future_dataframe`` (history dates plus the
        `months` future periods, per Prophet's default ``include_history``).
    """
    # Prophet expects long format with columns 'ds' (date) and 'y' (value).
    long_frame = (
        dataframe.reset_index()
        .melt(id_vars=['Date'], var_name='ticker', value_name='y')
        .rename(columns={'Date': 'ds'})
    )

    forecasts = {}
    for ticker, history in long_frame.groupby('ticker'):
        model = Prophet()
        model.fit(history[['ds', 'y']])

        # Extend the timeline by `months` month-end periods, then predict over it.
        future = model.make_future_dataframe(periods=months, freq='ME')
        forecasts[ticker] = model.predict(future)

    return forecasts

def plot(dataframe, forecasts, months=12):
    """Plot the cross-ticker mean of the actual series against the mean prediction.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Wide frame of actual values, one column per ticker, whose index holds
        dates (strings or timestamps accepted by ``pd.to_datetime``).
    forecasts : dict
        Maps ticker -> forecast frame with at least ``ds`` and ``yhat``
        columns (the structure returned by ``train``).
    months : int, default 12
        Size of the window drawn as "Test": it starts `months` months before
        the first day of the last observed month. Also used in the title.

    Raises
    ------
    ValueError
        If `forecasts` is empty.

    Notes
    -----
    The train/test split here is purely a colouring of the prediction line by
    date. Whether the "Test" segment is truly out-of-sample depends on which
    rows the models in `forecasts` were fitted on.
    """
    if not forecasts:
        raise ValueError('forecasts is empty; nothing to plot')

    # Cut-off between the "Train" and "Test" colouring: first day of the last
    # observed month, moved back by `months` months.
    min_date = pd.to_datetime(dataframe.index[-1]).replace(day=1) - pd.DateOffset(months=months)
    min_datestr = min_date.strftime('%Y-%m-%d')

    # One ['ds', <ticker>] frame per ticker, outer-merged on the date.
    per_ticker = [fc[['ds', 'yhat']].rename(columns={'yhat': ticker}) for ticker, fc in forecasts.items()]
    merged_forecast = per_ticker[0]
    for ticker_frame in per_ticker[1:]:
        merged_forecast = merged_forecast.merge(ticker_frame, on='ds', how='outer')

    # Mean across tickers at every time point.
    forecast_dates = pd.to_datetime(merged_forecast['ds'])
    y_pred = merged_forecast.drop(columns='ds').mean(axis=1)
    y_true = dataframe.mean(axis=1)
    actual_dates = pd.to_datetime(dataframe.index)

    # Split the prediction by its own dates. The forecast frame also contains
    # future rows, so positional slicing against `dataframe.index` would pair
    # predictions with the wrong (or no) dates.
    is_train = forecast_dates < min_date

    fig = go.Figure()

    fig.add_trace(go.Scatter(y=y_true, x=actual_dates, mode='lines', name='Actual returns',
                             line=dict(color='#5c839f', width=2)))
    fig.add_trace(go.Scatter(y=y_pred[is_train], x=forecast_dates[is_train],
                             mode='lines', name='Train returns',
                             line=dict(color='red', width=2)))
    fig.add_trace(go.Scatter(y=y_pred[~is_train], x=forecast_dates[~is_train],
                             mode='lines', name='Test returns',
                             line=dict(color='green', width=2)))

    # Dashed marker at the train/test cut-off.
    fig.add_vline(x=min_datestr, line_color='red', line_dash='dash', line_width=1)

    # Title follows the requested horizon (12 -> "1 Year", 6 -> "6 Months", ...).
    if months and months % 12 == 0:
        years = months // 12
        horizon = f'{years} Year' + ('' if years == 1 else 's')
    else:
        horizon = f'{months} Month' + ('' if months == 1 else 's')

    fig.update_layout(
        title=f'{horizon} Prediction vs Actual Plot',
        xaxis=dict(
            title='Date'
        ),
        yaxis=dict(
            title='Day closing return (%)',
            tickformat='.0%',
            range=[0.75, 1.6]
        ),
        legend=dict(title="Legend"),
        template="plotly_white"
    )

    fig.show()

### 1 Month Actual vs Prediction

In [69]:
# Fit one Prophet model per ticker on all of `df` and forecast 1 month past its last date.
forecasts_1m = train(dataframe=df, months=1)

In [70]:
# Mean actual vs mean predicted series, with the most recent window coloured as "Test".
# NOTE(review): forecasts_1m was fitted on all of `df`, so that window is an
# in-sample fit rather than a hold-out evaluation — confirm this is intended.
plot(dataframe=df, forecasts=forecasts_1m, months=1)


### 6 Months Actual vs Prediction

In [71]:
# Same per-ticker fit on all of `df`, forecasting 6 months past its last date.
forecasts_6m = train(dataframe=df, months=6)

In [72]:
# Mean actual vs mean predicted series for the 6-month horizon.
# NOTE(review): forecasts_6m was fitted on all of `df`, so the "Test" window is
# in-sample — confirm this is intended.
plot(dataframe=df, forecasts=forecasts_6m, months=6)


### 12 Months Actual vs Prediction

In [73]:
# Same per-ticker fit on all of `df`, forecasting 12 months past its last date.
forecasts_12m = train(dataframe=df, months=12)

In [74]:
# Mean actual vs mean predicted series for the 12-month horizon.
# NOTE(review): forecasts_12m was fitted on all of `df`, so the "Test" window is
# in-sample — confirm this is intended.
plot(dataframe=df, forecasts=forecasts_12m, months=12)

## Sharpe Ratio

In [75]:
def build_efficient_frontier(forecasts, frequency=252, min_return=0.05, weights_path="weights.csv"):
    """Optimise a max-Sharpe portfolio from per-ticker Prophet forecasts.

    Parameters
    ----------
    forecasts : dict
        Maps ticker -> forecast frame with at least ``ds`` and ``yhat`` columns.
    frequency : int, default 252
        Periods per year passed to ``mean_historical_return`` and
        ``CovarianceShrinkage`` for annualisation. 252 is PyPortfolioOpt's own
        default, so existing calls behave as before.
        NOTE(review): the forecasts built in this notebook are monthly, so 12
        is probably the intended value — confirm before relying on the figures.
    min_return : float, default 0.05
        Only tickers whose annualised mean return is strictly greater than
        this value enter the optimisation.
    weights_path : str, default "weights.csv"
        File the weights are written to. Calls that share a path overwrite
        each other's output.

    Returns
    -------
    OrderedDict
        Cleaned weights from ``EfficientFrontier.clean_weights``.

    Raises
    ------
    ValueError
        If `forecasts` is empty or no ticker exceeds `min_return`.

    Notes
    -----
    Prints the portfolio performance (``verbose=True``) as a side effect.
    NOTE(review): ``yhat`` is handed to PyPortfolioOpt with its default
    ``returns_data=False``, i.e. treated as a price-like series — confirm that
    matches what the input CSV contains.
    """
    if not forecasts:
        raise ValueError("forecasts is empty; cannot build an efficient frontier")

    # One ['ds', <ticker>] frame per ticker, outer-merged on the date.
    per_ticker = [fc[['ds', 'yhat']].rename(columns={'yhat': ticker}) for ticker, fc in forecasts.items()]
    merged_forecast = per_ticker[0]
    for ticker_frame in per_ticker[1:]:
        merged_forecast = merged_forecast.merge(ticker_frame, on='ds', how='outer')
    merged_forecast = merged_forecast.set_index('ds')

    # Screen tickers on their annualised mean return before optimising.
    mu_all = expected_returns.mean_historical_return(merged_forecast, frequency=frequency)
    optimal_tickers = mu_all[mu_all > min_return].index
    if len(optimal_tickers) == 0:
        raise ValueError(f"no ticker has an expected return above {min_return}")
    df_optimal = merged_forecast[optimal_tickers]

    # Expected returns and Ledoit-Wolf shrunk covariance of the survivors.
    mu = expected_returns.mean_historical_return(df_optimal, frequency=frequency)
    S = risk_models.CovarianceShrinkage(df_optimal, frequency=frequency).ledoit_wolf()

    # Optimise for maximal Sharpe ratio, persist and report.
    ef = EfficientFrontier(mu, S)
    ef.max_sharpe()
    cleaned_weights = ef.clean_weights()
    ef.save_weights_to_file(weights_path)
    ef.portfolio_performance(verbose=True)

    return cleaned_weights

### 1 Month

In [None]:
# Max-Sharpe portfolio from the 1-month forecasts; prints performance and writes weights.csv.
build_efficient_frontier(forecasts_1m)

### 6 Months

In [None]:
# Max-Sharpe portfolio from the 6-month forecasts; overwrites weights.csv from the previous call.
build_efficient_frontier(forecasts_6m)

### 12 Months

In [None]:
# Max-Sharpe portfolio from the 12-month forecasts; overwrites weights.csv from the previous call.
build_efficient_frontier(forecasts_12m)

In [None]:
# Plot Efficient Frontier
import matplotlib.pyplot as plt
from pypfopt.plotting import plot_efficient_frontier

# The optimiser created inside build_efficient_frontier() is local to that
# function, so no `ef` exists at notebook scope. Build a fresh, *unsolved*
# EfficientFrontier here: plot_efficient_frontier needs an instance on which
# no optimisation has been run yet.
# Assumption: the 12-month forecasts (the section directly above) are the ones
# to plot; swap in forecasts_1m / forecasts_6m for another horizon.
frontier_inputs = pd.concat(
    {ticker: fc.set_index('ds')['yhat'] for ticker, fc in forecasts_12m.items()},
    axis=1,
)

# Same screening and estimators as build_efficient_frontier(): keep tickers
# with an expected return above 5%, Ledoit-Wolf shrinkage for the covariance.
mu_all = expected_returns.mean_historical_return(frontier_inputs)
frontier_inputs = frontier_inputs[mu_all[mu_all > 0.05].index]

ef_plot = EfficientFrontier(
    expected_returns.mean_historical_return(frontier_inputs),
    risk_models.CovarianceShrinkage(frontier_inputs).ledoit_wolf(),
)

fig, ax = plt.subplots(figsize=(8, 6))
plot_efficient_frontier(ef_plot, ax=ax, show_assets=True)
plt.show()