In [21]:
# Python
import pandas as pd
import numpy as np
#
from prophet import Prophet
import matplotlib.pyplot as plt
import plotly.graph_objects as go
#
# Python
from prophet.serialize import model_to_json, model_from_json

In [45]:
df = pd.read_csv('../../../data/df_monthly_returns_complete_percentage.csv', index_col='Date')
df = df[df.columns[0:50]] #  remove after test

### Training Prophet

### 1 Month Actual vs Prediction

In [50]:
def train_and_plot(dataframe, months=12):
    df_train_long = dataframe.reset_index().melt(id_vars=['Date'], var_name='ticker', value_name='y')
    df_train_long.rename(columns={'Date': 'ds'}, inplace=True)
    
    # Allocate the last 5 years of data for testing
    min_date = pd.to_datetime(dataframe.index[-1]).replace(day=1) - pd.DateOffset(months=months)
    min_datestr = min_date.strftime('%Y-%m-%d')
    #
    df_train = dataframe.loc[dataframe.index < min_datestr]
    df_test = dataframe.loc[dataframe.index >= min_datestr]
    
    # model
    models = {}
    forecasts = {}
    
    for ticker, data in df_train_long.groupby('ticker'):
        model = Prophet()
        model.fit(data[['ds', 'y']])  # Train model
    
        future = model.make_future_dataframe(periods=months, freq='ME')  # Forecast next 12 months
        forecast = model.predict(future)
    
        models[ticker] = model
        forecasts[ticker] = forecast
    
    max_date = min_date + pd.DateOffset(months=1)
    max_datestr = max_date.strftime('%Y-%m-%d')
    
    # Collect 'ds' (date) and 'yhat' from each forecast
    forecast_dfs = [item[['ds', 'yhat']].rename(columns={'yhat': stock}) for stock, item in forecasts.items()]
    
    # Merge all forecasts on 'ds' (date)
    merged_forecast = forecast_dfs[0]
    for df in forecast_dfs[1:]:
        merged_forecast = merged_forecast.merge(df, on='ds', how='outer')
    
    # Compute the mean 'yhat' per time point
    y_pred = merged_forecast.iloc[:, 1:].mean(axis=1)
    y_true = dataframe.mean(axis=1)
    
    #
    train_true_list = y_pred[:len(df_train)]
    test_true_list = y_pred[len(df_train):]
    
    # Create the plot
    fig = go.Figure()
    
    # Add the timeseries line
    fig.add_trace(go.Scatter(y=y_true, x=dataframe.index.tolist(), mode='lines', name='Actual returns',
                             line=dict(color='#5c839f', width=2)))  #, line=dict(color='red'))
    # Add the training plot in red
    fig.add_trace(go.Scatter(y=train_true_list, x=dataframe.index.tolist()[:len(train_true_list)],
                             mode='lines', name='Train returns',
                             line=dict(color='red', width=2)))  #, line=dict(color='red')
    
    # Add the testing plot in green
    fig.add_trace(go.Scatter(y=test_true_list, x=dataframe.index.tolist()[len(train_true_list):],
                             mode='lines', name='Test returns',
                             line=dict(color='green', width=2)))  # , line=dict(color='green')
    
    fig.add_vline(x=min_datestr, line_color='red', line_dash='dash', line_width=1)
    
    
    # Update layout with labels
    fig.update_layout(
        title='1 Year Prediction vs Actual Plot',
        xaxis=dict(
            title='Date'
        ),
        yaxis=dict(
            title='Day closing return (%)',
            tickformat='.0%',
           # range=[-0.2,0.5]
        ),
        legend=dict(title="Legend"),
        template="plotly_white"
    )
    fig.show()
    return forecasts

In [51]:
forecasts = train_and_plot(dataframe=df, months=1)

02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:56 - cmdstanpy - INFO - Chain [1] done processing
02:25:56 - cmdstanpy - INFO - Chain [1] start processing
02:25:57 - cmdstanpy - INFO - Chain [1] done processing
02:25:57 - cmdstanpy - INFO - Chain [1] start processing
02:25:57 - cmdstanpy - INFO - Chain [1]

#### Save model for 1 month

In [30]:
with open('Prophet_1m.json', 'w') as fout:
    fout.write(model_to_json(model))  # Save model

### 6 Months Actual vs Prediction

In [31]:
months = 6

In [52]:
forecasts = train_and_plot(dataframe=df, months=6)

02:26:35 - cmdstanpy - INFO - Chain [1] start processing
02:26:35 - cmdstanpy - INFO - Chain [1] done processing
02:26:35 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1] done processing
02:26:36 - cmdstanpy - INFO - Chain [1] start processing
02:26:36 - cmdstanpy - INFO - Chain [1]

#### Save model for 6 month

In [None]:
with open('Prophet_6m.json', 'w') as fout:
    fout.write(model_to_json(model))  # Save model

### 12 months Actual vs Prediction

In [9]:
months=12

#### Save model for 12 months

In [None]:
with open('Prophet_12m.json', 'w') as fout:
    fout.write(model_to_json(model))  # Save model

In [53]:
forecasts = train_and_plot(dataframe=df, months=12)

02:27:33 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1] done processing
02:27:34 - cmdstanpy - INFO - Chain [1] start processing
02:27:34 - cmdstanpy - INFO - Chain [1]

## Load saved models

In [None]:
with open('Prophet_1m.json', 'r') as fin:
    model_m1 = model_from_json(fin.read())  # Load model
with open('Prophet_6m.json', 'r') as fin:
    model_m6 = model_from_json(fin.read())  # Load model
with open('Prophet_12m.json', 'r') as fin:
    model_12m = model_from_json(fin.read())  # Load model

### Sharpe Ratio

In [12]:
# @TODO resume here - get best performing stocks
# Create DataFrame of forecasted prices
forecasted_prices = pd.DataFrame({ticker: forecasts[ticker]['yhat'].values for ticker in forecasts})
forecasted_prices.index = forecasts['AAPL']['ds']

# Compute log returns
forecasted_returns = np.log(forecasted_prices / forecasted_prices.shift(1)).dropna()

# Calculate performance metrics
expected_returns = forecasted_returns.mean()  # Mean return per stock
volatility = forecasted_returns.std()  # Standard deviation of returns
sharpe_ratio = expected_returns / volatility  # Risk-adjusted return


invalid value encountered in log

