In [None]:
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import sts
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
from matplotlib import pylab as plt
import matplotlib.dates as mdates
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error
#from google.colab import drive

# Import data
#drive.mount('/content/drive')
# The path is a raw string: in a normal literal the `\U` of `C:\Users` starts
# a unicode escape and the whole cell fails with a SyntaxError.
df = pd.read_csv(
    r'C:\Users\Farooqi\Downloads\data.csv',
    names=["company", "datetime", "close", "volume", "open", "high", "low"],
    dtype={'datetime': 'str', 'company': 'str', 'close': 'float'},
    parse_dates=['datetime'],
    header=0,
    skiprows=0,
    na_values=['Null']
)

# Filter Data.
# Keep only the META rows. `drop=True` relabels the rows 0..n-1 without adding
# an 'index' column, so no follow-up drop is needed (the original follow-up
# referenced an undefined `meta_df` and raised NameError).
META_df = df[df['company'] == 'META'].reset_index(drop=True)

# `dates` and `close` are captured BEFORE sorting, so they stay in CSV row
# order. Later cells index them with the boolean masks defined below.
dates = META_df['datetime']
close = META_df['close']

# Sort the DataFrame by datetime
META_df = META_df.sort_values(by=['datetime'])

# Take an arbitrary slice of the data
start_date = '07/18/2013'
end_date = '07/17/2023'
middle_date = '07/17/2022'

# Slicing the data into training and testing sets.
# training: (start_date, middle_date), testing: [middle_date, end_date);
# together they cover exactly the rows selected by `all_dates`.
all_dates = (META_df['datetime'] > start_date) & (META_df['datetime'] < end_date)
training_dates = (META_df['datetime'] > start_date) & (META_df['datetime'] < middle_date)
testing_dates = (META_df['datetime'] >= middle_date) & (META_df['datetime'] < end_date)

# Dropping columns as we don't need these. Our focus is on only two columns ['datetime', 'close']
stock_df = META_df.drop(columns=['company', 'volume', 'open', 'high', 'low'])

# Slices in CSV row order (pandas aligns the masks to `close` / `dates` by label).
a_training_data = close[training_dates]
all_data = close[all_dates]
dates_list = dates[all_dates]

# Training series in chronological order, taken from the sorted frame.
# The original reversed the CSV-order slice (`a_training_data[::-1]`), which is
# chronological only if the file is stored newest-first; selecting from the
# sorted frame yields the same series in that case and stays correct otherwise.
training_data = META_df.loc[training_dates, 'close'].reset_index(drop=True)
# Relabel the observations 1..N instead of 0..N-1.
training_data.index = training_data.index + 1


In [None]:
# BSTS Model - Local Level Component
# The model is built from the plain NumPy values, the same input that the
# fit and forecast calls below receive.
trend = tfp.sts.LocalLevel(
    observed_time_series=training_data.values,
    name='trend'
)

model = sts.Sum(
    components=[trend],
    observed_time_series=training_data.values
)

# Surrogate posterior for variational inference. It is not consumed by the HMC
# fit below; it is kept because the name is defined at notebook scope.
variational_posteriors = tfp.sts.build_factored_surrogate_posterior(
    model=model)

# Model fitting
q_samples_demand_, kernel_results = tfp.sts.fit_with_hmc(
    model=model, observed_time_series=training_data.values)

# Hold-out dates and prices in chronological order.
# The forecast continues forward in time from the end of the training series,
# so the actual values must be ordered the same way before they are compared
# or plotted against it. Sorting by date (instead of relying on the row order
# of `dates` / `close`) keeps every forecast step paired with its own day.
test_order = dates[testing_dates].sort_values().index
test_dates = dates.loc[test_order].values
test_close = close.loc[test_order].values

# Forecasting the model: one step per hold-out observation
findata_forecast_dist = tfp.sts.forecast(
    model=model,
    observed_time_series=training_data.values,
    parameter_samples=q_samples_demand_,
    num_steps_forecast=len(test_close))

num_samples = 100
# `[..., 0]` drops the trailing axis of the forecast arrays.
findata_forecast_mean, findata_forecast_scale, findata_forecast_samples = (
    findata_forecast_dist.mean().numpy()[..., 0],
    findata_forecast_dist.stddev().numpy()[..., 0],
    findata_forecast_dist.sample(num_samples).numpy()[..., 0])

# RMSE
BSTS_local_score = mean_squared_error(test_close, findata_forecast_mean)
rmse_local = np.sqrt(BSTS_local_score)
print("RMSE Local:" , rmse_local)

# MAE
mae = mean_absolute_error(test_close, findata_forecast_mean)
print("Mean Absolute Error:", mae)

# Plot
findata_loc = mdates.YearLocator(3)
# '%m' is the month number; the original '%M' is the strftime code for minutes.
findata_fmt = mdates.DateFormatter('%m/%Y')
fig, (ax) = plt.subplots(1, 1, figsize=(48, 27))
ax.plot(
    dates[all_dates].values,
    close[all_dates].values,
    label='Observed close',
)
ax.plot(
    test_dates,
    findata_forecast_mean,
    linestyle='dashed',
    label='Forecast mean')

# Shaded band: forecast mean +/- 2 forecast standard deviations
ax.fill_between(
    test_dates,
    findata_forecast_mean - 2 * findata_forecast_scale,
    findata_forecast_mean + 2 * findata_forecast_scale,
    color='Orange',
    alpha=0.2)

# Attach the tick locator/formatter; the original created them but never used them.
ax.xaxis.set_major_locator(findata_loc)
ax.xaxis.set_major_formatter(findata_fmt)
ax.set_title('META close: local level forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Close')
ax.legend()
plt.show()

In [None]:
# Decompose the fitted series and the forecast into per-component distributions.
# `q_samples_demand_` holds the posterior parameter draws returned by
# `tfp.sts.fit_with_hmc` in the model cell; the original passed `samples`,
# a name that is not defined anywhere in the visible notebook.
component_dists = sts.decompose_by_component(
    model,
    observed_time_series=training_data.values,
    parameter_samples=q_samples_demand_)

forecast_component_dists = sts.decompose_forecast_by_component(
    model,
    forecast_dist=findata_forecast_dist,
    parameter_samples=q_samples_demand_)

# Mean / stddev of every component, keyed by the component's name.
component_means, component_stddevs = (
    {k.name: c.mean() for k, c in component_dists.items()},
    {k.name: c.stddev() for k, c in component_dists.items()})

forecast_component_means, forecast_component_stddevs = (
    {k.name: c.mean() for k, c in forecast_component_dists.items()},
    {k.name: c.stddev() for k, c in forecast_component_dists.items()}
    )

# Join the fitted part and the forecast part of the trend along the time axis.
# NOTE(review): the 'trend/' key assumes the component name carries a trailing
# slash - confirm against the keys of `component_means`.
trend_mean = np.concatenate(
    [component_means['trend/'],
    forecast_component_means['trend/']],
    axis=-1)
trend_stddev = np.concatenate(
    [component_stddevs['trend/'],
    forecast_component_stddevs['trend/']],
    axis=-1)

# X axis: every date in the analysed window, oldest first, so it lines up with
# the concatenated trend (training period followed by the forecast period).
# Sorting makes this explicit instead of reversing the stored row order.
trend_dates = np.sort(dates[all_dates].values)

fig, ax1 = plt.subplots(1, 1, figsize=(48/2, 27/2))
ax1.plot(
    trend_dates,
    trend_mean,
)
# Shaded band: trend mean +/- 2 standard deviations
ax1.fill_between(
    trend_dates,
    trend_mean - 2 * trend_stddev,
    trend_mean + 2 * trend_stddev,
    color='Orange',
    alpha=0.2)
ax1.set_title('Trend component (fitted and forecast)')
ax1.set_xlabel('Date')
ax1.set_ylabel('Trend')
plt.show()
