In [None]:
#
# Install all dependencies`
#

EPOCHS = 20
MODEL_NAME = "gia"

!pip install pandas
!pip install boto3
!pip install sagemaker
!pip install matplotlib
!pip install mxnet==1.5.1
!pip install gluonts

In [None]:
#
# Setting up logging
#

# This is a logging helper whose file handler is set up on a separate thread. When the Jupyter notebook is closed but left
# running to train models, anything printed is not saved.
# So instead of printing debug info, we log() it, and it is written to a log file as well.
# This is useful when running training over a weekend, for example.
# NOTE: the helper below is currently commented out, so log() is not defined unless this cell is restored.
# import logging
# import threading
# import datetime
# logger = logging.getLogger()

# def setup_file_logger():
#     log_file = "{}-{}.log".format(MODEL_NAME, str(datetime.datetime.now()))
#     hdlr = logging.FileHandler(log_file)
#     formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
#     hdlr.setFormatter(formatter)
#     logger.addHandler(hdlr) 
#     logger.setLevel(logging.INFO)

# def log(message, type='info'):
#     #outputs to Jupyter console
#     print('{} {}'.format(datetime.datetime.now(), message))
#     #outputs to file
#     if type == 'info':
#         logger.info(message)
#     elif type == 'warning':
#         logger.warning(message)
#     elif type == 'error':
#         logger.error(message)
#     elif type == 'critical':
#         logger.critical(message)

# threaded_logging = threading.Thread(target=setup_file_logger)
# threaded_logging.start()
# threaded_logging.join()
# log("Background logger started")

In [None]:
#
# Sanity-check the MXNet installation
#

import mxnet

# Show which MXNet build the kernel actually picked up.
print(mxnet.__version__)

# Number of GPUs MXNet reports for this machine.
gpu_count = mxnet.context.num_gpus()
print(f"The GPU count is [{gpu_count}]")

In [None]:
#
# Import dataset(s)
# Dataset retrieved from:
#   https://finance.yahoo.com/quote/%5EGSPC/history?period1=788936400&period2=1564545600&interval=1mo&filter=history&frequency=1mo
#

import logging

import boto3
import botocore

BUCKET_NAME = "sagemaker-studio-941048668662-pqxpata7h5"
DATASET = "SandP_1995_2019_monthly.csv"

s3 = boto3.resource('s3')

try:
    # Download the S3 object to a local file with the same name as its key.
    s3.Bucket(BUCKET_NAME).download_file(DATASET, DATASET)
except botocore.exceptions.ClientError as e:
    if e.response['Error']['Code'] == "404":
        # The notebook's log() helper is commented out, so calling it here
        # would raise NameError and hide the real problem. Report through the
        # standard-library root logger instead; with no handlers configured,
        # CRITICAL records are still written to stderr.
        logging.getLogger().critical("The object does not exist.")
    else:
        # Any other client error (permissions, throttling, ...) is unexpected.
        raise

In [None]:
#
# Load the downloaded CSV into a DataFrame
#

import pandas as pd

# The first row holds the column names; the first column becomes the index.
df = pd.read_csv(DATASET, header=0, index_col=0)

# Show the first and last rows as a quick check of the covered range.
print("First sample:", df.iloc[:1], sep="\n")
print("\nLast sample:", df.iloc[-1:], sep="\n")

In [None]:
#
# Visualise the full 'Adj Close' series
#

import matplotlib.pyplot as plt

df.loc[:, 'Adj Close'].plot(linewidth=2)
plt.grid(which='both')
plt.show()

In [None]:
#
# Configure training data
#

from gluonts.dataset.common import ListDataset

# Single-series dataset: 'Adj Close' from the first row up to the
# "2013-12-01" label (label-based slice on the DataFrame index).
train_entry = {"start": df.index[0], "target": df['Adj Close'][:"2013-12-01"]}
training_data = ListDataset([train_entry], freq="1M")

In [None]:
#
# Fit a DeepAR estimator on the training data
#

#from gluonts.dataset.artificial._base import ComplexSeasonalTimeSeries
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer

# Alternative (disabled) estimator kept for reference:
# estimator = SimpleFeedForwardEstimator(
#     num_hidden_dimensions=[1],
#     prediction_length=12,
#     context_length=100,
#     freq="1M",
#     trainer=Trainer(
#         epochs=10,
#         learning_rate=1e-3,
#         num_batches_per_epoch=1
#     )
# )

# Monthly frequency, 12-step prediction length, trained for EPOCHS epochs.
estimator = DeepAREstimator(
    freq="1M",
    prediction_length=12,
    trainer=Trainer(epochs=EPOCHS),
)
predictor = estimator.train(training_data=training_data)

In [None]:
from gluonts.dataset.util import to_pandas

# Configure test data: same series as the training set, but sliced up to the
# "2015-04-15 00:00:00" label so it extends past the training cut-off.
test_entry = {"start": df.index[0], "target": df['Adj Close'][:"2015-04-15 00:00:00"]}
test_data = ListDataset([test_entry], freq="1M")

In [None]:
from gluonts.evaluation.backtest import make_evaluation_predictions

# Returns two iterators: one over forecasts, one over the matching series.
# 100 sample paths are requested per forecast.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data, predictor=predictor, num_samples=100
)

In [None]:
# Materialise both iterators (forecasts first, then the time series).
forecasts, tss = list(forecast_it), list(ts_it)

# Only the first entry of each list is inspected below.
forecast_entry, ts_entry = forecasts[0], tss[0]

# Basic facts about the sampled forecast.
print(f"Number of sample paths: {forecast_entry.num_samples}")
print(f"Dimension of samples: {forecast_entry.samples.shape}")
print(f"Start date of the forecast window: {forecast_entry.start_date}")
print(f"Frequency of the time series: {forecast_entry.freq}")

# Point summaries over the forecast window.
print(f"Mean of the future window:\n {forecast_entry.mean}")
print(f"0.5-quantile (median) of the future window:\n {forecast_entry.quantile(0.5)}")

In [None]:
def plot_prob_forecasts(ts_entry, forecast_entry):
    """Plot the tail of an observed series together with its forecast.

    Draws the last 150 points of ``ts_entry`` on a new 10x7 figure, then
    calls ``forecast_entry.plot`` in green with 50% and 90% prediction
    intervals, adds a grid and a legend, and shows the figure.

    Args:
        ts_entry: observed series; must support slicing and ``.plot(ax=...)``.
        forecast_entry: forecast object exposing ``.plot(prediction_intervals=..., color=...)``.
    """
    plot_length = 150
    prediction_intervals = (50.0, 90.0)

    # Legend labels: the two lines first, then the intervals widest-first.
    interval_labels = [
        f"{k}% prediction interval" for k in reversed(prediction_intervals)
    ]
    legend = ["observations", "median prediction"] + interval_labels

    fig, ax = plt.subplots(1, 1, figsize=(10, 7))
    observed_tail = ts_entry[-plot_length:]
    observed_tail.plot(ax=ax)  # observed values
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.show()

plot_prob_forecasts(ts_entry, forecast_entry)

In [None]:
# Evaluate the model
import json
from gluonts.evaluation import Evaluator

# Score the forecasts against the observed series at the 0.1/0.5/0.9 quantiles.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(
    iter(tss),
    iter(forecasts),
    num_series=len(test_data),
)

# Pretty-print the aggregate metrics.
print(json.dumps(agg_metrics, indent=4))

In [None]:
#
# Persist the trained predictor
#

import os
import pathlib
import datetime

# Parent directory that collects all runs.
forecast_output = "gia_forecast"
os.makedirs(forecast_output, exist_ok=True)

# One sub-directory per run, named after the current time to the minute.
now = datetime.datetime.now()
run_stamp = now.strftime("%Y-%m-%d-%H-%M")
forecast_output_instance = forecast_output + "/run_" + run_stamp
os.makedirs(forecast_output_instance, exist_ok=True)

# Write the prediction network first, then the predictor, to the same directory.
predictor.serialize_prediction_net(pathlib.Path(forecast_output_instance))
predictor.serialize(pathlib.Path(forecast_output_instance))