In [None]:
#
# Install all dependencies
#

!pip install pandas
!pip install boto3
!pip install sagemaker
!pip install matplotlib
!pip install mxnet
!pip install gluonts
!pip install --upgrade "mxnet==1.4.1"

In [None]:
#
# Run basic checks
#

import mxnet

# Print the version string of the mxnet package that was imported.
print(mxnet.__version__)
# Query MXNet for the number of GPUs and report it; gpu_count stays available
# as a notebook-level variable.
gpu_count = mxnet.context.num_gpus()
print(f"The GPU count is [{gpu_count}]")

In [None]:
#
# Import dataset(s)
# Dataset retrieved from:
#   https://finance.yahoo.com/quote/%5EGSPC/history?period1=788936400&period2=1564545600&interval=1mo&filter=history&frequency=1mo
#

import boto3
import botocore

# Source bucket and object key of the raw CSV. The key is also used as the
# local file name for the download.
# NOTE(review): the bucket name is hard-coded; confirm it is valid for whoever runs this.
BUCKET_NAME = "sagemaker-studio-941048668662-pqxpata7h5"
DATASET = "SandP_1995_2019_monthly.csv"

s3 = boto3.resource('s3')

# Copy the object into the working directory.
try:
    s3.Bucket(BUCKET_NAME).download_file(DATASET, DATASET)
except botocore.exceptions.ClientError as err:
    # Any client error other than a 404 is re-raised unchanged.
    if err.response['Error']['Code'] != "404":
        raise
    # A 404 is only reported; execution continues past this cell.
    print("The object does not exist.")

In [None]:
#
# Parse dataset
#

import pandas as pd

# Load the CSV using its header row for column names and its first column as
# the index (presumably the date column of the export - confirm against the file).
# parse_dates=True asks pandas to convert that index to datetimes, so that the
# date-label slicing done later compares dates rather than raw strings and the
# plot gets a real time axis.
df = pd.read_csv(DATASET, header=0, index_col=0, parse_dates=True)

# Show the first and last rows as a quick check of the covered range.
print("First sample:")
print(df.head(1))
print("\nLast sample:")
print(df.tail(1))

In [None]:
#
# Plot available data
#

import matplotlib.pyplot as plt

# Draw the 'Adj Close' column as a line, then configure the grid on the
# current axes (the ones the pandas plot call just drew into) and render.
df['Adj Close'].plot(linewidth=2)
plt.gca().grid(which='both')
plt.show()

In [None]:
#
# Configure training and test data
#

import warnings

# ListDataset is not used in this cell; the import is kept because an earlier
# draft of this cell built ListDataset objects and other cells may still need it.
from gluonts.dataset.common import ListDataset

# Upper bounds for the two slices of `df`. Each slice runs from the first row
# up to its bound.
# NOTE(review): both bounds fall in April 2015. The dataset file name says the
# data is monthly; if so, the two slices select the same rows and test.csv
# carries no hold-out period. An earlier draft of this cell used "2013-12-01"
# as the training bound - confirm the intended split.
TRAIN_END = "2015-04-05 00:00:00"
TEST_END = "2015-04-15 00:00:00"

train = df[:TRAIN_END]
train.to_csv("train.csv")

test = df[:TEST_END]
test.to_csv("test.csv")

# Surface the degenerate split instead of silently writing two identical files.
if len(test) <= len(train):
    warnings.warn(
        "test split has no rows beyond the training split "
        "({} vs {} rows); check TRAIN_END / TEST_END".format(len(test), len(train))
    )

In [None]:
#
# Upload training and test data to S3
#

import sagemaker

# Configure S3
# The SageMaker session's default bucket is the upload target for both files.
sagemaker_session = sagemaker.Session()
s3_bucket = sagemaker_session.default_bucket()

# Destination prefixes for the training and test files inside that bucket.
s3_train_data_path = "s3://{}/gia/train".format(s3_bucket)
s3_test_data_path = "s3://{}/gia/test".format(s3_bucket)

print("Data will be uploaded to: ", s3_bucket)

# Upload to S3
# boto3 resource handle; copy_to_s3 below reads it as a notebook-level global.
# boto3 itself is imported in an earlier cell.
s3 = boto3.resource('s3')
def copy_to_s3(local_file, s3_path, override=False):
    """Upload a local file to S3 unless the target object already exists.

    Uses the notebook-level ``s3`` boto3 resource.

    Args:
        local_file: Path of the file to upload; it is opened in binary mode.
        s3_path: Destination in the form ``s3://<bucket>/<key>``.
        override: When true, upload even if an object with that key exists.

    Raises:
        AssertionError: If ``s3_path`` does not start with ``s3://``.
        ValueError: If ``s3_path`` has no bucket name or no object key.
    """
    assert s3_path.startswith('s3://'), 's3_path must start with s3://'
    # Everything up to the first '/' after the scheme is the bucket; the rest is the key.
    bucket, _, path = s3_path[len('s3://'):].partition('/')
    if not bucket or not path:
        raise ValueError(
            's3_path must look like s3://<bucket>/<key>, got {!r}'.format(s3_path)
        )
    buk = s3.Bucket(bucket)

    # A prefix listing also returns keys that merely start with `path`
    # (e.g. "<path>.bak"), so only an exact key match counts as "exists".
    already_there = any(obj.key == path for obj in buk.objects.filter(Prefix=path))
    if already_there:
        if not override:
            print('File s3://{}/{} already exists.\nSet override to upload anyway.\n'.format(bucket, path))
            return
        print('Overwriting existing file')
    with open(local_file, 'rb') as data:
        print('Uploading file to {}'.format(s3_path))
        buk.put_object(Key=path, Body=data)
        
# Upload each CSV under its own prefix. override is left at its default of
# False, so an object that is already present is not uploaded again.
copy_to_s3("train.csv", s3_train_data_path + "/train.csv")
copy_to_s3("test.csv", s3_test_data_path + "/test.csv")