In [9]:
import pandas as pd
import matplotlib.pyplot as plt

Data files available here : https://www.kaggle.com/code/milanzdravkovic/pharma-sales-data-analysis-and-forecasting/data

In [10]:
hourly_raw=pd.read_csv('data/saleshourly.csv')
daily_raw=pd.read_csv('data/salesdaily.csv')
weekly_raw=pd.read_csv('data/salesweekly.csv')
monthly_raw=pd.read_csv('data/salesmonthly.csv')

In [11]:
hourly_raw.dtypes

datum            object
M01AB           float64
M01AE           float64
N02BA           float64
N02BE           float64
N05B            float64
N05C            float64
R03             float64
R06             float64
Year              int64
Month             int64
Hour              int64
Weekday Name     object
dtype: object

In [12]:
import boto3
region = boto3.Session("s3").region_name
session = boto3.Session(region_name=region) 
forecast = session.client(service_name='forecast')
forecastquery = session.client(service_name='forecastquery')

# Checking to make sure we can communicate with Amazon Forecast
assert forecast.list_predictors()
import sagemaker
sagemaker_session = sagemaker.session.Session()
bucket_name = sagemaker_session.default_bucket()
key='chapter10/data'

In [13]:
import json
from tqdm.auto import trange
def get_or_create_iam_role( role_name ):

    iam = boto3.client("iam")

    assume_role_policy_document = {
        "Version": "2012-10-17",
        "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": "forecast.amazonaws.com"
              },
              "Action": "sts:AssumeRole"
            }
        ]
    }

    try:
        create_role_response = iam.create_role(
            RoleName = role_name,
            AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
        )
        role_arn = create_role_response["Role"]["Arn"]
        print("Created", role_arn)
        
        print("Attaching policies...")
        iam.attach_role_policy(
            RoleName = role_name,
            PolicyArn = "arn:aws:iam::aws:policy/AmazonForecastFullAccess"
        )

        iam.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
        )

        print("Waiting for a minute to allow IAM role policy attachment to propagate")
        for i in trange(60):
            time.sleep(1.0)
            
    except iam.exceptions.EntityAlreadyExistsException:
        print("The role " + role_name + " already exists, skipping creation")
        role_arn = boto3.resource('iam').Role(role_name).arn

    print("Done.")
    return role_arn

In [14]:
role_arn = get_or_create_iam_role( role_name = 'ForecastNotebookRole-Basic' )
role_arn

The role ForecastNotebookRole-Basic already exists, skipping creation
Done.


'arn:aws:iam::485822383573:role/ForecastNotebookRole-Basic'

In [15]:
hourly_raw['datum'] = pd.to_datetime(hourly_raw["datum"])
target_df=hourly_raw[['datum','M01AB']]
target_df['item_id'] = "M01AB"

target_df.head()
#target_df.plot(x='datum', y='M01AB', figsize=(15, 8))
target_df.to_csv('data/target_df.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
s3 = boto3.Session().resource('s3')
s3.Bucket(bucket_name).Object(key+'/target_df.csv').upload_file('data/target_df.csv')
ts_s3_path = f"s3://{bucket_name}/{key}/target_df.csv"

print(f"\nDone, the dataset is uploaded to S3 at {ts_s3_path}.")


Done, the dataset is uploaded to S3 at s3://sagemaker-us-east-1-485822383573/chapter10/data/target_df.csv.


In [17]:
target_df.head()

Unnamed: 0,datum,M01AB,item_id
0,2014-01-02 08:00:00,0.0,M01AB
1,2014-01-02 09:00:00,0.0,M01AB
2,2014-01-02 10:00:00,0.0,M01AB
3,2014-01-02 11:00:00,0.0,M01AB
4,2014-01-02 12:00:00,0.0,M01AB


In [20]:
dataset_group = "pharma_sales"
dataset_arns = []
create_dataset_group_response = \
    forecast.create_dataset_group(Domain="CUSTOM",
                                  DatasetGroupName=dataset_group,
                                  DatasetArns=dataset_arns)
dataset_group_arn = create_dataset_group_response['DatasetGroupArn']
forecast.describe_dataset_group(DatasetGroupArn=dataset_group_arn)

{'DatasetGroupName': 'pharma_sales',
 'DatasetGroupArn': 'arn:aws:forecast:us-east-1:485822383573:dataset-group/pharma_sales',
 'DatasetArns': [],
 'Domain': 'CUSTOM',
 'Status': 'ACTIVE',
 'CreationTime': datetime.datetime(2022, 8, 20, 1, 55, 41, 151000, tzinfo=tzlocal()),
 'LastModificationTime': datetime.datetime(2022, 8, 20, 1, 55, 41, 151000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': '89666a17-53fd-4118-8337-20c658df0fdc',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Sat, 20 Aug 2022 01:55:40 GMT',
   'x-amzn-requestid': '89666a17-53fd-4118-8337-20c658df0fdc',
   'content-length': '247',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [24]:
DATASET_FREQUENCY = "H" # H for hourly.
TS_DATASET_NAME = dataset_group+"_MOIAB"

TS_SCHEMA = {
   "Attributes":[
      {
         "AttributeName":"timestamp",
         "AttributeType":"timestamp"
      },
      {
         "AttributeName":"target_value",
         "AttributeType":"float"
      },
      {
         "AttributeName":"item_id",
         "AttributeType":"string"
      }
   ]
}

create_dataset_response = forecast.create_dataset(Domain="CUSTOM",
                                                  DatasetType='TARGET_TIME_SERIES',
                                                  DatasetName=TS_DATASET_NAME,
                                                  DataFrequency=DATASET_FREQUENCY,
                                                  Schema=TS_SCHEMA)

ts_dataset_arn = create_dataset_response['DatasetArn']
describe_dataset_response = forecast.describe_dataset(DatasetArn=ts_dataset_arn)

print(f"The Dataset with ARN {ts_dataset_arn} is now {describe_dataset_response['Status']}.")

The Dataset with ARN arn:aws:forecast:us-east-1:485822383573:dataset/pharma_sales_MOIAB is now ACTIVE.


In [25]:
TIMESTAMP_FORMAT = "yyyy-MM-dd hh:mm:ss"
TS_IMPORT_JOB_NAME = "HOURLY_IMPORT"
TIMEZONE = "EST"

ts_dataset_import_job_response = \
    forecast.create_dataset_import_job(DatasetImportJobName=TS_IMPORT_JOB_NAME,
                                       DatasetArn=ts_dataset_arn,
                                       DataSource= {
                                         "S3Config" : {
                                             "Path": ts_s3_path,
                                             "RoleArn": role_arn
                                         } 
                                       },
                                       TimestampFormat=TIMESTAMP_FORMAT,
                                       TimeZone = TIMEZONE)

ts_dataset_import_job_arn = ts_dataset_import_job_response['DatasetImportJobArn']
describe_dataset_import_job_response = forecast.describe_dataset_import_job(DatasetImportJobArn=ts_dataset_import_job_arn)

print(f"Waiting for Dataset Import Job with ARN {ts_dataset_import_job_arn} to become ACTIVE. This process could take 5-10 minutes.\n\nCurrent Status:")

describe_dataset_import_job_response = forecast.describe_dataset_import_job(DatasetImportJobArn=ts_dataset_import_job_arn)
print(f"\n\nThe Dataset Import Job with ARN {ts_dataset_import_job_arn} is now {describe_dataset_import_job_response['Status']}.")

Waiting for Dataset Import Job with ARN arn:aws:forecast:us-east-1:485822383573:dataset-import-job/pharma_sales_MOIAB/HOURLY_IMPORT to become ACTIVE. This process could take 5-10 minutes.

Current Status:


The Dataset Import Job with ARN arn:aws:forecast:us-east-1:485822383573:dataset-import-job/pharma_sales_MOIAB/HOURLY_IMPORT is now CREATE_PENDING.


In [26]:
import time
while True:
    status=forecast.describe_dataset_import_job(DatasetImportJobArn=ts_dataset_import_job_arn)['Status']
    print(status)
    if status in ('ACTIVE', 'CREATE_FAILED'): break
    time.sleep(10)

CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
ACTIVE


In [28]:
dataset_group = "pharma_sales"
dataset_arns = [ts_dataset_arn]
create_dataset_group_response = \
    forecast.create_dataset_group(Domain="CUSTOM",
                                  DatasetGroupName=dataset_group,
                                  DatasetArns=dataset_arns)
dataset_group_arn = create_dataset_group_response['DatasetGroupArn']
forecast.describe_dataset_group(DatasetGroupArn=dataset_group_arn)

{'DatasetGroupName': 'pharma_sales',
 'DatasetGroupArn': 'arn:aws:forecast:us-east-1:485822383573:dataset-group/pharma_sales',
 'DatasetArns': ['arn:aws:forecast:us-east-1:485822383573:dataset/pharma_sales_MOIAB'],
 'Domain': 'CUSTOM',
 'Status': 'ACTIVE',
 'CreationTime': datetime.datetime(2022, 8, 20, 2, 54, 57, 623000, tzinfo=tzlocal()),
 'LastModificationTime': datetime.datetime(2022, 8, 20, 2, 54, 57, 623000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': 'aacdc12b-234a-4f23-9e91-4d02d99b596b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Sat, 20 Aug 2022 02:54:57 GMT',
   'x-amzn-requestid': 'aacdc12b-234a-4f23-9e91-4d02d99b596b',
   'content-length': '315',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [29]:
algorithm_arn = 'arn:aws:forecast:::algorithm/'
algorithm = 'Deep_AR_Plus'
algorithm_arn_deep_ar_plus = algorithm_arn + algorithm
predictor_name_deep_ar = f"{TS_DATASET_NAME}_{algorithm.lower()}"
print(f"Predictor Name = {predictor_name_deep_ar}")

Predictor Name = pharma_sales_MOIAB_deep_ar_plus


In [32]:
FORECAST_LENGTH = 24
create_predictor_response = \
    forecast.create_predictor(PredictorName=predictor_name_deep_ar,
                              AlgorithmArn=algorithm_arn_deep_ar_plus,
                              ForecastHorizon=FORECAST_LENGTH,
                              PerformAutoML=False,
                              PerformHPO=False,
                              InputDataConfig= {"DatasetGroupArn": dataset_group_arn},
                              FeaturizationConfig= {"ForecastFrequency": 'H'}
                             )

In [33]:
predictor_arn_deep_ar = create_predictor_response['PredictorArn']

In [None]:
while True:
    status=forecast.describe_predictor(PredictorArn=predictor_arn_deep_ar)['Status']
    print(status)
    if status in ('ACTIVE', 'CREATE_FAILED'): break
    time.sleep(10)

CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PR

In [36]:
error_metrics_deep_ar_plus = forecast.get_accuracy_metrics(PredictorArn=predictor_arn_deep_ar)
error_metrics_deep_ar_plus

{'PredictorEvaluationResults': [{'AlgorithmArn': 'arn:aws:forecast:::algorithm/Deep_AR_Plus',
   'TestWindows': [{'EvaluationType': 'SUMMARY',
     'Metrics': {'RMSE': 0.06799650728058045,
      'WeightedQuantileLosses': [{'Quantile': 0.9,
        'LossValue': 2.0969126952121213},
       {'Quantile': 0.5, 'LossValue': 1.1322591406060607},
       {'Quantile': 0.1, 'LossValue': 0.47394512951515155}],
      'ErrorMetrics': [{'ForecastType': 'mean',
        'WAPE': 1.3516351381818186,
        'RMSE': 0.06799650728058045,
        'MASE': 0.6476585037121213,
        'MAPE': 0.041576848169191924}],
      'AverageWeightedQuantileLoss': 1.2343723217777778}},
    {'TestWindowStart': datetime.datetime(2019, 10, 7, 20, 0, tzinfo=tzlocal()),
     'TestWindowEnd': datetime.datetime(2019, 10, 8, 20, 0, tzinfo=tzlocal()),
     'ItemCount': 1,
     'EvaluationType': 'COMPUTED',
     'Metrics': {'RMSE': 0.06799650728058045,
      'WeightedQuantileLosses': [{'Quantile': 0.9,
        'LossValue': 2.096912

In [37]:
forecast_name_deep_ar = f"{TS_DATASET_NAME}_deeparp"
print(f"Forecast Name = {forecast_name_deep_ar}")

Forecast Name = pharma_sales_MOIAB_deeparp


In [38]:
create_forecast_response_deep_ar = forecast.create_forecast(ForecastName=forecast_name_deep_ar,
                                                        PredictorArn=predictor_arn_deep_ar)

In [39]:
forecast_arn_deep_ar = create_forecast_response_deep_ar['ForecastArn']

In [40]:
while True:
    status=forecast.describe_forecast(ForecastArn=forecast_arn_deep_ar)['Status']
    print(status)
    if status in ('ACTIVE', 'CREATE_FAILED'): break
    time.sleep(10)

CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PR

In [42]:
forecast_response_deep = forecastquery.query_forecast(
    ForecastArn=forecast_arn_deep_ar,
    Filters={"item_id": 'M01AB'})
forecast_response_deep

{'Forecast': {'Predictions': {'p10': [{'Timestamp': '2019-10-08T20:00:00',
     'Value': -0.0212139282},
    {'Timestamp': '2019-10-08T21:00:00', 'Value': -0.023361275},
    {'Timestamp': '2019-10-08T22:00:00', 'Value': -0.0044957907},
    {'Timestamp': '2019-10-08T23:00:00', 'Value': -0.0029490117},
    {'Timestamp': '2019-10-09T00:00:00', 'Value': -0.0014466448},
    {'Timestamp': '2019-10-09T01:00:00', 'Value': -0.0008959283},
    {'Timestamp': '2019-10-09T02:00:00', 'Value': -0.0005988376},
    {'Timestamp': '2019-10-09T03:00:00', 'Value': -0.0009790345},
    {'Timestamp': '2019-10-09T04:00:00', 'Value': -0.0011201639},
    {'Timestamp': '2019-10-09T05:00:00', 'Value': -0.0009358527},
    {'Timestamp': '2019-10-09T06:00:00', 'Value': -0.0010048471},
    {'Timestamp': '2019-10-09T07:00:00', 'Value': -0.0019908166},
    {'Timestamp': '2019-10-09T08:00:00', 'Value': -0.0163108818},
    {'Timestamp': '2019-10-09T09:00:00', 'Value': -0.0297796},
    {'Timestamp': '2019-10-09T10:00:00', 

In [44]:
def plot_forecasts(fcsts, exact, freq = '1H', forecastHorizon=24, time_back = 80):
    p10 = pd.DataFrame(fcsts['Forecast']['Predictions']['p10'])
    p50 = pd.DataFrame(fcsts['Forecast']['Predictions']['p50'])
    p90 = pd.DataFrame(fcsts['Forecast']['Predictions']['p90'])
    pred_int = p50['Timestamp'].apply(lambda x: pd.Timestamp(x))
    fcst_start_date = pred_int.iloc[0]
    fcst_end_date = pred_int.iloc[-1]
    time_int = exact['timestamp'].apply(lambda x: pd.Timestamp(x))
    plt.plot(time_int[-time_back:],exact['target'].values[-time_back:], color = 'r')
    plt.plot(pred_int, p50['Value'].values, color = 'k')
    plt.fill_between(pred_int, 
                     p10['Value'].values,
                     p90['Value'].values,
                     color='b', alpha=0.3);
    plt.axvline(x=pd.Timestamp(fcst_start_date), linewidth=3, color='g', ls='dashed')
    plt.axvline(x=pd.Timestamp(fcst_end_date), linewidth=3, color='g', ls='dashed')
    plt.xticks(rotation=30)
    plt.legend(['Target', 'Forecast'], loc = 'lower left')

In [45]:
def load_exact_sol(fname, item_id, is_schema_perm=False):
    exact = pd.read_csv(fname, header = None)
    exact.columns = ['item_id', 'timestamp', 'target']
    if is_schema_perm:
        exact.columns = ['timestamp', 'target', 'item_id']
    return exact.loc[exact['item_id'] == item_id]

In [47]:
fname = f'data/df_target.csv'
exact = load_exact_sol(fname, item_id)

NameError: name 'item_id' is not defined