# Import Item Metadata Dataset

In [1]:
import boto3
from time import sleep
import os
import pandas as pd
import json
import time
import pprint
import numpy as np

In [2]:
# Locations of the raw input files, all relative to `data_dir`.
data_dir = 'data'
train_file_name = 'train.csv'
features_file_name = 'features.csv'
store_file_name = 'stores.csv'

# Read the store table once and keep `store_data` as the pristine copy;
# `store_df` is the working frame that later cells reshape.
stores_csv_path = os.path.join(data_dir, store_file_name)
store_data = pd.read_csv(stores_csv_path)
store_df = store_data.copy()


In [3]:
# Build the item-metadata frame: drop the 'Size' column, rename 'Store' -> 'item_id'
# and 'Type' -> 'StoreType', then index by 'item_id'.
# The chain starts from the untouched `store_data` (not from `store_df` itself) so the
# cell can be re-run safely; the previous self-referential form raised a KeyError on a
# second run because 'Size' had already been dropped.
store_df = (
    store_data
    .drop(columns='Size')
    .rename(columns={'Store': 'item_id', 'Type': 'StoreType'})
    .set_index('item_id')
)


## Save the item metadata as a CSV file

In [4]:
# Write the prepared store metadata to disk as CSV.
# header=False: the file is written without a header row.
store_meta_filename = "store_meta.csv"
store_meta_path = "/".join([data_dir, store_meta_filename])
store_df.to_csv(store_meta_path, header=False)

## Parameter

In [5]:
# Restore every variable previously saved with `%store`.
# The cells below use `project`, `suffix`, `region`, `bucket_name` and `role_arn`
# without defining them, so they are presumably restored here -- TODO confirm
# against the notebook that stores them.
%store -r

In [7]:
# Settings passed to the Amazon Forecast calls further down.
DATASET_FREQUENCY = "W"
TIMESTAMP_FORMAT = "yyyy-MM-dd hh:mm:ss"
# NOTE: the `itme_` spelling is kept on purpose; this name is exported with `%store`
# at the end of the notebook.
itme_meta_suffix = '_ItemM'

# Resource names are <project> + kind tag + item-meta suffix + <suffix>.
item_meta_dataset_name = "".join([project, 'DS', itme_meta_suffix, suffix])
item_datasetGroupName = "".join([project, 'DSG', itme_meta_suffix, suffix])

In [8]:
# One session bound to `region`, shared by both Forecast clients.
session = boto3.Session(region_name=region)
forecast, forecast_query = (
    session.client(service_name=service)
    for service in ('forecast', 'forecastquery')
)

## Create schema

In [9]:
# Dataset schema: both columns are declared as strings.
# The attribute order must match the column order of the raw data file
# (item_id first, then StoreType).
schema = {
    "Attributes": [
        {"AttributeName": column_name, "AttributeType": "string"}
        for column_name in ("item_id", "StoreType")
    ]
}

## Create Item-Meta Dataset

In [10]:
# Create the ITEM_METADATA dataset in the CUSTOM domain using the schema and
# name defined in the earlier cells.
create_dataset_args = {
    "Domain": "CUSTOM",
    "DatasetType": 'ITEM_METADATA',
    "DatasetName": item_meta_dataset_name,
    "DataFrequency": DATASET_FREQUENCY,
    "Schema": schema,
}
response = forecast.create_dataset(**create_dataset_args)

In [11]:
# Keep the ARN of the new dataset, then display the service's description of it.
item_meta_datasetArn = response["DatasetArn"]
forecast.describe_dataset(
    DatasetArn=item_meta_datasetArn,
)

{'DatasetArn': 'arn:aws:forecast:us-east-2:057716757052:dataset/WalmartKaggleDS_ItemM38922',
 'DatasetName': 'WalmartKaggleDS_ItemM38922',
 'Domain': 'CUSTOM',
 'DatasetType': 'ITEM_METADATA',
 'Schema': {'Attributes': [{'AttributeName': 'item_id',
    'AttributeType': 'string'},
   {'AttributeName': 'StoreType', 'AttributeType': 'string'}]},
 'EncryptionConfig': {},
 'Status': 'ACTIVE',
 'CreationTime': datetime.datetime(2020, 3, 26, 9, 7, 57, 281000, tzinfo=tzlocal()),
 'LastModificationTime': datetime.datetime(2020, 3, 26, 9, 7, 57, 281000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': 'e9afb62e-9137-4f2a-9ef7-959c210db675',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Thu, 26 Mar 2020 09:07:59 GMT',
   'x-amzn-requestid': 'e9afb62e-9137-4f2a-9ef7-959c210db675',
   'content-length': '428',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

## Create the dataset import job that loads the dataset from S3

In [12]:
# Upload the local item-metadata CSV to the bucket, using the file name as the
# object key, and remember the resulting s3:// URI for the import job.
s3_bucket = boto3.Session().resource('s3').Bucket(bucket_name)
s3_bucket.Object(store_meta_filename).upload_file(store_meta_path)
item_meta_s3DataPath = "s3://" + "/".join([bucket_name, store_meta_filename])

In [13]:
# Start the import job that loads the uploaded CSV into the item-metadata dataset.
# NOTE(review): TimestampFormat is passed although the schema defined earlier in this
# notebook has no timestamp attribute -- confirm whether it is needed here.
datasetImportJobName = "".join(['DSIMPORT_JOB_RELATED_WALMART', itme_meta_suffix, suffix])
item_meta_data_source = {
    "S3Config": {
        "Path": item_meta_s3DataPath,
        "RoleArn": role_arn,
    }
}
ds_import_job_response = forecast.create_dataset_import_job(
    DatasetImportJobName=datasetImportJobName,
    DatasetArn=item_meta_datasetArn,
    DataSource=item_meta_data_source,
    TimestampFormat=TIMESTAMP_FORMAT,
)

In [14]:
# Keep the import job's ARN (used for polling and exported with %store below) and show it.
ds_itme_meta_import_job_arn = ds_import_job_response["DatasetImportJobArn"]
print(ds_itme_meta_import_job_arn)

arn:aws:forecast:us-east-2:057716757052:dataset-import-job/WalmartKaggleDS_ItemM38922/DSIMPORT_JOB_RELATED_WALMART_ItemM38922


In [15]:
# Display the current description of the import job (its 'Status' field shows progress).
forecast.describe_dataset_import_job(
    DatasetImportJobArn=ds_itme_meta_import_job_arn,
)

{'DatasetImportJobName': 'DSIMPORT_JOB_RELATED_WALMART_ItemM38922',
 'DatasetImportJobArn': 'arn:aws:forecast:us-east-2:057716757052:dataset-import-job/WalmartKaggleDS_ItemM38922/DSIMPORT_JOB_RELATED_WALMART_ItemM38922',
 'DatasetArn': 'arn:aws:forecast:us-east-2:057716757052:dataset/WalmartKaggleDS_ItemM38922',
 'TimestampFormat': 'yyyy-MM-dd hh:mm:ss',
 'DataSource': {'S3Config': {'Path': 's3://walmart-forecast/store_meta.csv',
   'RoleArn': 'arn:aws:iam::057716757052:role/WalmartForecast'}},
 'Status': 'CREATE_PENDING',
 'CreationTime': datetime.datetime(2020, 3, 26, 9, 8, 30, 969000, tzinfo=tzlocal()),
 'LastModificationTime': datetime.datetime(2020, 3, 26, 9, 8, 30, 969000, tzinfo=tzlocal()),
 'ResponseMetadata': {'RequestId': '4743e05b-56d8-4be8-b1f2-c2a7f68ab33f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Thu, 26 Mar 2020 09:08:34 GMT',
   'x-amzn-requestid': '4743e05b-56d8-4be8-b1f2-c2a7f68ab33f',
   'content-length': '

In [16]:
%%time

while True:
    dataImportStatus = forecast.describe_dataset_import_job(
        DatasetImportJobArn=ds_itme_meta_import_job_arn)['Status']
    print(dataImportStatus)
    if dataImportStatus != 'ACTIVE' and dataImportStatus != 'CREATE_FAILED':
        sleep(30)
    else:
        break

CREATE_PENDING
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
CREATE_IN_PROGRESS
ACTIVE
CPU times: user 19.2 ms, sys: 849 µs, total: 20 ms
Wall time: 2min


In [17]:
# Persist these three values with IPython's %store so they can be restored later
# with `%store -r`.
# NOTE: the `itme_` spelling is part of the stored variable names; renaming it here
# would change the names under which the values are stored.
%store itme_meta_suffix
%store ds_itme_meta_import_job_arn
%store item_meta_datasetArn

Stored 'itme_meta_suffix' (str)
Stored 'ds_itme_meta_import_job_arn' (str)
Stored 'item_meta_datasetArn' (str)
