## Amazon SageMaker Featurestore

Notebook to demonstrate creation of offline (default mode), offline + online and online only feature groups.

1. Setup
2. Create feature groups
3. Ingest features into feature group
4. Delete the feature groups

### 1. Setup

#### 1.1. Imports

In [1]:
import boto3
import pandas as pd
import numpy as np
import io
import sagemaker
import sys
import json
import time
from time import gmtime, strftime, sleep

from sagemaker.session import Session
from sagemaker import get_execution_role

from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.feature_store.feature_group import FeatureDefinition
from sagemaker.feature_store.feature_group import FeatureTypeEnum

#### 1.2 Install required version of sagemaker libraries

In [2]:
# SageMaker Python SDK version 2.x is required
original_version = sagemaker.__version__
%pip install 'sagemaker>=2.0.0'

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


#### 1.3 Setup variables

In [3]:
prefix = 'sagemaker-featurestore-weather'
role = get_execution_role()

sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
s3_bucket_name = sagemaker_session.default_bucket()

#### 1.4 Setup service clients

In [4]:
#Create the service clients
sagemaker_fs_runtime_client = sagemaker_session.boto_session.client('sagemaker-featurestore-runtime')
sagemaker_runtime = sagemaker_session.boto_session.client('sagemaker-runtime')
sagemaker_client = sagemaker_session.boto_session.client('sagemaker')
s3_client = boto3.client('s3', region_name=region)

### 2. Create a feature groups
We will use the location feature group that holds the weather station location related features, to demonstrate the usage of APIs

In [5]:
#Feature group name
location_feature_group_name_offline = 'location-feature-group-offline-' + strftime('%d-%H-%M-%S', gmtime())
location_feature_group_name_online = 'location-feature-group-online-' + strftime('%d-%H-%M-%S', gmtime())
location_feature_group_name_offline_online = 'location-feature-group-offline-online-' + strftime('%d-%H-%M-%S', gmtime())

In [6]:
##Create FeatureDefinitions
fd_location=FeatureDefinition(feature_name='location', feature_type=FeatureTypeEnum('Fractional'))
fd_value=FeatureDefinition(feature_name='city', feature_type=FeatureTypeEnum('Fractional'))
fd_is_mobile=FeatureDefinition(feature_name='ismobile', feature_type=FeatureTypeEnum('Integral'))
fd_source_name=FeatureDefinition(feature_name='sourcename', feature_type=FeatureTypeEnum('Fractional'))
fd_source_type=FeatureDefinition(feature_name='sourcetype', feature_type=FeatureTypeEnum('Fractional'))
fd_event_time=FeatureDefinition(feature_name='EventTime', feature_type=FeatureTypeEnum('Fractional'))

location_feature_definitions = []
location_feature_definitions.append(fd_location)
location_feature_definitions.append(fd_value)
location_feature_definitions.append(fd_is_mobile)
location_feature_definitions.append(fd_source_name)
location_feature_definitions.append(fd_source_type)
location_feature_definitions.append(fd_event_time)


In [7]:
##Define unique identifier
record_identifier_feature_name = "location"

#### 2.1 Create offline only feature group (Default mode)

In [8]:
#Create offline feature group
location_feature_group_offline = FeatureGroup(name=location_feature_group_name_offline, 
                                     feature_definitions=location_feature_definitions,
                                     sagemaker_session=sagemaker_session)

location_feature_group_offline.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name="location",
    event_time_feature_name="EventTime",
    role_arn=role,
    tags=[{'Key':'project','Value':'weather-prediction'}]
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-offline-07-15-47-35',
 'ResponseMetadata': {'RequestId': 'd7d84ce8-b69e-426a-8838-9bcab6a9d3e1',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd7d84ce8-b69e-426a-8838-9bcab6a9d3e1',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '119',
   'date': 'Sat, 07 Aug 2021 15:47:43 GMT'},
  'RetryAttempts': 0}}

In [9]:
#Describe the feature group
location_feature_group_offline.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-offline-07-15-47-35',
 'FeatureGroupName': 'location-feature-group-offline-07-15-47-35',
 'RecordIdentifierFeatureName': 'location',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'location',
   'FeatureType': 'Fractional'},
  {'FeatureName': 'city', 'FeatureType': 'Fractional'},
  {'FeatureName': 'ismobile', 'FeatureType': 'Integral'},
  {'FeatureName': 'sourcename', 'FeatureType': 'Fractional'},
  {'FeatureName': 'sourcetype', 'FeatureType': 'Fractional'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 8, 7, 15, 47, 43, 680000, tzinfo=tzlocal()),
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-us-west-2-802439482869/sagemaker-featurestore-weather',
   'ResolvedOutputS3Uri': 's3://sagemaker-us-west-2-802439482869/sagemaker-featurestore-weather/802439482869/sagemaker/us-west-2/offli

#### 2.2 Create offline + online feature group

In [10]:
#Create offline + online feature group
#Note the usage of enable_online_store parameter
location_feature_group_offline_online = FeatureGroup(name=location_feature_group_name_offline_online, 
                                     feature_definitions=location_feature_definitions,
                                     sagemaker_session=sagemaker_session)

location_feature_group_offline_online.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name="location",
    event_time_feature_name="EventTime",
    role_arn=role,
    enable_online_store=True,
    tags=[{'Key':'project','Value':'weather-prediction'}]
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-offline-online-07-15-47-35',
 'ResponseMetadata': {'RequestId': 'e3aee3db-5c04-417d-8a64-32141078f2e7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'e3aee3db-5c04-417d-8a64-32141078f2e7',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '126',
   'date': 'Sat, 07 Aug 2021 15:48:18 GMT'},
  'RetryAttempts': 0}}

In [11]:
#Describe the feature group
location_feature_group_offline_online.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-offline-online-07-15-47-35',
 'FeatureGroupName': 'location-feature-group-offline-online-07-15-47-35',
 'RecordIdentifierFeatureName': 'location',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'location',
   'FeatureType': 'Fractional'},
  {'FeatureName': 'city', 'FeatureType': 'Fractional'},
  {'FeatureName': 'ismobile', 'FeatureType': 'Integral'},
  {'FeatureName': 'sourcename', 'FeatureType': 'Fractional'},
  {'FeatureName': 'sourcetype', 'FeatureType': 'Fractional'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 8, 7, 15, 48, 18, 323000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-us-west-2-802439482869/sagemaker-featurestore-weather',
   'ResolvedOutputS3Uri': 's3://sagemaker-us-west-2-802439482869/sage

#### 2.3 Create online only feature group

In [12]:
#Create online feature group
#Note s3_uri flag set to False for the online only FG
location_feature_group_online = FeatureGroup(name=location_feature_group_name_online, 
                                     feature_definitions=location_feature_definitions,
                                     sagemaker_session=sagemaker_session)

location_feature_group_online.create(
    s3_uri=False,
    record_identifier_name="location", 
    event_time_feature_name="EventTime",
    role_arn=role,
    enable_online_store=True,
    tags=[{'Key':'project','Value':'weather-prediction'}]
)

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-online-07-15-47-35',
 'ResponseMetadata': {'RequestId': '9bc11cde-9069-4f14-a644-8582351129fd',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9bc11cde-9069-4f14-a644-8582351129fd',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '118',
   'date': 'Sat, 07 Aug 2021 15:48:48 GMT'},
  'RetryAttempts': 0}}

In [13]:
#Describe the feature group
location_feature_group_online.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-online-07-15-47-35',
 'FeatureGroupName': 'location-feature-group-online-07-15-47-35',
 'RecordIdentifierFeatureName': 'location',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'location',
   'FeatureType': 'Fractional'},
  {'FeatureName': 'city', 'FeatureType': 'Fractional'},
  {'FeatureName': 'ismobile', 'FeatureType': 'Integral'},
  {'FeatureName': 'sourcename', 'FeatureType': 'Fractional'},
  {'FeatureName': 'sourcetype', 'FeatureType': 'Fractional'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 8, 7, 15, 48, 49, 168000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'RoleArn': 'arn:aws:iam::802439482869:role/DataScienceEnvironment-SageMakerRole-1SVE0FKUVRVO5',
 'FeatureGroupStatus': 'Creating',
 'ResponseMetadata': {'RequestId': 'cc1d5191-a281-4b25-a2a4-5f149973d7d8',
  '

#### 2.4. List all featuregroups

In [14]:
sagemaker_client.list_feature_groups()

{'FeatureGroupSummaries': [{'FeatureGroupName': 'weather-feature-group-online-13-19-23-46',
   'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/weather-feature-group-online-13-19-23-46',
   'CreationTime': datetime.datetime(2021, 7, 13, 19, 23, 51, 48000, tzinfo=tzlocal()),
   'FeatureGroupStatus': 'Created'},
  {'FeatureGroupName': 'weather-feature-group-offline-online-13-19-23-46',
   'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/weather-feature-group-offline-online-13-19-23-46',
   'CreationTime': datetime.datetime(2021, 7, 13, 19, 23, 50, 26000, tzinfo=tzlocal()),
   'FeatureGroupStatus': 'Created'},
  {'FeatureGroupName': 'weather-feature-group-offline-13-19-23-46',
   'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/weather-feature-group-offline-13-19-23-46',
   'CreationTime': datetime.datetime(2021, 7, 13, 19, 23, 46, 695000, tzinfo=tzlocal()),
   'FeatureGroupStatus': 'Created'},
  {'FeatureGroupN

### 3. Ingest features into feature group

### 3.1 Use ingest API

In [22]:
##Get the file name at index from the 'prefix' folder
def get_file_in_bucket(prefix,index):
    response = s3_client.list_objects(
        Bucket=s3_bucket,
        Prefix=prefix
    )
    ## At '0' index you will find the SUCCESS/FAILURE of file uploades to S3. First data file is at index 1
    file_name = response['Contents'][index]['Key']
    print("Returing file name : " + file_name)
    return file_name

In [23]:
#Read a sample csv from S3 
###NOTE : Use the S3 bucket name in your account.
s3_bucket='datascience-environment-notebookinstance--06dc7a0224df'
s3_path = "s3://{}/{}".format(s3_bucket, get_file_in_bucket('prepared/train',1))
weather_df = pd.read_csv(s3_path)

Returing file name : prepared/train/part-00000-2554f113-947e-46bd-be31-9cd75cb4661c-c000.csv


In [24]:
##Location, city, source_name, source_type : Features included in the location FG
location_df=weather_df.iloc[:, 7:11]
location_df

Unnamed: 0,1210.0,731.0,10.0,9.0
0,1210.0,731.0,10.0,9.0
1,155.0,14.0,21.0,21.0
2,155.0,14.0,21.0,21.0
3,155.0,14.0,21.0,21.0
4,155.0,14.0,21.0,21.0
...,...,...,...,...
772298,97.0,3.0,0.0,0.0
772299,97.0,3.0,0.0,0.0
772300,97.0,3.0,0.0,0.0
772301,97.0,3.0,0.0,0.0


In [32]:
location_df.columns = ["location", "city", "sourcename", "sourcetype"]

In [25]:
##Add EventTime column to the df
location_df['EventTime']=time.time()

In [26]:
location_feature_group_offline.describe()


{'FeatureGroupArn': 'arn:aws:sagemaker:us-west-2:802439482869:feature-group/location-feature-group-offline-07-15-47-35',
 'FeatureGroupName': 'location-feature-group-offline-07-15-47-35',
 'RecordIdentifierFeatureName': 'location',
 'EventTimeFeatureName': 'EventTime',
 'FeatureDefinitions': [{'FeatureName': 'location',
   'FeatureType': 'Fractional'},
  {'FeatureName': 'city', 'FeatureType': 'Fractional'},
  {'FeatureName': 'ismobile', 'FeatureType': 'Integral'},
  {'FeatureName': 'sourcename', 'FeatureType': 'Fractional'},
  {'FeatureName': 'sourcetype', 'FeatureType': 'Fractional'},
  {'FeatureName': 'EventTime', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 8, 7, 15, 47, 43, 680000, tzinfo=tzlocal()),
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-us-west-2-802439482869/sagemaker-featurestore-weather',
   'ResolvedOutputS3Uri': 's3://sagemaker-us-west-2-802439482869/sagemaker-featurestore-weather/802439482869/sagemaker/us-west-2/offli

In [28]:
##Check status of the feature group
def check_feature_group_status(feature_group):
    status = feature_group.describe().get("FeatureGroupStatus")
    while status == "Creating":
        print("Waiting for Feature Group to be Created")
        time.sleep(5)
        status = feature_group.describe().get("FeatureGroupStatus")
    print(f"FeatureGroup {feature_group.name} successfully created.")

In [29]:
##Wait till the feature group is ready
check_feature_group_status(location_feature_group_offline)

FeatureGroup location-feature-group-offline-07-15-47-35 successfully created.


In [33]:
#Ingest features into the feature group
location_feature_group_offline.ingest(
    data_frame=location_df, max_workers=3, wait=True
)

IngestionManagerPandas(feature_group_name='location-feature-group-offline-07-15-47-35', sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7efc0fc6b198>, max_workers=3, max_processes=1, _async_result=<multiprocess.pool.MapResult object at 0x7efc0c5c7d68>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

### 3.2 Use PutRecord API

In [34]:
##Create a record to ingest into the feature group
##This ingests features into the online store.
record = []

event_time_feature = {'FeatureName': 'EventTime','ValueAsString': str(int(round(time.time())))}
location_feature =   {'FeatureName': 'location','ValueAsString': str('250')}
ismobile_feature =   {'FeatureName': 'ismobile','ValueAsString': str('1')}
city_feature =      {'FeatureName': 'city','ValueAsString': str('12')}
sourcename_feature =      {'FeatureName': 'sourcename','ValueAsString': str('2.0')}
sourcetype_feature =      {'FeatureName': 'sourcetype','ValueAsString': str('2.0')}

record.append(event_time_feature)
record.append(location_feature)
record.append(ismobile_feature)
record.append(city_feature)
record.append(sourcename_feature)
record.append(sourcetype_feature)

response = sagemaker_fs_runtime_client.put_record(FeatureGroupName=location_feature_group_name_offline_online, 
                                                  Record=record)

response

{'ResponseMetadata': {'RequestId': 'ae90f64b-8ee2-4229-a19e-56615cd8e970',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'ae90f64b-8ee2-4229-a19e-56615cd8e970',
   'content-type': 'application/json',
   'content-length': '0',
   'date': 'Sat, 07 Aug 2021 17:05:37 GMT'},
  'RetryAttempts': 0}}

### 4. Retrieving featues from feature group

In [35]:
#Use get_record from the online store
record_identifier_value = str('250')
response = sagemaker_fs_runtime_client.get_record(FeatureGroupName=location_feature_group_name_offline_online, 
                                       RecordIdentifierValueAsString=record_identifier_value)
response

{'ResponseMetadata': {'RequestId': '7361fb32-2a76-492e-badc-1613992cfce5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7361fb32-2a76-492e-badc-1613992cfce5',
   'content-type': 'application/json',
   'content-length': '311',
   'date': 'Sat, 07 Aug 2021 17:05:42 GMT'},
  'RetryAttempts': 0},
 'Record': [{'FeatureName': 'location', 'ValueAsString': '250'},
  {'FeatureName': 'city', 'ValueAsString': '12'},
  {'FeatureName': 'ismobile', 'ValueAsString': '1'},
  {'FeatureName': 'sourcename', 'ValueAsString': '2.0'},
  {'FeatureName': 'sourcetype', 'ValueAsString': '2.0'},
  {'FeatureName': 'EventTime', 'ValueAsString': '1628355938'}]}

In [36]:
#Use batch-get_record
record_identifier_values = ["200", "250", "300"]
response=sagemaker_fs_runtime_client.batch_get_record(
    Identifiers=[
        {"FeatureGroupName": location_feature_group_name_offline_online, "RecordIdentifiersValueAsString": record_identifier_values}
    ]
)
response

{'ResponseMetadata': {'RequestId': '7648fabf-7b68-4a9b-bc5c-327f01a7c9e6',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7648fabf-7b68-4a9b-bc5c-327f01a7c9e6',
   'content-type': 'application/json',
   'content-length': '474',
   'date': 'Sat, 07 Aug 2021 17:05:42 GMT'},
  'RetryAttempts': 0},
 'Records': [{'FeatureGroupName': 'location-feature-group-offline-online-07-15-47-35',
   'RecordIdentifierValueAsString': '250',
   'Record': [{'FeatureName': 'location', 'ValueAsString': '250'},
    {'FeatureName': 'city', 'ValueAsString': '12'},
    {'FeatureName': 'ismobile', 'ValueAsString': '1'},
    {'FeatureName': 'sourcename', 'ValueAsString': '2.0'},
    {'FeatureName': 'sourcetype', 'ValueAsString': '2.0'},
    {'FeatureName': 'EventTime', 'ValueAsString': '1628355938'}]}],
 'Errors': [],
 'UnprocessedIdentifiers': []}

### 4. Delete the feature groups

Uncomment the below to delete the feature groups created

In [37]:
sagemaker_client.delete_feature_group(FeatureGroupName=location_feature_group_name_offline)

{'ResponseMetadata': {'RequestId': '79e1cc2f-96f4-4b91-92f3-a4807e002e86',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '79e1cc2f-96f4-4b91-92f3-a4807e002e86',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 07 Aug 2021 17:05:47 GMT'},
  'RetryAttempts': 0}}

In [38]:
sagemaker_client.delete_feature_group(FeatureGroupName=location_feature_group_name_offline_online)

{'ResponseMetadata': {'RequestId': '15968ca3-636d-4c91-859e-b1fed47dcfa2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '15968ca3-636d-4c91-859e-b1fed47dcfa2',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 07 Aug 2021 17:05:48 GMT'},
  'RetryAttempts': 1}}

In [39]:
sagemaker_client.delete_feature_group(FeatureGroupName=location_feature_group_name_online)

{'ResponseMetadata': {'RequestId': '67a680b6-4d24-4e96-bb7e-9fb8d294dd79',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '67a680b6-4d24-4e96-bb7e-9fb8d294dd79',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 07 Aug 2021 17:05:49 GMT'},
  'RetryAttempts': 1}}