In [26]:
import boto3
import sagemaker

from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup

In [27]:
# Build a single boto3 session and read the region from it, instead of
# creating a throwaway session just to look up the region name.
session = boto3.Session()
region = session.region_name

# Client for the 'sagemaker' service.
client = session.client(
    service_name='sagemaker',
    region_name=region
)
# Client for the 'sagemaker-featurestore-runtime' service.
runtime = session.client(
    service_name='sagemaker-featurestore-runtime',
    region_name=region
)

# SageMaker SDK session wired to the boto3 session and both clients above.
feature_store_session = Session(
    boto_session=session,
    sagemaker_client=client,
    sagemaker_featurestore_runtime_client=runtime
)

In [28]:
# Restore feature_group_name from IPython's %store persistence
# (presumably saved by an earlier notebook in this series -- confirm).
%store -r feature_group_name

# Handle to the named feature group, bound to feature_store_session.
feature_group = FeatureGroup(
    name=feature_group_name, 
    sagemaker_session=feature_store_session
)

In [29]:
# Display the feature group's metadata (ARN, feature definitions, store configs).
feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:581320662326:feature-group/cookbook-feature-group',
 'FeatureGroupName': 'cookbook-feature-group',
 'RecordIdentifierFeatureName': 'index',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'label', 'FeatureType': 'Integral'},
  {'FeatureName': 'a', 'FeatureType': 'Fractional'},
  {'FeatureName': 'b', 'FeatureType': 'Fractional'},
  {'FeatureName': 'c', 'FeatureType': 'Integral'},
  {'FeatureName': 'd', 'FeatureType': 'Integral'},
  {'FeatureName': 'index', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 5, 29, 14, 11, 57, 610000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-cookbook-bucket/chapter07/input',
   'ResolvedOutputS3Uri': 's3://sagemaker-cookbook-bucket/chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookboo

In [30]:
# Table name reported by the feature group's Athena query helper; it is
# interpolated into the SQL statements in later cells.
table = feature_group.athena_query().table_name
table

'cookbook-feature-group-1622297517'

In [31]:
# Pull the offline store's S3 URI out of a fresh describe() response.
describe_response = feature_group.describe()
offline_config = describe_response['OfflineStoreConfig']
s3_uri = offline_config['S3StorageConfig']['S3Uri']
s3_uri

's3://sagemaker-cookbook-bucket/chapter07/input'

In [32]:
# Recursively list the objects stored under the offline store's S3 URI.
!aws s3 ls {s3_uri} --recursive

2021-05-24 13:19:31          0 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/cookbook-feature-group2021-05-24T13:18:09.536Z.txt
2021-05-24 13:24:04      39388 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_e6ORP4xrgB871rdp.parquet
2021-05-24 13:24:04      36764 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_iEvmDP797Fbl6RAm.parquet
2021-05-24 13:24:04      39184 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_vYGXQWqeKRLH7OJb.parquet
2021-05-24 13:24:04      38418 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_xfb8J2vUWH

In [33]:
# Restore the bucket name and key prefix from IPython's %store persistence
# (presumably saved by an earlier notebook -- confirm).
%store -r s3_bucket_name
%store -r prefix

In [34]:
# S3 location passed to Athena as the destination for query result files.
base = 's3://{}/{}'.format(s3_bucket_name, prefix)
output_location = '{}/query_results/'.format(base)

print(output_location)

s3://sagemaker-cookbook-bucket/chapter07/query_results/


In [35]:
def query_data(query_string):
    """Run a SQL statement through the feature group's Athena query helper.

    Echoes the statement, starts it via ``feature_group.athena_query()`` with
    results written to the notebook-level ``output_location``, blocks on
    ``wait()``, and hands back whatever ``as_dataframe()`` returns.

    Args:
        query_string: SQL text to execute.

    Returns:
        The result of ``as_dataframe()`` on the query object.
    """
    # Echo the SQL followed by one blank line.
    print(f"QUERY: {query_string}", end="\n\n")

    athena = feature_group.athena_query()
    athena.run(output_location=output_location,
               query_string=query_string)
    athena.wait()
    return athena.as_dataframe()

In [36]:
# Training split: the first 3000 rows when ordered by index.
query = 'SELECT label, a, b, c, d FROM "{}" ORDER BY index ASC LIMIT 3000'.format(table)

training_df = query_data(query)
training_df

QUERY: SELECT label, a, b, c, d FROM "cookbook-feature-group-1622297517" ORDER BY index ASC LIMIT 3000



Unnamed: 0,label,a,b,c,d
0,1,-8.837413,-6.551265,50,88
1,1,-9.216749,-2.483494,77,-3
2,1,-2.017317,-6.326533,1,-50
3,1,-10.748736,-4.622519,23,59
4,0,-3.675848,12.629029,-20,-37
...,...,...,...,...,...
2995,0,-5.786462,-6.790668,-81,-45
2996,1,-2.552410,-1.793217,47,8
2997,0,-10.692197,1.583437,23,22
2998,1,-14.109003,-4.745680,89,5


In [37]:
# Validation split: up to 1000 rows with index above 3000, ordered by index.
# NOTE(review): this lines up with the training split only if index values
# start at 1 and are unique -- confirm against the ingested data.
query = 'SELECT label, a, b, c, d FROM "{}" WHERE index > 3000 ORDER BY index ASC LIMIT 1000'.format(table)

validation_df = query_data(query)
validation_df

QUERY: SELECT label, a, b, c, d FROM "cookbook-feature-group-1622297517" WHERE index > 3000 ORDER BY index ASC LIMIT 1000



Unnamed: 0,label,a,b,c,d
0,0,-7.328588,-3.077598,-42,38
1,1,-10.128341,-3.126609,-89,-83
2,1,-7.863698,-8.622445,73,-88
3,1,-5.904219,-3.370329,1,50
4,0,-3.451493,-2.471520,-80,95
...,...,...,...,...,...
995,1,-7.286751,-6.346180,-7,80
996,0,-8.717633,18.595339,-80,-60
997,0,-7.991978,-6.598361,-9,-43
998,0,-23.082488,1.267317,53,84


In [38]:
# Test split: up to 1000 rows with index above 4000, ordered by index.
# The trailing space inside the SQL text is kept as in the original statement.
query = 'SELECT label, a, b, c, d FROM "{}" WHERE index > 4000 ORDER BY index ASC LIMIT 1000 '.format(table)

test_df = query_data(query)
test_df

QUERY: SELECT label, a, b, c, d FROM "cookbook-feature-group-1622297517" WHERE index > 4000 ORDER BY index ASC LIMIT 1000 



Unnamed: 0,label,a,b,c,d
0,0,-10.488853,0.632800,-69,-33
1,0,8.455742,4.183267,96,-26
2,0,-15.466566,-0.372287,-1,-11
3,1,-3.134794,-4.258036,-55,88
4,1,-8.362027,-9.802120,12,-14
...,...,...,...,...,...
995,0,0.659784,1.414005,49,95
996,0,-12.388167,-3.860623,-5,49
997,1,-4.294561,-8.472893,-90,-42
998,1,-6.044883,-5.261760,64,69


In [39]:
# Create the local scratch directory for the CSV exports; -p makes this a
# no-op when the directory already exists.
!mkdir -p tmp

In [40]:
# Write each split to tmp/ as CSV with a header row and without the
# DataFrame index column.
training_df.to_csv('tmp/training_data.csv', header=True, index=False)
validation_df.to_csv('tmp/validation_data.csv', header=True, index=False)
test_df.to_csv('tmp/test_data.csv', header=True, index=False)

In [41]:
# S3 destinations for the header-bearing CSVs, all under <bucket>/<prefix>/input/.
path = "s3://{}/{}".format(s3_bucket_name, prefix)
training_data_path = "{}/input/training_data.csv".format(path)
validation_data_path = "{}/input/validation_data.csv".format(path)
test_data_path = "{}/input/test_data.csv".format(path)

In [42]:
# Upload the three header-bearing CSVs from tmp/ to their S3 destinations.
!aws s3 cp tmp/training_data.csv {training_data_path}
!aws s3 cp tmp/validation_data.csv {validation_data_path}
!aws s3 cp tmp/test_data.csv {test_data_path}

upload: tmp/training_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/training_data.csv
upload: tmp/validation_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/validation_data.csv
upload: tmp/test_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_data.csv


In [43]:
# Write the same splits again, this time with no header row and still
# without the DataFrame index column.
training_df.to_csv('tmp/training_data_no_header.csv', header=False, index=False)
validation_df.to_csv('tmp/validation_data_no_header.csv', header=False, index=False)
test_df.to_csv('tmp/test_data_no_header.csv', header=False, index=False)

In [44]:
# S3 destinations for the headerless CSVs, alongside the header-bearing ones.
training_data_path_nh = "{}/input/training_data_no_header.csv".format(path)
validation_data_path_nh = "{}/input/validation_data_no_header.csv".format(path)
test_data_path_nh = "{}/input/test_data_no_header.csv".format(path)

In [45]:
# Upload the three headerless CSVs from tmp/ to their S3 destinations.
!aws s3 cp tmp/training_data_no_header.csv {training_data_path_nh}
!aws s3 cp tmp/validation_data_no_header.csv {validation_data_path_nh}
!aws s3 cp tmp/test_data_no_header.csv {test_data_path_nh}

upload: tmp/training_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/training_data_no_header.csv
upload: tmp/validation_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/validation_data_no_header.csv
upload: tmp/test_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_data_no_header.csv


In [46]:
# Persist all six S3 paths with IPython's %store so they can be restored
# with `%store -r` in another kernel or notebook.
%store training_data_path
%store validation_data_path
%store test_data_path
%store training_data_path_nh
%store validation_data_path_nh
%store test_data_path_nh

Stored 'training_data_path' (str)
Stored 'validation_data_path' (str)
Stored 'test_data_path' (str)
Stored 'training_data_path_nh' (str)
Stored 'validation_data_path_nh' (str)
Stored 'test_data_path_nh' (str)
