In [28]:
from time import sleep

import boto3
import sagemaker

from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup

In [29]:
# Read the region from the default boto3 configuration, then build a session
# explicitly pinned to it so every client below uses the same region.
region = boto3.Session().region_name
session = boto3.Session(region_name=region)

# `client` is the SageMaker API client, `runtime` the Feature Store runtime
# client; both are created from the same session, in that order.
client, runtime = (
    session.client(service_name=service, region_name=region)
    for service in ('sagemaker', 'sagemaker-featurestore-runtime')
)

# SageMaker SDK session wired to the two clients created above.
feature_store_session = Session(
    boto_session=session,
    sagemaker_client=client,
    sagemaker_featurestore_runtime_client=runtime,
)

In [30]:
%store -r feature_group_name

feature_group = FeatureGroup(
    name=feature_group_name, 
    sagemaker_session=feature_store_session
)

In [31]:
# Display the feature group's metadata (ARN, feature definitions, online and
# offline store configuration).
feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:581320662326:feature-group/cookbook-feature-group',
 'FeatureGroupName': 'cookbook-feature-group',
 'RecordIdentifierFeatureName': 'index',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'approved', 'FeatureType': 'Integral'},
  {'FeatureName': 'sex', 'FeatureType': 'Integral'},
  {'FeatureName': 'math', 'FeatureType': 'Integral'},
  {'FeatureName': 'science', 'FeatureType': 'Integral'},
  {'FeatureName': 'technology', 'FeatureType': 'Integral'},
  {'FeatureName': 'random1', 'FeatureType': 'Integral'},
  {'FeatureName': 'random2', 'FeatureType': 'Integral'},
  {'FeatureName': 'index', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 6, 13, 17, 15, 42, 30000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-cookbook-bucket/chapter07/inpu

In [32]:
# Athena table name associated with the feature group; it is interpolated into
# the FROM clause of the SQL queries further down.
table = feature_group.athena_query().table_name
table

'cookbook-feature-group-1623604542'

In [33]:
# Pull the offline store's S3 URI out of the describe() response:
# OfflineStoreConfig -> S3StorageConfig -> S3Uri.
describe_response = feature_group.describe()
offline_config = describe_response['OfflineStoreConfig']
s3_uri = offline_config['S3StorageConfig']['S3Uri']
s3_uri

's3://sagemaker-cookbook-bucket/chapter07/input'

In [34]:
# Recursively list the objects under the offline store's S3 URI.
!aws s3 ls {s3_uri} --recursive

2021-05-24 13:19:31          0 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/cookbook-feature-group2021-05-24T13:18:09.536Z.txt
2021-05-24 13:24:04      39388 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_e6ORP4xrgB871rdp.parquet
2021-05-24 13:24:04      36764 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_iEvmDP797Fbl6RAm.parquet
2021-05-24 13:24:04      39184 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_vYGXQWqeKRLH7OJb.parquet
2021-05-24 13:24:04      38418 chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook-feature-group-1621862289/data/year=2021/month=05/day=24/hour=13/20210524T131737Z_xfb8J2vUWH

In [35]:
# Restore the bucket name and key prefix from IPython's %store persistence;
# both are used below to build S3 paths.
%store -r s3_bucket_name
%store -r prefix

In [36]:
# S3 location passed to Athena as the destination for query results,
# rooted at the restored bucket and prefix.
base = 's3://{}/{}'.format(s3_bucket_name, prefix)
output_location = base + '/query_results/'

print(output_location)

s3://sagemaker-cookbook-bucket/chapter07/query_results/


In [37]:
def query_data(query_string):
    """Run a SQL query through the feature group's Athena query object.

    Prints the SQL, runs it via ``feature_group.athena_query()`` with the
    notebook-level ``output_location`` as the result destination, waits on
    the query, and returns its result as a DataFrame.

    Args:
        query_string: SQL text to execute.

    Returns:
        Whatever ``as_dataframe()`` yields for the finished query.
    """
    print(f"QUERY: {query_string}\n")

    athena_query = feature_group.athena_query()
    athena_query.run(
        query_string=query_string,
        output_location=output_location,
    )
    athena_query.wait()

    return athena_query.as_dataframe()

In [38]:
# Pause for 5 minutes before querying so the offline store has time to be
# ready. This is a fixed delay, not a readiness check.
# (`sleep` is imported in the notebook's top import cell.)
OFFLINE_STORE_WAIT_SECONDS = 5 * 60

sleep(OFFLINE_STORE_WAIT_SECONDS)

In [39]:
# Training split: the 600 rows with the lowest `index` values.
# NOTE(review): the validation/test queries filter on index > 600 and
# index > 800, so this presumably assumes unique, 1-based index values;
# with a 0-based index, row 600 would land in no split. Confirm.
query = (
    'SELECT approved, sex, math, science, technology, random1, random2 '
    f'FROM "{table}" '
    'ORDER BY index ASC LIMIT 600'
)

training_df = query_data(query)
training_df

QUERY: SELECT approved, sex, math, science, technology, random1, random2 FROM "cookbook-feature-group-1623604542" ORDER BY index ASC LIMIT 600



Unnamed: 0,approved,sex,math,science,technology,random1,random2
0,1,1,97,97,98,93,82
1,1,1,85,68,62,92,65
2,1,1,99,100,80,71,60
3,1,1,91,79,84,60,70
4,1,1,73,86,66,70,98
...,...,...,...,...,...,...,...
595,1,1,99,86,85,98,87
596,1,1,71,97,90,86,99
597,1,1,95,86,62,69,73
598,1,1,78,71,68,72,68


In [40]:
# Validation split: the first 200 rows (by ascending index) with index > 600.
# NOTE(review): presumably assumes unique index values, so that these rows do
# not overlap the rows selected for training. Confirm.
query = (
    'SELECT approved, sex, math, science, technology, random1, random2 '
    f'FROM "{table}" '
    'WHERE index > 600 ORDER BY index ASC LIMIT 200'
)

validation_df = query_data(query)
validation_df

QUERY: SELECT approved, sex, math, science, technology, random1, random2 FROM "cookbook-feature-group-1623604542" WHERE index > 600 ORDER BY index ASC LIMIT 200



Unnamed: 0,approved,sex,math,science,technology,random1,random2
0,1,1,93,74,66,73,77
1,1,0,92,91,76,79,69
2,0,1,70,85,84,96,65
3,1,1,95,77,91,67,77
4,1,1,88,92,86,69,72
...,...,...,...,...,...,...,...
195,1,1,63,94,91,94,94
196,1,1,99,83,99,94,69
197,1,0,84,77,99,90,77
198,1,1,87,64,75,80,95


In [41]:
# Test split: the first 200 rows (by ascending index) with index > 800.
# The trailing space at the end of the SQL text is kept as-is.
query = (
    'SELECT approved, sex, math, science, technology, random1, random2 '
    f'FROM "{table}" '
    'WHERE index > 800 ORDER BY index ASC LIMIT 200 '
)

test_df = query_data(query)
test_df

QUERY: SELECT approved, sex, math, science, technology, random1, random2 FROM "cookbook-feature-group-1623604542" WHERE index > 800 ORDER BY index ASC LIMIT 200 



Unnamed: 0,approved,sex,math,science,technology,random1,random2
0,1,1,64,65,73,85,66
1,1,0,90,91,99,72,73
2,1,1,88,90,68,90,100
3,1,1,64,96,73,60,70
4,1,1,64,85,98,85,96
...,...,...,...,...,...,...,...
195,1,0,99,62,92,71,75
196,1,0,85,74,91,69,63
197,1,1,72,99,86,61,65
198,1,1,79,89,79,98,80


In [42]:
# Create the local scratch directory for the CSV exports (-p: no error if it
# already exists).
!mkdir -p tmp

In [43]:
# Write each split to tmp/<split>_data.csv with a header row and without the
# DataFrame index column.
for split_name, split_df in (
    ('training', training_df),
    ('validation', validation_df),
    ('test', test_df),
):
    split_df.to_csv(f'tmp/{split_name}_data.csv', header=True, index=False)

In [44]:
# S3 destinations for the CSV files that include a header row.
path = "s3://{}/{}".format(s3_bucket_name, prefix)
training_data_path, validation_data_path, test_data_path = (
    f"{path}/input/{split}_data.csv"
    for split in ("training", "validation", "test")
)

In [45]:
# Upload the three header-bearing CSV files to their S3 destinations.
!aws s3 cp tmp/training_data.csv {training_data_path}
!aws s3 cp tmp/validation_data.csv {validation_data_path}
!aws s3 cp tmp/test_data.csv {test_data_path}

upload: tmp/training_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/training_data.csv
upload: tmp/validation_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/validation_data.csv
upload: tmp/test_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_data.csv


In [46]:
# Write each split again as tmp/<split>_data_no_header.csv, this time with no
# header row and no DataFrame index column.
for split_name, split_df in (
    ('training', training_df),
    ('validation', validation_df),
    ('test', test_df),
):
    split_df.to_csv(
        f'tmp/{split_name}_data_no_header.csv',
        header=False,
        index=False,
    )

In [47]:
# S3 destinations for the header-less CSV files; `path` is the
# s3://<bucket>/<prefix> root built in an earlier cell.
training_data_path_nh, validation_data_path_nh, test_data_path_nh = (
    f"{path}/input/{split}_data_no_header.csv"
    for split in ("training", "validation", "test")
)

In [48]:
# Upload the three header-less CSV files to their S3 destinations.
!aws s3 cp tmp/training_data_no_header.csv {training_data_path_nh}
!aws s3 cp tmp/validation_data_no_header.csv {validation_data_path_nh}
!aws s3 cp tmp/test_data_no_header.csv {test_data_path_nh}

upload: tmp/training_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/training_data_no_header.csv
upload: tmp/validation_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/validation_data_no_header.csv
upload: tmp/test_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_data_no_header.csv


In [49]:
# Persist all six S3 paths with IPython's %store so they can be restored
# later with `%store -r`.
%store training_data_path
%store validation_data_path
%store test_data_path
%store training_data_path_nh
%store validation_data_path_nh
%store test_data_path_nh

Stored 'training_data_path' (str)
Stored 'validation_data_path' (str)
Stored 'test_data_path' (str)
Stored 'training_data_path_nh' (str)
Stored 'validation_data_path_nh' (str)
Stored 'test_data_path_nh' (str)
