In [34]:
import boto3
import sagemaker

from sagemaker.session import Session
from sagemaker.feature_store.feature_group import FeatureGroup

In [35]:
# Read the region from the default boto3 session, then build an explicit
# session pinned to that region; both clients below are created from it.
region = boto3.Session().region_name
session = boto3.Session(region_name=region)

# boto3 client for the 'sagemaker' service.
client = session.client(
    service_name='sagemaker', 
    region_name=region
)
# boto3 client for the 'sagemaker-featurestore-runtime' service.
runtime = session.client(
    service_name='sagemaker-featurestore-runtime', 
    region_name=region
)

# SageMaker SDK session wired to the boto3 session and the two clients above.
feature_store_session = Session(
    boto_session=session,
    sagemaker_client=client,
    sagemaker_featurestore_runtime_client=runtime
)

In [36]:
# Restore the feature group name from IPython's %store
# (presumably saved by an earlier notebook — confirm it has been run first).
%store -r feature_group_name

# Handle to the named feature group, bound to feature_store_session.
feature_group = FeatureGroup(
    name=feature_group_name, 
    sagemaker_session=feature_store_session
)

In [37]:
# Show the feature group's description; as the cell's last expression the
# response is rendered as the cell output.
feature_group.describe()

{'FeatureGroupArn': 'arn:aws:sagemaker:us-east-1:581320662326:feature-group/cookbook-feature-group',
 'FeatureGroupName': 'cookbook-feature-group',
 'RecordIdentifierFeatureName': 'index',
 'EventTimeFeatureName': 'event_time',
 'FeatureDefinitions': [{'FeatureName': 'label', 'FeatureType': 'Integral'},
  {'FeatureName': 'a', 'FeatureType': 'Fractional'},
  {'FeatureName': 'b', 'FeatureType': 'Fractional'},
  {'FeatureName': 'c', 'FeatureType': 'Integral'},
  {'FeatureName': 'd', 'FeatureType': 'Integral'},
  {'FeatureName': 'index', 'FeatureType': 'Integral'},
  {'FeatureName': 'event_time', 'FeatureType': 'Fractional'}],
 'CreationTime': datetime.datetime(2021, 5, 24, 13, 18, 9, 536000, tzinfo=tzlocal()),
 'OnlineStoreConfig': {'EnableOnlineStore': True},
 'OfflineStoreConfig': {'S3StorageConfig': {'S3Uri': 's3://sagemaker-cookbook-bucket/chapter07/input',
   'ResolvedOutputS3Uri': 's3://sagemaker-cookbook-bucket/chapter07/input/581320662326/sagemaker/us-east-1/offline-store/cookbook

In [38]:
# Table name exposed by the feature group's Athena query helper; it is
# interpolated into the SQL strings of the query cells further down.
table = feature_group.athena_query().table_name
table

'cookbook-feature-group-1621862289'

In [39]:
# Pull the offline store's S3 URI out of the describe() response by walking
# OfflineStoreConfig -> S3StorageConfig -> S3Uri.
describe_response = feature_group.describe()
offline_config = describe_response['OfflineStoreConfig']
s3_uri = offline_config['S3StorageConfig']['S3Uri']
s3_uri

's3://sagemaker-cookbook-bucket/chapter07/input'

In [40]:
# Restore the bucket name and key prefix from IPython's %store; both are
# used below to build S3 URIs.
%store -r s3_bucket_name
%store -r prefix

In [41]:
# S3 URI under <bucket>/<prefix>/query_results/ that query_data() passes to
# Athena as the output location for query results.
base = f's3://{s3_bucket_name}/{prefix}'
output_location = f'{base}/query_results/'

print(output_location)

s3://sagemaker-cookbook-bucket/chapter07/query_results/


In [42]:
def query_data(query_string):
    """Run an Athena query for the feature group and return its result.

    Echoes the SQL text, starts the query with results written to the
    notebook-level ``output_location``, blocks until the query has finished,
    and then loads the result.

    Args:
        query_string: SQL text to execute.

    Returns:
        Whatever the Athena query object's ``as_dataframe()`` returns.
    """
    print(f"QUERY: {query_string}\n")

    # A fresh query object per call, obtained from the notebook-level
    # ``feature_group``.
    athena_query = feature_group.athena_query()
    athena_query.run(
        query_string=query_string,
        output_location=output_location,
    )
    athena_query.wait()

    return athena_query.as_dataframe()

In [43]:
# Training split: up to 3000 rows taken in ascending index order, keeping
# only the label and the four feature columns a-d.
query = f"""SELECT label, a, b, c, d FROM "{table}" ORDER BY index ASC LIMIT 3000"""

training_df = query_data(query)
training_df

QUERY: SELECT label, a, b, c, d FROM "cookbook-feature-group-1621862289" ORDER BY index ASC LIMIT 3000



Unnamed: 0,label,a,b,c,d
0,1,-8.837413,-6.551265,23,-75
1,1,-9.216749,-2.483494,2,-51
2,1,-2.017317,-6.326533,91,34
3,1,-10.748736,-4.622519,8,-78
4,0,-3.675848,12.629029,47,32
...,...,...,...,...,...
2995,0,-5.786462,-6.790668,-65,70
2996,1,-2.552410,-1.793217,42,4
2997,0,-10.692197,1.583437,-90,-62
2998,1,-14.109003,-4.745680,37,64


In [44]:
# Validation split: up to 1000 rows, in ascending index order, whose index is
# strictly greater than 3000.
# NOTE(review): the training query takes the 3000 lowest index values without
# an explicit bound, so whether index 3000 lands in training or is skipped
# depends on whether index starts at 0 or 1 — confirm against the ingested data.
query = f"""SELECT label, a, b, c, d FROM "{table}" WHERE index > 3000 ORDER BY index ASC LIMIT 1000"""

validation_df = query_data(query)
validation_df

QUERY: SELECT label, a, b, c, d FROM "cookbook-feature-group-1621862289" WHERE index > 3000 ORDER BY index ASC LIMIT 1000



Unnamed: 0,label,a,b,c,d
0,0,-7.328588,-3.077598,54,-9
1,1,-10.128341,-3.126609,-96,81
2,1,-7.863698,-8.622445,96,-71
3,1,-5.904219,-3.370329,15,-56
4,0,-3.451493,-2.471520,-10,-66
...,...,...,...,...,...
995,1,-7.286751,-6.346180,-51,-21
996,0,-8.717633,18.595339,32,17
997,0,-7.991978,-6.598361,36,14
998,0,-23.082488,1.267317,-43,-89


In [45]:
# Test split: up to 1000 rows, in ascending index order, whose index is
# strictly greater than 4000.
# NOTE(review): the validation query has no upper bound on index, so the
# validation and test splits are disjoint only if at least 1000 rows have an
# index in 3001..4000 — confirm against the ingested data.
query = f"""SELECT label, a, b, c, d FROM "{table}" WHERE index > 4000 ORDER BY index ASC LIMIT 1000 """

test_df = query_data(query)
test_df

QUERY: SELECT label, a, b, c, d FROM "cookbook-feature-group-1621862289" WHERE index > 4000 ORDER BY index ASC LIMIT 1000 



Unnamed: 0,label,a,b,c,d
0,0,-10.488853,0.632800,-79,5
1,0,8.455742,4.183267,-7,-83
2,0,-15.466566,-0.372287,19,39
3,1,-3.134794,-4.258036,9,39
4,1,-8.362027,-9.802120,23,75
...,...,...,...,...,...
995,0,0.659784,1.414005,50,-36
996,0,-12.388167,-3.860623,2,-49
997,1,-4.294561,-8.472893,-41,-14
998,1,-6.044883,-5.261760,-49,-97


In [46]:
# Create the local tmp/ directory for the CSV exports; -p makes this a no-op
# when the directory already exists.
!mkdir -p tmp

In [47]:
# Write each split to tmp/ as CSV with a header row and without the
# DataFrame index column.
for split_df, csv_path in (
    (training_df, 'tmp/training_data.csv'),
    (validation_df, 'tmp/validation_data.csv'),
    (test_df, 'tmp/test_data.csv'),
):
    split_df.to_csv(csv_path, header=True, index=False)

In [54]:
# S3 destinations for the three CSVs, all under <bucket>/<prefix>/input/.
# `path` evaluates to the same string as `base` defined in an earlier cell.
path = f"s3://{s3_bucket_name}/{prefix}"
training_data_path = f"{path}/input/training_data.csv"
validation_data_path = f"{path}/input/validation_data.csv"
test_data_path = f"{path}/input/test_data.csv"

In [55]:
# Upload the three local CSVs to S3 with the AWS CLI; {…} interpolates the
# Python path variables into each shell command.
!aws s3 cp tmp/training_data.csv {training_data_path}
!aws s3 cp tmp/validation_data.csv {validation_data_path}
!aws s3 cp tmp/test_data.csv {test_data_path}

upload: tmp/training_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/training_data.csv
upload: tmp/validation_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/validation_data.csv
upload: tmp/test_data.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_data.csv


In [50]:
# Write each split to tmp/ a second time, now with neither a header row nor
# the DataFrame index column.
for split_df, csv_path in (
    (training_df, 'tmp/training_data_no_header.csv'),
    (validation_df, 'tmp/validation_data_no_header.csv'),
    (test_df, 'tmp/test_data_no_header.csv'),
):
    split_df.to_csv(csv_path, header=False, index=False)

In [57]:
# S3 destinations for the header-less CSVs, under <bucket>/<prefix>/input/.
# Relies on `path` having been defined by an earlier cell.
training_data_path_nh = f"{path}/input/training_data_no_header.csv"
validation_data_path_nh = f"{path}/input/validation_data_no_header.csv"
test_data_path_nh = f"{path}/input/test_data_no_header.csv"

In [58]:
# Upload the three header-less CSVs to S3 with the AWS CLI; {…} interpolates
# the Python path variables into each shell command.
!aws s3 cp tmp/training_data_no_header.csv {training_data_path_nh}
!aws s3 cp tmp/validation_data_no_header.csv {validation_data_path_nh}
!aws s3 cp tmp/test_data_no_header.csv {test_data_path_nh}

upload: tmp/training_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/training_data_no_header.csv
upload: tmp/validation_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/validation_data_no_header.csv
upload: tmp/test_data_no_header.csv to s3://sagemaker-cookbook-bucket/chapter07/input/test_data_no_header.csv


In [53]:
# Persist all six S3 paths with IPython's %store so they can be restored
# elsewhere with `%store -r`.
%store training_data_path
%store validation_data_path
%store test_data_path
%store training_data_path_nh
%store validation_data_path_nh
%store test_data_path_nh

Stored 'training_data_path' (str)
Stored 'validation_data_path' (str)
Stored 'test_data_path' (str)
Stored 'training_data_path_nh' (str)
Stored 'validation_data_path_nh' (str)
Stored 'test_data_path_nh' (str)
