# Generating a synthetic dataset and using the SageMaker Feature Store for storage and management

<img align="left" width="130" src="https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Extra/cover-small-padded.png"/>

This notebook contains the code that helps readers work through one of the recipes in the book [Machine Learning with Amazon SageMaker Cookbook: 80 proven recipes for data scientists and developers to perform ML experiments and deployments](https://www.amazon.com/Machine-Learning-Amazon-SageMaker-Cookbook/dp/1800567030).

### How to do it...

In [None]:
# Pull the dataset generator script from the book's repository into this cell.
# NOTE: %load only *inserts* the script text into the cell; run the cell a
# second time to actually execute it. The cells below expect it to define
# `all_df` (presumably the generated DataFrame -- confirm against generator.py).
%load https://raw.githubusercontent.com/PacktPublishing/Amazon-SageMaker-Cookbook/master/Chapter07/scripts/generator.py

In [None]:
# Display the dataset that is loaded into the feature group further down.
# `all_df` is not defined in this notebook's own cells; it is expected to come
# from the generator script loaded above.
all_df

In [None]:
from time import sleep

import boto3
import sagemaker
from sagemaker.session import Session

In [None]:
# Take the region from the default boto3 configuration and pin a dedicated
# boto3 session to it.
region = boto3.Session().region_name
session = boto3.Session(region_name=region)

# `client` talks to the "sagemaker" service (used below for
# list_feature_groups) and `runtime` to "sagemaker-featurestore-runtime"
# (used below for get_record); both are created from the same boto3 session.
client = session.client("sagemaker", region_name=region)
runtime = session.client("sagemaker-featurestore-runtime", region_name=region)

# Bundle the boto3 session and both clients into the SageMaker SDK session
# that is handed to the FeatureGroup object.
feature_store_session = Session(
    boto_session=session,
    sagemaker_client=client,
    sagemaker_featurestore_runtime_client=runtime,
)

In [None]:
from sagemaker import get_execution_role

# Role that is passed as `role_arn` when the feature group is created.
role = get_execution_role()

# Replace the placeholder with a real bucket name before running the
# notebook; bucket and prefix are combined into the feature group's
# `s3_uri` ("s3://<bucket>/<prefix>/input") in the create step.
s3_bucket_name = "<insert S3 bucket name here>"
prefix = "chapter07"

In [None]:
# S3 client bound to the same region as the SageMaker clients. It is not
# referenced by any other cell visible in this notebook.
s3_client = boto3.client(service_name="s3", region_name=region)

In [None]:
# Name under which the feature group is created; also persisted with %store
# at the end of the notebook.
feature_group_name = "cookbook-feature-group"

In [None]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Handle for the feature group, tied to the session configured earlier.
# The group itself is created in a later cell via feature_group.create(...).
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=feature_store_session)

In [None]:
# Remove a feature group with this name if one is left over from an earlier
# run (presumably so that the create step below starts from a clean slate).
try:
    feature_group.delete()
except client.exceptions.ResourceNotFound:
    # Only the "no such feature group" error is expected here. The previous
    # bare `except:` also hid unrelated failures (permission errors, an
    # undefined name, ...) behind this same message; those now surface.
    print("Feature group does not exist")
else:
    # Same 30 s pause as before, taken only when a delete request was
    # actually accepted -- presumably to give the deletion time to finish.
    sleep(30)

In [None]:
%%time

# Derive the feature definitions for the group from `all_df`.
# (%%time has to stay on the first line of the cell.)
feature_group.load_feature_definitions(data_frame=all_df)
# Short pause kept from the original recipe; its purpose is not evident
# from this notebook -- NOTE(review): confirm whether it is still needed.
sleep(1)

In [None]:
def wait_for_feature_group_creation(group, poll_seconds=5, timeout_seconds=600):
    """Block until `group` is no longer reported as "Creating".

    Args:
        group: Object exposing `describe()` (returning a dict with a
            "FeatureGroupStatus" entry) and a `name` attribute, such as
            the `feature_group` handle used in this notebook.
        poll_seconds: Pause between two status checks.
        timeout_seconds: Upper bound on the total time spent pausing.

    Returns:
        The final status string, which is always "Created".

    Raises:
        RuntimeError: If the group ends in any status other than "Created"
            (including still being "Creating" once the timeout is reached).
    """
    waited = 0
    status = group.describe().get("FeatureGroupStatus")
    while status == "Creating" and waited < timeout_seconds:
        sleep(poll_seconds)
        waited += poll_seconds
        status = group.describe().get("FeatureGroupStatus")

    if status != "Created":
        raise RuntimeError(
            f"Feature group {group.name} did not reach 'Created' "
            f"(last status: {status})"
        )
    return status


# Create the feature group: data is written under the chapter prefix in S3
# and the online store is enabled. "index" and "event_time" are presumably
# columns of `all_df` -- confirm against the generator script.
feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}/input",
    record_identifier_name="index",
    event_time_feature_name="event_time",
    role_arn=role,
    enable_online_store=True
)

# Poll the reported status instead of sleeping a fixed 60 s: a fixed pause
# can be too short, and it never notices a failed creation.
wait_for_feature_group_creation(feature_group)

In [None]:
# Show the full description of the feature group.
feature_group.describe()

In [None]:
# Show only the "FeatureGroupStatus" entry of the description.
feature_group.describe().get("FeatureGroupStatus")

In [None]:
# List feature groups through the "sagemaker" client created earlier.
client.list_feature_groups()

In [None]:
# Check the column dtypes of the DataFrame that is about to be ingested.
all_df.dtypes

In [None]:
%%time

feature_group.ingest(
    data_frame=all_df, max_workers=3, wait=True
)

In [None]:
# Read one record back through the Feature Store runtime client.
# "300" is an example identifier -- presumably a value of the "index" column
# that was configured as the record identifier; confirm it exists in all_df.
runtime.get_record(
    FeatureGroupName=feature_group.name, 
    RecordIdentifierValueAsString="300"
)

In [None]:
# Persist these values with IPython's %store so they can be restored in
# another notebook with `%store -r` (presumably by later recipes).
%store feature_group_name
%store s3_bucket_name
%store prefix