In [6]:
import json
import time
import uuid
from datetime import datetime, timezone

import boto3
import pandas as pd
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup, FeatureDefinition, FeatureTypeEnum

In [9]:
# --- Names of the AWS resources this notebook works with ---
bucket_name = 'wildfires'
feature_group_name = 'fire-image-feature-group'

# --- SageMaker session and the execution role handed to Feature Store ---
session = sagemaker.Session()
sm_role = sagemaker.get_execution_role()

# --- Service clients ---
# The S3 client is built from boto3's default session; the two SageMaker
# clients are built from the SageMaker session's underlying boto session.
s3 = boto3.client('s3')
sagemaker_client = session.boto_session.client('sagemaker')
feature_store_client = session.boto_session.client('sagemaker-featurestore-runtime')

In [None]:
# Declare the schema in this cell: the FeatureGroup constructor below needs it,
# and on a fresh kernel no earlier cell has defined `feature_definitions`.
feature_definitions = [
    FeatureDefinition('image_id', FeatureTypeEnum.STRING),
    FeatureDefinition('image_location', FeatureTypeEnum.STRING),
    FeatureDefinition('label', FeatureTypeEnum.INTEGRAL),
    FeatureDefinition('image_type', FeatureTypeEnum.STRING),
    FeatureDefinition('event_time', FeatureTypeEnum.STRING)
]

# Create the feature group
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=session,
                             feature_definitions=feature_definitions)

# Only call create() when the group does not exist yet, so that re-running the
# notebook still binds `feature_group` without failing on an existing group.
try:
    sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
except sagemaker_client.exceptions.ResourceNotFound:
    feature_group.create(
        s3_uri=f's3://{bucket_name}/feature-store/',
        record_identifier_name='image_id',
        event_time_feature_name='event_time',
        role_arn=sm_role
    )
else:
    print(f'Feature group {feature_group_name!r} already exists; skipping creation.')

In [10]:
def wait_for_group_created(feature_group_name, poll_interval=5, timeout=None):
    """Poll SageMaker until the feature group has finished being created.

    Args:
        feature_group_name: Name passed to ``describe_feature_group``.
        poll_interval: Seconds to sleep between polls. Defaults to 5, the
            value that was previously hard-coded.
        timeout: Optional maximum number of seconds to keep polling.
            ``None`` (the default) waits indefinitely, as before.

    Raises:
        RuntimeError: If the reported status is ``'CreateFailed'``.
        TimeoutError: If ``timeout`` is set and elapses before the group
            reaches a terminal status.
    """
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        response = sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
        status = response['FeatureGroupStatus']

        if status == 'Created':
            print('Feature group created successfully.')
            return

        if status == 'CreateFailed':
            # .get() keeps the real failure visible even if the response
            # carries no FailureReason (a direct index would raise KeyError).
            reason = response.get('FailureReason', 'unknown reason')
            raise RuntimeError(f'Failed to create feature group: {reason}')

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Feature group {feature_group_name!r} still has status {status!r} '
                f'after {timeout} seconds'
            )

        print('Waiting for feature group to be created...')
        time.sleep(poll_interval)

In [11]:
# Block until the feature group reports 'Created'; raises if creation failed.
wait_for_group_created(feature_group_name)

Feature group created successfully.


In [12]:
def list_s3_objects(prefix):
    """Return every object key in ``bucket_name`` that starts with ``prefix``.

    A single ``list_objects_v2`` call returns at most 1000 keys, so the
    listing is driven through a paginator to avoid silently truncating
    larger prefixes.

    Args:
        prefix: Key prefix to filter on, e.g. ``'fire_images/'``.

    Returns:
        A list of key strings in the order S3 returns them; empty when
        nothing matches.
    """
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
    # Pages for an empty prefix carry no 'Contents' entry at all.
    return [obj['Key'] for page in pages for obj in page.get('Contents', [])]

# List the keys under each class prefix (fire first, then non-fire), then
# preview the first ten fire keys as the cell output.
fire_images, non_fire_images = (
    list_s3_objects(class_prefix)
    for class_prefix in ('fire_images/', 'non_fire_images/')
)
fire_images[:10]

['fire_images/fire.1.png',
 'fire_images/fire.10.png',
 'fire_images/fire.100.png',
 'fire_images/fire.101.png',
 'fire_images/fire.102.png',
 'fire_images/fire.103.png',
 'fire_images/fire.104.png',
 'fire_images/fire.105.png',
 'fire_images/fire.106.png',
 'fire_images/fire.107.png']

In [13]:
def generate_metadata(image_list, label):
    """Build one Feature Store record dict per image key.

    Args:
        image_list: Iterable of S3 object keys (relative to ``bucket_name``).
        label: Value stored under ``'label'`` for every record.

    Returns:
        A list of dicts with the keys ``image_id`` (a fresh random UUID4
        string), ``image_location`` (``s3://<bucket_name>/<key>``),
        ``label``, ``image_type`` (the text after the last ``'.'`` in the
        key) and ``event_time`` (current UTC time in ISO-8601 with a
        trailing ``'Z'``).
    """
    metadata = []
    for image_location in image_list:
        # datetime.utcnow() is deprecated since Python 3.12 and yields a naive
        # value; take an aware UTC timestamp and render its offset as 'Z' so
        # the emitted string keeps the same shape as before.
        event_time = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
        metadata.append({
            'image_id': str(uuid.uuid4()),
            'image_location': f's3://{bucket_name}/{image_location}',
            'label': label,
            'image_type': image_location.split('.')[-1],
            'event_time': event_time
        })
    return metadata

def lambda_handler(event, context):
    """Lambda-style entry point: list images, build metadata, write records.

    Lists the keys under ``fire_images/`` and ``non_fire_images/``, labels
    them 1 and 0 respectively via ``generate_metadata``, and writes each
    record to the feature group named by ``feature_group_name`` with
    ``put_record``.

    Args:
        event: Unused.
        context: Unused.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded success message.
    """
    fire_images = list_s3_objects('fire_images/')
    non_fire_images = list_s3_objects('non_fire_images/')

    fire_metadata = generate_metadata(fire_images, 1)
    non_fire_metadata = generate_metadata(non_fire_images, 0)
    all_metadata = fire_metadata + non_fire_metadata

    # Send metadata to Feature Store
    runtime_client = boto3.client('sagemaker-featurestore-runtime')

    for record in all_metadata:
        runtime_client.put_record(
            # Use the shared name so records land in the same group the rest
            # of this file creates and ingests into, rather than a different
            # hard-coded literal.
            FeatureGroupName=feature_group_name,
            Record=[
                {'FeatureName': key, 'ValueAsString': str(value)}
                for key, value in record.items()
            ]
        )

    return {
        'statusCode': 200,
        'body': json.dumps('Successfully ingested data into Feature Store')
    }


In [14]:
# Refresh the key listing for each class prefix
fire_images = list_s3_objects('fire_images/')
non_fire_images = list_s3_objects('non_fire_images/')

# Build the records: fire images are labelled 1, non-fire images 0
fire_metadata = generate_metadata(image_list=fire_images, label=1)
non_fire_metadata = generate_metadata(image_list=non_fire_images, label=0)
all_metadata = [*fire_metadata, *non_fire_metadata]


# Convert metadata to DataFrame
def convert_to_df(metadata):
    """Wrap the given metadata records in a pandas DataFrame.

    Args:
        metadata: Data accepted by the ``pd.DataFrame`` constructor; in this
            file it is a list of record dicts.

    Returns:
        The resulting ``pd.DataFrame``.
    """
    frame = pd.DataFrame(data=metadata)
    return frame


df = convert_to_df(all_metadata)

# Define feature definitions: each (name, type) pair becomes one FeatureDefinition
feature_definitions = [
    FeatureDefinition(feature_name, feature_type)
    for feature_name, feature_type in (
        ('image_id', FeatureTypeEnum.STRING),
        ('image_location', FeatureTypeEnum.STRING),
        ('label', FeatureTypeEnum.INTEGRAL),
        ('image_type', FeatureTypeEnum.STRING),
        ('event_time', FeatureTypeEnum.STRING),
    )
]

In [15]:
# Ingest data into the feature group
# `df` is passed with max_workers=3 and wait=True; the call's return value
# (an IngestionManagerPandas, per the recorded output) is shown as the cell result.
feature_group.ingest(data_frame=df, max_workers=3, wait=True)

IngestionManagerPandas(feature_group_name='fire-image-feature-group', feature_definitions={'image_id': {'FeatureName': 'image_id', 'FeatureType': 'String'}, 'image_location': {'FeatureName': 'image_location', 'FeatureType': 'String'}, 'label': {'FeatureName': 'label', 'FeatureType': 'Integral'}, 'image_type': {'FeatureName': 'image_type', 'FeatureType': 'String'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f99311f25c0>, sagemaker_session=<sagemaker.session.Session object at 0x7f99314f8100>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f99313f1f60>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])

In [6]:
import json
import uuid
import boto3
import sagemaker
import pandas as pd

from datetime import datetime
from sagemaker.feature_store.feature_group import FeatureGroup, FeatureDefinition, FeatureTypeEnum

In [9]:
# --- Names of the AWS resources this notebook works with ---
bucket_name = 'wildfires'
feature_group_name = 'fire-image-feature-group'

# --- SageMaker session and the execution role handed to Feature Store ---
session = sagemaker.Session()
sm_role = sagemaker.get_execution_role()

# --- Service clients ---
# The S3 client is built from boto3's default session; the two SageMaker
# clients are built from the SageMaker session's underlying boto session.
s3 = boto3.client('s3')
sagemaker_client = session.boto_session.client('sagemaker')
feature_store_client = session.boto_session.client('sagemaker-featurestore-runtime')

In [None]:
# Declare the schema in this cell: the FeatureGroup constructor below needs it,
# and this cell must not depend on a later cell having been run first.
feature_definitions = [
    FeatureDefinition('image_id', FeatureTypeEnum.STRING),
    FeatureDefinition('image_location', FeatureTypeEnum.STRING),
    FeatureDefinition('label', FeatureTypeEnum.INTEGRAL),
    FeatureDefinition('image_type', FeatureTypeEnum.STRING),
    FeatureDefinition('event_time', FeatureTypeEnum.STRING)
]

# Create the feature group
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=session,
                             feature_definitions=feature_definitions)

# Only call create() when the group does not exist yet, so that re-running the
# notebook still binds `feature_group` without failing on an existing group.
try:
    sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
except sagemaker_client.exceptions.ResourceNotFound:
    feature_group.create(
        s3_uri=f's3://{bucket_name}/feature-store/',
        record_identifier_name='image_id',
        event_time_feature_name='event_time',
        role_arn=sm_role
    )
else:
    print(f'Feature group {feature_group_name!r} already exists; skipping creation.')

In [10]:
def wait_for_group_created(feature_group_name, poll_interval=5, timeout=None):
    """Poll SageMaker until the feature group has finished being created.

    Args:
        feature_group_name: Name passed to ``describe_feature_group``.
        poll_interval: Seconds to sleep between polls. Defaults to 5, the
            value that was previously hard-coded.
        timeout: Optional maximum number of seconds to keep polling.
            ``None`` (the default) waits indefinitely, as before.

    Raises:
        RuntimeError: If the reported status is ``'CreateFailed'``.
        TimeoutError: If ``timeout`` is set and elapses before the group
            reaches a terminal status.
    """
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        response = sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
        status = response['FeatureGroupStatus']

        if status == 'Created':
            print('Feature group created successfully.')
            return

        if status == 'CreateFailed':
            # .get() keeps the real failure visible even if the response
            # carries no FailureReason (a direct index would raise KeyError).
            reason = response.get('FailureReason', 'unknown reason')
            raise RuntimeError(f'Failed to create feature group: {reason}')

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Feature group {feature_group_name!r} still has status {status!r} '
                f'after {timeout} seconds'
            )

        print('Waiting for feature group to be created...')
        time.sleep(poll_interval)

In [11]:
# Block until the feature group reports 'Created'; raises if creation failed.
wait_for_group_created(feature_group_name)

Feature group created successfully.


In [12]:
def list_s3_objects(prefix):
    """Return every object key in ``bucket_name`` that starts with ``prefix``.

    A single ``list_objects_v2`` call returns at most 1000 keys, so the
    listing is driven through a paginator to avoid silently truncating
    larger prefixes.

    Args:
        prefix: Key prefix to filter on, e.g. ``'fire_images/'``.

    Returns:
        A list of key strings in the order S3 returns them; empty when
        nothing matches.
    """
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
    # Pages for an empty prefix carry no 'Contents' entry at all.
    return [obj['Key'] for page in pages for obj in page.get('Contents', [])]

# List the keys under each class prefix (fire first, then non-fire), then
# preview the first ten fire keys as the cell output.
fire_images, non_fire_images = (
    list_s3_objects(class_prefix)
    for class_prefix in ('fire_images/', 'non_fire_images/')
)
fire_images[:10]

['fire_images/fire.1.png',
 'fire_images/fire.10.png',
 'fire_images/fire.100.png',
 'fire_images/fire.101.png',
 'fire_images/fire.102.png',
 'fire_images/fire.103.png',
 'fire_images/fire.104.png',
 'fire_images/fire.105.png',
 'fire_images/fire.106.png',
 'fire_images/fire.107.png']

In [13]:
def generate_metadata(image_list, label):
    """Build one Feature Store record dict per image key.

    Args:
        image_list: Iterable of S3 object keys (relative to ``bucket_name``).
        label: Value stored under ``'label'`` for every record.

    Returns:
        A list of dicts with the keys ``image_id`` (a fresh random UUID4
        string), ``image_location`` (``s3://<bucket_name>/<key>``),
        ``label``, ``image_type`` (the text after the last ``'.'`` in the
        key) and ``event_time`` (current UTC time in ISO-8601 with a
        trailing ``'Z'``).
    """
    metadata = []
    for image_location in image_list:
        # datetime.utcnow() is deprecated since Python 3.12 and yields a naive
        # value; take an aware UTC timestamp and render its offset as 'Z' so
        # the emitted string keeps the same shape as before.
        event_time = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
        metadata.append({
            'image_id': str(uuid.uuid4()),
            'image_location': f's3://{bucket_name}/{image_location}',
            'label': label,
            'image_type': image_location.split('.')[-1],
            'event_time': event_time
        })
    return metadata

def lambda_handler(event, context):
    """Lambda-style entry point: list images, build metadata, write records.

    Lists the keys under ``fire_images/`` and ``non_fire_images/``, labels
    them 1 and 0 respectively via ``generate_metadata``, and writes each
    record to the feature group named by ``feature_group_name`` with
    ``put_record``.

    Args:
        event: Unused.
        context: Unused.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded success message.
    """
    fire_images = list_s3_objects('fire_images/')
    non_fire_images = list_s3_objects('non_fire_images/')

    fire_metadata = generate_metadata(fire_images, 1)
    non_fire_metadata = generate_metadata(non_fire_images, 0)
    all_metadata = fire_metadata + non_fire_metadata

    # Send metadata to Feature Store
    runtime_client = boto3.client('sagemaker-featurestore-runtime')

    for record in all_metadata:
        runtime_client.put_record(
            # Use the shared name so records land in the same group the rest
            # of this file creates and ingests into, rather than a different
            # hard-coded literal.
            FeatureGroupName=feature_group_name,
            Record=[
                {'FeatureName': key, 'ValueAsString': str(value)}
                for key, value in record.items()
            ]
        )

    return {
        'statusCode': 200,
        'body': json.dumps('Successfully ingested data into Feature Store')
    }


In [14]:
# Refresh the key listing for each class prefix
fire_images = list_s3_objects('fire_images/')
non_fire_images = list_s3_objects('non_fire_images/')

# Build the records: fire images are labelled 1, non-fire images 0
fire_metadata = generate_metadata(image_list=fire_images, label=1)
non_fire_metadata = generate_metadata(image_list=non_fire_images, label=0)
all_metadata = [*fire_metadata, *non_fire_metadata]


# Convert metadata to DataFrame
def convert_to_df(metadata):
    """Wrap the given metadata records in a pandas DataFrame.

    Args:
        metadata: Data accepted by the ``pd.DataFrame`` constructor; in this
            file it is a list of record dicts.

    Returns:
        The resulting ``pd.DataFrame``.
    """
    frame = pd.DataFrame(data=metadata)
    return frame


df = convert_to_df(all_metadata)

# Define feature definitions: each (name, type) pair becomes one FeatureDefinition
feature_definitions = [
    FeatureDefinition(feature_name, feature_type)
    for feature_name, feature_type in (
        ('image_id', FeatureTypeEnum.STRING),
        ('image_location', FeatureTypeEnum.STRING),
        ('label', FeatureTypeEnum.INTEGRAL),
        ('image_type', FeatureTypeEnum.STRING),
        ('event_time', FeatureTypeEnum.STRING),
    )
]

In [15]:
# Ingest data into the feature group
# `df` is passed with max_workers=3 and wait=True; the call's return value
# (an IngestionManagerPandas, per the recorded output) is shown as the cell result.
feature_group.ingest(data_frame=df, max_workers=3, wait=True)

IngestionManagerPandas(feature_group_name='fire-image-feature-group', feature_definitions={'image_id': {'FeatureName': 'image_id', 'FeatureType': 'String'}, 'image_location': {'FeatureName': 'image_location', 'FeatureType': 'String'}, 'label': {'FeatureName': 'label', 'FeatureType': 'Integral'}, 'image_type': {'FeatureName': 'image_type', 'FeatureType': 'String'}, 'event_time': {'FeatureName': 'event_time', 'FeatureType': 'String'}}, sagemaker_fs_runtime_client_config=<botocore.config.Config object at 0x7f99311f25c0>, sagemaker_session=<sagemaker.session.Session object at 0x7f99314f8100>, max_workers=3, max_processes=1, profile_name=None, _async_result=<multiprocess.pool.MapResult object at 0x7f99313f1f60>, _processing_pool=<pool ProcessPool(ncpus=1)>, _failed_indices=[])