In [1]:
# Setup and Load Data

import pandas as pd
import boto3



# Read CSV
s3 = boto3.client('s3', "us-east-2")
read_file = s3.get_object(Bucket="sagemaker-ftb-dev", Key="wine_data/wine_data.csv")
df = pd.read_csv(read_file['Body'],sep=';')

# Display the first few rows
df.head()


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [2]:
from sklearn.model_selection import train_test_split
import time

# Select 4 features and the target variable
features = df[['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar']]
target = df['quality']
df['record_id'] = range(len(df))
current_time_sec = int(round(time.time()))
df['event_time'] = pd.Series([current_time_sec]*len(df), dtype="float64")
df.columns = df.columns.str.replace(' ', '_')

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Test the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.5337124668435269


In [4]:
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.session import Session
from sagemaker import get_execution_role

# Initialize SageMaker session and role
session = Session()
role = get_execution_role()


# Define feature definitions
from sagemaker.feature_store.feature_definition import FeatureDefinition, FeatureTypeEnum

# feature_definitions = [
#     FeatureDefinition(feature_name='fixed_acidity', feature_type=FeatureTypeEnum.FRACTIONAL),
#     FeatureDefinition(feature_name='volatile_acidity', feature_type=FeatureTypeEnum.FRACTIONAL),
#     FeatureDefinition(feature_name='citric_acid', feature_type=FeatureTypeEnum.FRACTIONAL),
#     FeatureDefinition(feature_name='residual_sugar', feature_type=FeatureTypeEnum.FRACTIONAL),
#     FeatureDefinition(feature_name='quality', feature_type=FeatureTypeEnum.FRACTIONAL),
#     FeatureDefinition(feature_name='event_time', feature_type=FeatureTypeEnum.FRACTIONAL),
#     FeatureDefinition(feature_name='record_id', feature_type=FeatureTypeEnum.INTEGRAL),

# ]

# Define feature group
feature_group_name = "wine-quality-feature-group"
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=session)
# feature_group.load_feature_definitions(data_frame = df)

# # Create feature group
# feature_group.create(
#     s3_uri=f"s3://{session.default_bucket()}/feature-store",
#     record_identifier_name="record_id",
#     event_time_feature_name="event_time",
#     role_arn=role,
#     offline_store_kms_key_id="arn:aws:kms:us-east-2:721071202713:key/3f944c2b-2f65-42c3-80dd-0112a93764e7"
#     # feature_definitions = feature_definitions
# )


# Ingest data into feature store

feature_group.describe()

feature_group.ingest(data_frame=df, max_workers=3, wait=True)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


IngestionManagerPandas(feature_group_name='wine-quality-feature-group', feature_definitions={'fixed_acidity': {'FeatureName': 'fixed_acidity', 'FeatureType': 'Fractional'}, 'volatile_acidity': {'FeatureName': 'volatile_acidity', 'FeatureType': 'Fractional'}, 'citric_acid': {'FeatureName': 'citric_acid', 'FeatureType': 'Fractional'}, 'residual_sugar': {'FeatureName': 'residual_sugar', 'FeatureType': 'Fractional'}, 'chlorides': {'FeatureName': 'chlorides', 'FeatureType': 'Fractional'}, 'free_sulfur_dioxide': {'FeatureName': 'free_sulfur_dioxide', 'FeatureType': 'Fractional'}, 'total_sulfur_dioxide': {'FeatureName': 'total_sulfur_dioxide', 'FeatureType': 'Fractional'}, 'density': {'FeatureName': 'density', 'FeatureType': 'Fractional'}, 'pH': {'FeatureName': 'pH', 'FeatureType': 'Fractional'}, 'sulphates': {'FeatureName': 'sulphates', 'FeatureType': 'Fractional'}, 'alcohol': {'FeatureName': 'alcohol', 'FeatureType': 'Fractional'}, 'quality': {'FeatureName': 'quality', 'FeatureType': 'Integ

In [5]:
boto_session = boto3.Session(region_name="us-east-2")
featurestore_runtime = boto_session.client(service_name='featurestore-runtime', region_name="us-east-2")

feature_store_session = Session(
    boto_session=boto_session,
    sagemaker_client=session,
    sagemaker_featurestore_runtime_client=featurestore_runtime
)
record_identifier_value = str(1)
featurestore_runtime.get_record(FeatureGroupName=feature_group_name, RecordIdentifierValueAsString=record_identifier_value)

UnknownServiceError: Unknown service: 'featurestore-runtime'. Valid service names are: accessanalyzer, account, acm, acm-pca, amp, amplify, amplifybackend, amplifyuibuilder, apigateway, apigatewaymanagementapi, apigatewayv2, appconfig, appconfigdata, appfabric, appflow, appintegrations, application-autoscaling, application-insights, application-signals, applicationcostprofiler, appmesh, apprunner, appstream, appsync, apptest, arc-zonal-shift, artifact, athena, auditmanager, autoscaling, autoscaling-plans, b2bi, backup, backup-gateway, batch, bcm-data-exports, bedrock, bedrock-agent, bedrock-agent-runtime, bedrock-runtime, billingconductor, braket, budgets, ce, chatbot, chime, chime-sdk-identity, chime-sdk-media-pipelines, chime-sdk-meetings, chime-sdk-messaging, chime-sdk-voice, cleanrooms, cleanroomsml, cloud9, cloudcontrol, clouddirectory, cloudformation, cloudfront, cloudfront-keyvaluestore, cloudhsm, cloudhsmv2, cloudsearch, cloudsearchdomain, cloudtrail, cloudtrail-data, cloudwatch, codeartifact, codebuild, codecatalyst, codecommit, codeconnections, codedeploy, codeguru-reviewer, codeguru-security, codeguruprofiler, codepipeline, codestar, codestar-connections, codestar-notifications, cognito-identity, cognito-idp, cognito-sync, comprehend, comprehendmedical, compute-optimizer, config, connect, connect-contact-lens, connectcampaigns, connectcases, connectparticipant, controlcatalog, controltower, cost-optimization-hub, cur, customer-profiles, databrew, dataexchange, datapipeline, datasync, datazone, dax, deadline, detective, devicefarm, devops-guru, directconnect, discovery, dlm, dms, docdb, docdb-elastic, drs, ds, dynamodb, dynamodbstreams, ebs, ec2, ec2-instance-connect, ecr, ecr-public, ecs, efs, eks, eks-auth, elastic-inference, elasticache, elasticbeanstalk, elastictranscoder, elb, elbv2, emr, emr-containers, emr-serverless, entityresolution, es, events, evidently, finspace, finspace-data, firehose, fis, fms, forecast, forecastquery, frauddetector, freetier, fsx, gamelift, glacier, globalaccelerator, glue, grafana, greengrass, greengrassv2, groundstation, guardduty, health, healthlake, iam, identitystore, imagebuilder, importexport, inspector, inspector-scan, inspector2, internetmonitor, iot, iot-data, iot-jobs-data, iot1click-devices, iot1click-projects, iotanalytics, iotdeviceadvisor, iotevents, iotevents-data, iotfleethub, iotfleetwise, iotsecuretunneling, iotsitewise, iotthingsgraph, iottwinmaker, iotwireless, ivs, ivs-realtime, ivschat, kafka, kafkaconnect, kendra, kendra-ranking, keyspaces, kinesis, kinesis-video-archived-media, kinesis-video-media, kinesis-video-signaling, kinesis-video-webrtc-storage, kinesisanalytics, kinesisanalyticsv2, kinesisvideo, kms, lakeformation, lambda, launch-wizard, lex-models, lex-runtime, lexv2-models, lexv2-runtime, license-manager, license-manager-linux-subscriptions, license-manager-user-subscriptions, lightsail, location, logs, lookoutequipment, lookoutmetrics, lookoutvision, m2, machinelearning, macie2, mailmanager, managedblockchain, managedblockchain-query, marketplace-agreement, marketplace-catalog, marketplace-deployment, marketplace-entitlement, marketplacecommerceanalytics, mediaconnect, mediaconvert, medialive, mediapackage, mediapackage-vod, mediapackagev2, mediastore, mediastore-data, mediatailor, medical-imaging, memorydb, meteringmarketplace, mgh, mgn, migration-hub-refactor-spaces, migrationhub-config, migrationhuborchestrator, migrationhubstrategy, mobile, mq, mturk, mwaa, neptune, neptune-graph, neptunedata, network-firewall, networkmanager, networkmonitor, nimble, oam, omics, opensearch, opensearchserverless, opsworks, opsworkscm, organizations, osis, outposts, panorama, payment-cryptography, payment-cryptography-data, pca-connector-ad, pca-connector-scep, personalize, personalize-events, personalize-runtime, pi, pinpoint, pinpoint-email, pinpoint-sms-voice, pinpoint-sms-voice-v2, pipes, polly, pricing, privatenetworks, proton, qbusiness, qconnect, qldb, qldb-session, quicksight, ram, rbin, rds, rds-data, redshift, redshift-data, redshift-serverless, rekognition, repostspace, resiliencehub, resource-explorer-2, resource-groups, resourcegroupstaggingapi, robomaker, rolesanywhere, route53, route53-recovery-cluster, route53-recovery-control-config, route53-recovery-readiness, route53domains, route53profiles, route53resolver, rum, s3, s3control, s3outposts, sagemaker, sagemaker-a2i-runtime, sagemaker-edge, sagemaker-featurestore-runtime, sagemaker-geospatial, sagemaker-metrics, sagemaker-runtime, savingsplans, scheduler, schemas, sdb, secretsmanager, securityhub, securitylake, serverlessrepo, service-quotas, servicecatalog, servicecatalog-appregistry, servicediscovery, ses, sesv2, shield, signer, simspaceweaver, sms, sms-voice, snow-device-management, snowball, sns, sqs, ssm, ssm-contacts, ssm-incidents, ssm-sap, sso, sso-admin, sso-oidc, stepfunctions, storagegateway, sts, supplychain, support, support-app, swf, synthetics, taxsettings, textract, timestream-influxdb, timestream-query, timestream-write, tnb, transcribe, transfer, translate, trustedadvisor, verifiedpermissions, voice-id, vpc-lattice, waf, waf-regional, wafv2, wellarchitected, wisdom, workdocs, worklink, workmail, workmailmessageflow, workspaces, workspaces-thin-client, workspaces-web, xray