In [None]:
from datetime import datetime, timedelta, timezone
import json
import os
import re
import boto3
import io
import requests
import tempfile
import warnings
import pandas as pd
import numpy as np
import time


from time import sleep, gmtime, strftime
from threading import Thread

from sagemaker import get_execution_role, session, Session, image_uris
from sagemaker.s3 import S3Downloader, S3Uploader
from sagemaker.processing import ProcessingJob
from sagemaker.serializers import CSVSerializer
from sagemaker.feature_store.feature_group import FeatureGroup
from sagemaker.model import Model
from sagemaker.model_monitor import DataCaptureConfig


# Resolve the region from the default boto3 configuration, then build a
# boto3 session explicitly pinned to that region.
region = boto3.Session().region_name
boto_session = boto3.Session(region_name=region)

# Low-level clients created from the region-pinned boto3 session.
sagemaker_client = boto_session.client(
    service_name='sagemaker',
    region_name=region,
)
featurestore_runtime = boto_session.client(
    service_name='sagemaker-featurestore-runtime',
    region_name=region,
)

# NOTE: this rebinds `session`, shadowing the `sagemaker.session` module
# pulled in by the import cell; from here on `session` is a Session instance.
session = Session()

# Separate SageMaker session wired to the clients created above.
feature_store_session = Session(
    boto_session=boto_session,
    sagemaker_client=sagemaker_client,
    sagemaker_featurestore_runtime_client=featurestore_runtime,
)

# Plain S3 client built from the default boto3 configuration.
s3 = boto3.client('s3')

# Silence every Python warning for the rest of the kernel's lifetime.
warnings.filterwarnings('ignore')

In [None]:
# Look up the execution role and the region the SageMaker session is bound to.
# `region` is rebound here, replacing the boto3-derived value assigned earlier.
role = get_execution_role()
region = session.boto_region_name

# Report both values.
print("RoleArn:", role)
print("Region:", region)

In [None]:
# --- Pipeline data bucket -------------------------------------------------
data_bucket = 'data-us-east-2-500842391574'
raw_key = 'input/raw/abalone.csv'
print("Raw Data bucket:", data_bucket)

# --- Production logs bucket -----------------------------------------------
# Set up the S3 bucket parameters for the production logs bucket.
# Enter the name of the Production Logs Bucket created by the MLOps Pipeline.
bucket = 'proddeploymentstage-prodappl-logss3bucket004b0f70-17h5c8ln5qm5a'
print("Production Logs Bucket:", bucket)

# --- S3 prefixes ----------------------------------------------------------
data_capture_prefix = 'endpoint-data-capture'
s3_capture_upload_path = f's3://{bucket}/{data_capture_prefix}'
# The ground-truth prefix is stamped with the time this cell runs, so every
# execution yields a new path.
# NOTE(review): datetime.now() is a naive local timestamp -- confirm that is intended.
ground_truth_upload_path = (
    f's3://{bucket}/ground-truth-data/{datetime.now():%Y-%m-%d-%H-%M-%S}'
)

# --- Model monitor container image ----------------------------------------
monitor_image_uri = image_uris.retrieve(
    framework="model-monitor",
    region=region,
)

# Echo the resolved settings.
print("Image URI:", monitor_image_uri)
print(f"Capture path: {s3_capture_upload_path}")
print(f"Ground truth path: {ground_truth_upload_path}")

__GET RAW DATA__

In [None]:
# Column labels applied to the 'raw' data, in file order.
names = [
    'sex',
    'length', 'diameter', 'height',
    'whole_weight', 'shucked_weight', 'viscera_weight', 'shell_weight',
    'rings',
]

# Download the 'raw' CSV object from S3 and parse it from an in-memory buffer.
# Because explicit `names` are supplied, the first line of the file is read as data.
obj = s3.get_object(Bucket=data_bucket, Key=raw_key)
raw_df = pd.read_csv(
    io.BytesIO(obj['Body'].read()),
    encoding='utf8',
    names=names,
)

# Summary statistics of the numeric columns (rendered as the cell output).
raw_df.describe()

---

__GET FAKE DATA__

In [None]:
# Load the local 'fake' CSV using the same column labels as the raw data.
fake_df = pd.read_csv('fake-abalone.csv', names=names)

# Summary statistics of the numeric columns (rendered as the cell output).
fake_df.describe()

In [None]:
# Stack the raw rows and the fake rows into a single frame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# input keeps its own row labels, so labels repeat and label-based lookups
# on the combined frame become ambiguous.
new_df = pd.concat([raw_df, fake_df], ignore_index=True)

In [None]:
# Summary statistics of the combined frame's numeric columns (rendered as the cell output).
new_df.describe()

In [None]:
# Write the combined dataset as a CSV with no header row and no index column.
# Create the target directory first so the write does not fail when '../data'
# is missing (e.g. on a fresh checkout); exist_ok makes re-runs a no-op.
os.makedirs('../data', exist_ok=True)
new_df.to_csv('../data/abalone.csv', header=False, index=False)