In [1]:
import boto3
import time
from botocore.exceptions import ClientError

In [4]:
# Initialize the SageMaker client
sagemaker_client = boto3.client('sagemaker')

def check_ingestion_status(feature_group_name):
    try:
        # Get the details of the feature group
        response = sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
        ingestion_status = response['FeatureGroupStatus']
        
        while ingestion_status == 'Creating':
            # Wait for a short period before checking again
            time.sleep(30)  # Adjust the sleep time as needed
            response = sagemaker_client.describe_feature_group(FeatureGroupName=feature_group_name)
            ingestion_status = response['FeatureGroupStatus']
            
        print(f"Ingestion Status: {ingestion_status}")
        if ingestion_status == 'CreateFailed':
            print(f"Failure Reason: {response.get('FailureReason', 'Unknown')}")
    except ClientError as e:
        print(f"An error occurred: {e}")

# Call the function to check the status
check_ingestion_status(feature_group_name="fraud-feature-group")


Ingestion Status: Created


In [32]:
import boto3

athena_client = boto3.client('athena')

# Define your query
query_string = f"""
SELECT * FROM fraud_feature_group_1704739102
"""

# Define the S3 bucket for query results
output_location = 's3://sagemaker-us-east-1-470086202700/fraud_train'

# Execute the query
response = athena_client.start_query_execution(
    QueryString=query_string,
    QueryExecutionContext={
        'Database': 'sagemaker_featurestore'  # The default database name
    },
    ResultConfiguration={
        'OutputLocation': output_location,
    }
)

# Get the query execution ID
query_execution_id = response['QueryExecutionId']

In [34]:
def get_query_results(query_execution_id):
    # Check if the query has finished
    query_status = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
    query_execution_status = query_status['QueryExecution']['Status']['State']
    #print(query_status['QueryExecution']['Status'])

    if query_execution_status == 'SUCCEEDED':
        print("Query succeeded, results are in:", output_location)
        retun 200
    elif query_execution_status in ['FAILED', 'CANCELLED']:
        print(f"Query {query_execution_status.lower()}.")
        return 201
    else:
        print("Query in progress...")
        return -1

get_query_results(query_execution_id)

Query succeeded, results are in: s3://sagemaker-us-east-1-470086202700/fraud_train


In [35]:
import boto3
s3 = boto3.client('s3')

bucket_name = "sagemaker-us-east-1-470086202700"
prefix="fraud_train"
csv_key = f'{prefix}/{query_execution_id}.csv'  
metadata_key = f'{prefix}/{query_execution_id}.csv.metadata'  

# Specify the local file paths where you want to download the files
local_csv_path = 'query_results.csv'
local_metadata_path = 'query_metadata.txt'

# Download the files
s3.download_file(bucket_name, csv_key, local_csv_path)
s3.download_file(bucket_name, metadata_key, local_metadata_path)


In [36]:
import pandas as pd
df = pd.read_csv(local_csv_path)
# Display the first few rows of the DataFrame
len(df)

284807

In [37]:
df.columns

Index(['index', 'time', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9',
       'v10', 'v11', 'v12', 'v13', 'v14', 'v15', 'v16', 'v17', 'v18', 'v19',
       'v20', 'v21', 'v22', 'v23', 'v24', 'v25', 'v26', 'v27', 'v28', 'amount',
       'class', 'event_time', 'write_time', 'api_invocation_time',
       'is_deleted'],
      dtype='object')

In [4]:
import boto3

# Initialize the clients
eventbridge_client = boto3.client('events')
lambda_client = boto3.client('lambda')

# The name of your EventBridge rule and Lambda function
rule_name = 'model-approval-rule-specific-group'
lambda_function_name = 'fraud-endpoint-deployment'

# Describe the rule
rule_response = eventbridge_client.describe_rule(Name=rule_name)
print("Rule Description:", rule_response)

Rule Description: {'Name': 'model-approval-rule-specific-group', 'Arn': 'arn:aws:events:us-east-1:470086202700:rule/model-approval-rule-specific-group', 'EventPattern': '{"source": ["aws.sagemaker"], "detail-type": ["SageMaker Model Package State Change"], "detail": {"ModelPackageGroupName": ["fraud-detection"], "ModelPackageStatus": ["Approved"]}}', 'State': 'ENABLED', 'EventBusName': 'default', 'CreatedBy': '470086202700', 'ResponseMetadata': {'RequestId': 'cc739661-319a-46bd-af4b-0029733ceefe', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'cc739661-319a-46bd-af4b-0029733ceefe', 'content-type': 'application/x-amz-json-1.1', 'content-length': '417', 'date': 'Wed, 10 Jan 2024 20:38:19 GMT'}, 'RetryAttempts': 0}}


In [5]:
# List the targets for the rule
targets_response = eventbridge_client.list_targets_by_rule(Rule=rule_name)
print("Rule Targets:", targets_response)

Rule Targets: {'Targets': [{'Id': '1', 'Arn': 'arn:aws:lambda:us-east-1:470086202700:function:fraud-endpoint-deployment'}], 'ResponseMetadata': {'RequestId': '5c3d86e9-0398-41bb-9232-82dcd1b23832', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '5c3d86e9-0398-41bb-9232-82dcd1b23832', 'content-type': 'application/x-amz-json-1.1', 'content-length': '105', 'date': 'Wed, 10 Jan 2024 20:38:30 GMT'}, 'RetryAttempts': 0}}


In [6]:
# Assuming your EventBridge client is configured for the intended region
eventbridge_client = boto3.client('events')
eventbridge_region = eventbridge_client.meta.region_name
print("EventBridge Client Region:", eventbridge_region)

EventBridge Client Region: us-east-1


In [2]:
# Check if the Lambda function is a target
is_lambda_target = any(target['Arn'].endswith(lambda_function_name) for target in targets_response['Targets'])
print("Is Lambda function a target of the rule?", is_lambda_target)


Is Lambda function a target of the rule? True


In [1]:
import pandas as pd
local_csv_path = 'query_results.csv'
data = pd.read_csv(local_csv_path)
# Display the first few rows of the DataFrame
len(data)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


284807

In [2]:
data = data.drop(['event_time', 'write_time', 'api_invocation_time', 'is_deleted','index'], axis=1)
## Moving class to first column
model_data = data
model_data.head()
model_data = pd.concat([model_data['class'], model_data.drop(['class'], axis=1)], axis=1)

In [3]:
train_data, validation_data, test_data = np.split(model_data.sample(frac=1, random_state=1229), [int(0.7 * len(model_data)), int(0.9 * len(model_data))])

NameError: name 'np' is not defined

In [None]:
from sagemaker.serializers import CSVSerializer

def real_time_predictor(data, endpoint_name):
    """
    Function to perform real-time predictions using a SageMaker endpoint with CSV input.
    """
    # Create a SageMaker runtime client
    runtime_client = boto3.client('sagemaker-runtime')

    # Create a CSV serializer
    serializer = CSVSerializer()

    # Serialize the data
    serialized_data = serializer.serialize(data)
    
    #print(f"Serialized data is {serialized_data}")

    #Send the data to the SageMaker endpoint for prediction
    response = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='text/csv',  # Specify the content type as CSV
        Body=serialized_data
        )

    # Decode the response

    result = response['Body'].read().decode()
    result_array = [float(val) for val in result.strip().split('\n')]

    return result_array


In [4]:
import json

def predict(data, rows=500):
    # Convert the DataFrame to a NumPy array
    data_array = data.to_numpy() if hasattr(data, 'to_numpy') else data.values    
    # Split the array into smaller arrays of size 'rows'
    split_array = np.array_split(data_array, int(data_array.shape[0] / float(rows) + 1))   
    predictions = []
    for array in split_array:
        # Assuming xgb_predictor is your model's predictor and it can handle NumPy arrays directly
        prediction_result = real_time_predictor(array,"fraud-detection-endpoint")
        predictions.extend(prediction_result)
    return predictions

In [5]:
predictions = predict(test_data.iloc[:, 1:])
labels =test_data.iloc[:,0]

NameError: name 'test_data' is not defined

In [6]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.metrics import precision_recall_fscore_support

rounded_predictions = np.round(predictions)

# Calculate Confusion Matrix
conf_matrix = confusion_matrix(labels, rounded_predictions)

# Calculate F1 Score
f1score = f1_score(labels, rounded_predictions)

# Precision Recall
f_Suport = precision_recall_fscore_support(labels,rounded_predictions)

print('precision: ', round(f_Suport [0][1], 2))
print('recall: ', round(f_Suport [1][1], 2))
print('F1 Score: ', round(f1score, 2))

NameError: name 'np' is not defined