In [87]:
import boto3
import sagemaker
from sagemaker.xgboost.model import XGBoostModel
from datetime import datetime, timezone
from sagemaker.serializers import CSVSerializer
import pandas as pd
from io import StringIO 
from sagemaker.model_monitor import DataCaptureConfig
import os

#  Config 
# Deployment settings read by the cells below.
region = "us-east-1"  # AWS region for the boto3/SageMaker session
role = "arn:aws:iam::904233112003:role/SageMakerExecutionRole-rev"  # IAM role handed to the SageMaker model
bucket = "verikai-heart-risk-pipeline"  # S3 bucket searched for model artifacts
prefix = "models/heart_attack/"  # key prefix searched for model artifacts

#  Helper: get latest model.tar.gz
def get_latest_model_artifact(bucket, prefix):
    """Return the S3 URI of the newest ``model.tar.gz`` under ``prefix``.

    "Newest" is decided by key name: the matching key that sorts last
    lexicographically wins. This assumes the keys embed a sortable timestamp
    (e.g. ``.../20250510T063154Z/.../model.tar.gz``) -- confirm against the
    training job's output layout.

    Args:
        bucket: Name of the S3 bucket to search.
        prefix: Key prefix that limits the search.

    Returns:
        ``"s3://<bucket>/<key>"`` for the selected artifact, or ``None`` when
        no key under ``prefix`` ends with ``model.tar.gz``.
    """
    s3 = boto3.client("s3")
    # A single list_objects_v2 call returns at most 1000 keys; paginate so
    # artifacts beyond the first page are not silently ignored.
    paginator = s3.get_paginator("list_objects_v2")
    model_keys = [
        obj["Key"]
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix)
        for obj in page.get("Contents", [])
        if obj["Key"].endswith("model.tar.gz")
    ]
    if not model_keys:
        return None
    return f"s3://{bucket}/{max(model_keys)}"

#  Fetch latest model from S3
model_artifact = get_latest_model_artifact(bucket, prefix)
if model_artifact is None:
    # Stop here instead of handing model_data=None to the SageMaker model
    # and failing later during deployment with a less obvious error.
    raise FileNotFoundError(
        f"No model.tar.gz found under s3://{bucket}/{prefix}"
    )
print("Using model:", model_artifact)

#  Set up SageMaker model + deployment
# Bind the SageMaker session to the configured region.
boto_session = boto3.Session(region_name=region)
session = sagemaker.Session(boto_session)

# A UTC timestamp suffix gives every deployment its own endpoint name.
timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
endpoint_name = f"xgb-heart-risk-endpoint-{timestamp}"

# Wrap the selected artifact as an XGBoost model (container version 1.5-1).
xgb_model = XGBoostModel(
    sagemaker_session=session,
    framework_version="1.5-1",
    role=role,
    model_data=model_artifact,
)


# Capture 100% of endpoint traffic (both inputs and outputs) to S3.
data_capture_config = DataCaptureConfig(
    enable_capture=True,
    sampling_percentage=100,
    # Built from `bucket` so the capture location follows the configured
    # bucket instead of repeating its name as a second hard-coded literal.
    destination_s3_uri=f"s3://{bucket}/inference-logs/",
    capture_options=["Input", "Output"],
)
#  Deploy endpoint
predictor = xgb_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type="ml.m5.large",
    data_capture_config=data_capture_config,
    serializer=CSVSerializer(),
)

# Write the endpoint name to data/latest_endpoint.txt, creating data/ if needed.
os.makedirs("data", exist_ok=True)
with open("data/latest_endpoint.txt", "w") as endpoint_file:
    endpoint_file.write(endpoint_name)

print("Endpoint name saved:", endpoint_name)

Using model: s3://verikai-heart-risk-pipeline/models/heart_attack/20250510T063154Z/output/xgb-heart-20250510T063154Z/output/model.tar.gz
--------!Endpoint name saved: xgb-heart-risk-endpoint-20250510T221028Z


In [70]:
import random

def generate_synthetic_input(n=5, seed=42, n_features=44):
    """Build ``n`` CSV payload strings of random feature values.

    Each payload is ``n_features`` comma-separated floats drawn uniformly
    from [0, 1] and rounded to two decimals (simulating normalized features
    or dummy flags).

    Args:
        n: Number of payload rows to generate.
        seed: Seed for the private random generator; the same seed always
            produces the same payloads.
        n_features: Number of values per row. Defaults to 44 -- presumably
            the feature count the deployed model expects; confirm.

    Returns:
        A list of ``n`` strings, each a single CSV row.
    """
    # Use a private generator so calling this helper does not reseed the
    # process-wide `random` state; the sequence for a given seed is the same
    # as with random.seed(seed).
    rng = random.Random(seed)
    return [
        ",".join(str(round(rng.uniform(0, 1), 2)) for _ in range(n_features))
        for _ in range(n)
    ]

# Generate
# Build the default batch of payloads, then show the first one as a preview.
test_inputs = generate_synthetic_input()

print("Sample payload for inference:", test_inputs[0], sep="\n")

Sample payload for inference:
0.64,0.03,0.28,0.22,0.74,0.68,0.89,0.09,0.42,0.03,0.22,0.51,0.03,0.2,0.65,0.54,0.22,0.59,0.81,0.01,0.81,0.7,0.34,0.16,0.96,0.34,0.09,0.1,0.85,0.6,0.81,0.73,0.54,0.97,0.38,0.55,0.83,0.62,0.86,0.58,0.7,0.05,0.23,0.29


In [77]:
# Smoke-test the endpoint with the first synthetic payload.
result = predictor.predict(test_inputs[0])
print(f"Prediction: {result}")

Prediction: [['0.6982229351997375']]


In [79]:
# Send every payload to the endpoint; a failed request is reported and the
# loop moves on to the next payload.
for idx, payload in enumerate(test_inputs):
    try:
        result = predictor.predict(payload)
    except Exception as err:
        print(f"[{idx}] Error: {err}")
    else:
        print(f"[{idx}] Prediction: {result}")

[0] ✅ Prediction: [['0.6982229351997375']]
[1] ✅ Prediction: [['0.9065232276916504']]
[2] ✅ Prediction: [['0.717840313911438']]
[3] ✅ Prediction: [['0.7726213335990906']]
[4] ✅ Prediction: [['5.945413249719422e-06']]


In [None]:
import json

# Endpoint name written out as JSON metadata.
metadata = {
    "endpoint_name": endpoint_name
}

metadata_path = "/opt/airflow/out/notebook_output_metadata.json"
# open() does not create missing parent directories, so ensure the output
# directory exists before writing.
os.makedirs(os.path.dirname(metadata_path), exist_ok=True)
with open(metadata_path, "w") as f:
    json.dump(metadata, f)