In [None]:
# import os
# import pandas as pd

# # Local cache directory and file path
# cache_dir = '/home/sagemaker-user/data'
# os.makedirs(cache_dir, exist_ok=True)
# local_parquet_path = os.path.join(cache_dir, 'med_xgb_data.parquet')

# # Your existing CSV S3 path
# s3_csv_path = 's3://mlops-sandbox-bucket/med_xgb_data.csv'

# # Parquet S3 path (same bucket/folder but different filename)
# s3_parquet_path = 's3://mlops-sandbox-bucket/med_xgb_data.parquet'

# if os.path.exists(local_parquet_path):
#     # Load from local cache if exists
#     df = pd.read_parquet(local_parquet_path)
#     print("Loaded data from local parquet cache")
# else:
#     # First time loading: read CSV from S3 (slow)
#     df = pd.read_csv(s3_csv_path)
#     print("Loaded data from CSV on S3")

#     # Save parquet locally for future quick loads
#     df.to_parquet(local_parquet_path)
#     print(f"Saved parquet locally at {local_parquet_path}")

#     # Also save parquet back to S3 for persistence/sharing
#     df.to_parquet(s3_parquet_path, index=False)
#     print(f"Saved parquet to S3 at {s3_parquet_path}")


In [1]:
# Use the Conversation API to send a text message to Amazon Nova.

import boto3
from botocore.exceptions import ClientError

# Create a Bedrock Runtime client in the AWS Region you want to use.
client = boto3.client("bedrock-runtime", region_name="us-east-2")

# Set the model ID, e.g., Amazon Nova Lite.
model_id = "us.amazon.nova-lite-v1:0"

# Start a conversation with the user message.
user_message = "Describe the purpose of a 'hello world' program in one line."
conversation = [
    {
        "role": "user",
        "content": [{"text": user_message}],
    }
]

try:
    # Send the message to the model, using a basic inference configuration.
    response = client.converse(
        modelId=model_id,
        messages=conversation,
        inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
    )

    # Extract and print the response text (first content item of the reply).
    response_text = response["output"]["message"]["content"][0]["text"]
    print(response_text)

except Exception as e:
    # ClientError is a subclass of Exception, so a single handler covers both
    # service errors and anything else (e.g. an unexpected response layout).
    print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
    # Re-raise instead of calling exit(1): inside a Jupyter kernel exit() asks
    # the kernel to shut down, which would discard all notebook state. Raising
    # still stops "Run All" at this cell and keeps the full traceback.
    raise




In [5]:
# # Use the Conversation API to send a text message to Meta Llama.

# import boto3
# from botocore.exceptions import ClientError

# # Create a Bedrock Runtime client in the AWS Region you want to use.
# client = boto3.client("bedrock-runtime", region_name="us-east-2")

# # Set the model ID, e.g., Llama 3.3 70B Instruct.
# model_id = "meta.llama3-3-70b-instruct-v1:0"

# # Start a conversation with the user message.
# user_message = "Describe the purpose of a 'hello world' program in one line."
# conversation = [
#     {
#         "role": "user",
#         "content": [{"text": user_message}],
#     }
# ]

# try:
#     # Send the message to the model, using a basic inference configuration.
#     response = client.converse(
#         modelId=model_id,
#         messages=conversation,
#         inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
#     )

#     # Extract and print the response text.
#     response_text = response["output"]["message"]["content"][0]["text"]
#     print(response_text)

# except (ClientError, Exception) as e:
#     print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
#     exit(1)




The purpose of a 'hello world' program is to serve as a simple, introductory example that verifies a programming environment is set up correctly and allows beginners to understand the basic syntax of a programming language.


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, log_loss, confusion_matrix, ConfusionMatrixDisplay
)
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
import mlflow.xgboost

In [2]:
# Point the MLflow client at the SageMaker MLflow tracking server, addressed by its ARN.
# NOTE(review): the ARN embeds a specific AWS region and account id; consider
# reading it from configuration so the notebook is portable — confirm with the team.
mlflow.set_tracking_uri("arn:aws:sagemaker:us-east-2:168264144360:mlflow-tracking-server/mlflow-tracker")

In [4]:
# Load the sample dataset from a local Parquet file into a DataFrame.
# NOTE(review): this is an absolute path under one user's home directory; the
# notebook will not run elsewhere unless the file exists at the same location.
df = pd.read_parquet('/home/sagemaker-user/mlops-sandbox-repo/data_sample.parquet')

In [5]:
# Display the loaded DataFrame as the cell's rich output (rendered truncated, first and last rows).
df

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_91,feature_92,feature_93,feature_94,feature_95,feature_96,feature_97,feature_98,feature_99,target
0,0.584852,0.755651,0.368659,0.119589,0.580436,0.527097,0.653506,0.607716,0.609799,0.653168,...,0.249470,0.009453,0.832620,0.264304,0.460975,0.390494,0.830921,0.601923,0.156969,1.0
1,0.743696,0.561383,0.761666,0.529731,0.718923,0.315159,0.508582,0.479328,0.742755,0.347100,...,0.663798,0.303814,0.534612,0.190985,0.880898,0.083525,0.057494,0.925466,0.037726,1.0
2,0.712096,0.122051,0.861112,0.574324,0.063007,0.840001,0.813438,0.850219,0.468779,0.614024,...,0.597584,0.629592,0.015659,0.798951,0.983474,0.183603,0.613524,0.010773,0.430275,1.0
3,0.637077,0.815515,0.841188,0.629551,0.845315,0.448948,0.362523,0.212586,0.370824,0.823708,...,0.166979,0.130431,0.067339,0.205508,0.885936,0.702324,0.071777,0.571713,0.861474,0.0
4,0.071479,0.417033,0.254182,0.159191,0.116450,0.923360,0.291746,0.567617,0.040516,0.199343,...,0.614008,0.548293,0.183154,0.693368,0.445939,0.371817,0.073328,0.609666,0.463049,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1999995,0.155987,0.768402,0.828251,0.006655,0.195163,0.714458,0.746153,0.071879,0.166756,0.139765,...,0.274778,0.675356,0.345437,0.709106,0.970674,0.819077,0.984688,0.165571,0.714123,1.0
1999996,0.906121,0.421379,0.680619,0.793932,0.138649,0.798813,0.751569,0.265311,0.302109,0.617057,...,0.376165,0.491770,0.090508,0.943953,0.604491,0.019372,0.224623,0.680869,0.771995,1.0
1999997,0.942616,0.971797,0.591011,0.899205,0.777570,0.231627,0.635901,0.504529,0.927475,0.683067,...,0.371231,0.945209,0.556831,0.761642,0.335193,0.493106,0.150414,0.533906,0.611235,0.0
1999998,0.203001,0.827755,0.271172,0.783206,0.398350,0.734786,0.003106,0.460240,0.380496,0.515562,...,0.853176,0.351570,0.404323,0.294957,0.554594,0.616262,0.198557,0.737909,0.521828,1.0


In [7]:
# NOTE(review): mlflow.sklearn is not referenced in the visible cells — confirm whether it is still needed.
import mlflow.sklearn
import joblib

# Load the previously saved model object from a pickle file; the path is
# relative to the notebook's current working directory.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so only load
# model files that come from a trusted source.
model = joblib.load("xgb_model/model.pkl")

In [8]:
# Separate the label column from the remaining columns, which serve as features.
target_col = 'target'
y = df[target_col]
X = df.drop(columns=[target_col])

In [12]:
from mlflow.models import infer_signature
import numpy as np

# Take the first feature row and the model's prediction for it as a minimal
# input/output example from which MLflow infers the model signature.
X_sample = X.iloc[:1]  # adjust to match your model input shape
y_sample = model.predict(X_sample)
signature = infer_signature(X_sample, y_sample)

# Log the model under a new MLflow run and report the URI it can be loaded from.
with mlflow.start_run() as run:
    mlflow.xgboost.log_model(
        model,
        artifact_path="xgb-model",
        signature=signature,
    )
    model_uri = f"runs:/{run.info.run_id}/xgb-model"
    print("Model URI:", model_uri)




Model URI: runs:/1bc836bde27249dba1a97bdc88136bb9/xgb-model
🏃 View run amazing-bear-524 at: https://us-east-2.experiments.sagemaker.aws/#/experiments/0/runs/1bc836bde27249dba1a97bdc88136bb9
🧪 View experiment at: https://us-east-2.experiments.sagemaker.aws/#/experiments/0


In [14]:
import mlflow

# Reuse the URI produced by the logging cell above instead of a run id pasted
# from an earlier session: after "Restart & Run All" the logging cell creates a
# new run, and a hard-coded id would silently load a stale (or missing) model.
logged_model = model_uri

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame (a single-row slice of the feature frame).
import pandas as pd
loaded_model.predict(X.iloc[1:2])

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

array([0])

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Fit a fresh XGBoost classifier on the training split (nothing is sent to MLflow here).
xgb_params = dict(
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    subsample=0.7,
    colsample_bytree=0.7,
    tree_method='hist',
    eval_metric='logloss',
    random_state=42,
)
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# Class predictions for the held-out split, plus the probability column at
# index 1 (the second class) for probability-based metrics.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]


In [None]:
# Smoke test: open a run in a scratch experiment and log a single parameter.
experiment_name = "test_experiment"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="param_test"):
    mlflow.log_param(key="n_estimators", value=50)
    print("✅ Logged single parameter to MLflow")

In [None]:
mlflow.set_experiment("xgb_experiment")

# Build the metrics to log. The notebook never defined `metrics`, so the loop
# below used to fail with NameError. Label-based metrics use the class
# predictions; ROC AUC and log loss use the predicted probabilities.
metrics = {
    "accuracy": accuracy_score(y_test, y_pred),
    "precision": precision_score(y_test, y_pred),
    "recall": recall_score(y_test, y_pred),
    "f1": f1_score(y_test, y_pred),
    "roc_auc": roc_auc_score(y_test, y_prob),
    "log_loss": log_loss(y_test, y_prob),
}

with mlflow.start_run(run_name="post_training_logging"):
    for k, v in metrics.items():
        # Cast to a plain float so numpy scalar types are logged uniformly.
        mlflow.log_metric(k, float(v))

    # mlflow.xgboost.log_model(model, artifact_path="model")

    # Only metrics are logged here (model logging above is disabled), so the
    # message no longer claims the model was logged.
    print("✅ Metrics logged post training.")
