In [1]:
# Parameters
N_Trials = 2


In [2]:
import json

# Load configuration from JSON
config_file = "pipeline_config.json"

# Read the N_Trials value
try:
    with open(config_file, "r") as f:
        config = json.load(f)
        N_TRIALS = config.get("N_Trials", 1)  # Default to 1 if not found
except FileNotFoundError:
    print(f"Config file {config_file} not found. Using default values.")
    N_TRIALS = 1
except json.JSONDecodeError:
    print(f"Error parsing {config_file}. Using default values.")
    N_TRIALS = 1


In [3]:
!pip install shap
!pip install xgboost









In [4]:
import pandas as pd
import numpy as np

import boto3
from pyathena import connect
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup, FeatureDefinition, FeatureTypeEnum
from sagemaker.session import Session
from sagemaker import get_execution_role

from pyathena import connect

import time
import shap
import json
import joblib
import io

import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score, precision_score, recall_score

from itertools import combinations
import pickle
import warnings

import model_methods


# S3 and Athena details
bucket_name = "group3-project-bucket"
database_name = "group_project_db"
table_name = "hospital_readmissions"
s3_output = f"s3://{bucket_name}/athena-results/"
region = "us-east-1"
s3_client = boto3.client("s3", region_name=region)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml


sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


## Find the latest feature store to generate a new model!

In [5]:
glue_client = boto3.client("glue")

# List databases in AWS Glue
response = glue_client.get_databases()
print("\nAvailable Databases in Glue:")
for db in response["DatabaseList"]:
    print(f"- {db['Name']}")

# List tables in the `sagemaker_featurestore` database (if it exists)
database_name = "sagemaker_featurestore"

try:
    response = glue_client.get_tables(DatabaseName=database_name)
    print(f"\nTables in `{database_name}` database:")
    for table in response["TableList"]:
        print(f"- {table['Name']}")
except glue_client.exceptions.EntityNotFoundException:
    print(f"\nDatabase `{database_name}` not found in Glue.")


Available Databases in Glue:
- default
- group_project_db
- sagemaker_featurestore

Tables in `sagemaker_featurestore` database:
- hospital_readmissions_features_1740354579
- hospital_readmissions_features_1740867183_1740867183
- hospital_readmissions_features_1740867247_1740867247
- hospital_readmissions_features_1740867455_1740867455
- hospital_readmissions_features_1740867741_1740867741
- hospital_readmissions_features_1740868107_1740868107
- hospital_readmissions_features_1740868421_1740868421
- hospital_readmissions_features_1740868692_1740868692
- hospital_readmissions_features_1740869180_1740869180
- hospital_readmissions_features_1740869682_1740869682
- hospital_readmissions_features_1740871225_1740871225
- hospital_readmissions_features_1740871778_1740871779


Query Athenta Tables for Data Splitting

In [6]:
# Query the feature store in Athena

latest_table = response["TableList"][-1]
query = f"""
SELECT * 
FROM "sagemaker_featurestore"."{latest_table["Name"]}"
"""

# Connect to Athena
connection = connect(
    s3_staging_dir=f"s3://{bucket_name}/athena-results/",
    region_name="us-east-1"
)

# Retrieve all feature data
df = pd.read_sql(query, connection)

  df = pd.read_sql(query, connection)


In [7]:
df = df.drop(columns=["event_time", "write_time", "api_invocation_time", "is_deleted"])
display(df.head())


print("Base Features: ", len(df.columns))
print("Data Samples: ", len(df))

categorical_columns, num_cats = model_methods.get_categorical_columns_from_s3(bucket_name, s3_client)
print("Categorical columns: ", categorical_columns)
print("Num classes per category: ", num_cats)

Unnamed: 0,age,time_in_hospital,n_lab_procedures,n_procedures,n_medications,n_outpatient,n_inpatient,n_emergency,medical_specialty,diag_1,diag_2,diag_3,glucose_test,a1ctest,change,diabetes_med,readmitted
0,3,8,72,1,18,2,0,0,4,0,7,6,1,1,0,1,0
1,3,4,35,0,21,0,1,2,4,0,6,0,1,1,0,1,0
2,3,5,53,0,16,0,0,0,1,6,6,6,1,1,1,1,0
3,3,9,69,2,17,0,4,1,1,3,6,6,1,1,1,1,1
4,3,8,83,0,18,0,0,0,1,0,6,6,1,1,0,1,0


Base Features:  17
Data Samples:  16459
Identified categorical columns: ['glucose_test', 'A1Ctest', 'age', 'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'change', 'diabetes_med', 'readmitted']
Category counts per categorical column: {'glucose_test': 3, 'A1Ctest': 3, 'age': 6, 'medical_specialty': 7, 'diag_1': 8, 'diag_2': 8, 'diag_3': 8, 'change': 2, 'diabetes_med': 2, 'readmitted': 2}
Categorical columns:  ['glucose_test', 'A1Ctest', 'age', 'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'change', 'diabetes_med', 'readmitted']
Num classes per category:  {'glucose_test': 3, 'A1Ctest': 3, 'age': 6, 'medical_specialty': 7, 'diag_1': 8, 'diag_2': 8, 'diag_3': 8, 'change': 2, 'diabetes_med': 2, 'readmitted': 2}


Train an XGB model raw, use SHAP to visual feature importance.

In [8]:
# Split data into train (50%), test (10%), production (40%)
train_df, temp_df = train_test_split(df, test_size=0.5, random_state=42)
test_df, prod_df = train_test_split(temp_df, test_size=0.8, random_state=42)

# Separate features and target variable
target_column = "readmitted"
X_train = train_df.drop(columns=[target_column])
y_train = train_df[target_column]
X_test = test_df.drop(columns=[target_column])
y_test = test_df[target_column]

# Train XGBoost model on production dataset
dmatrix_train = xgb.DMatrix(X_train, label=y_train)
params = {
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    "seed": 42
}
model = xgb.train(params, dmatrix_train, num_boost_round=100)

def eval_model(model, X_test, y_test):
    """
    Evaluates the XGBoost model and returns evaluation metrics.
    Ensures all relevant metrics are logged for the model card.
    """
    # Convert test data into DMatrix
    dmatrix_test = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)
    
    # Make predictions
    y_pred_proba = model.predict(dmatrix_test)
    y_pred = (y_pred_proba >= 0.5).astype(int)  # Convert probabilities to binary predictions
    
    # Compute evaluation metrics
    test_log_loss = log_loss(y_test, y_pred_proba)
    test_accuracy = accuracy_score(y_test, y_pred)
    test_auc = roc_auc_score(y_test, y_pred_proba)
    test_precision = precision_score(y_test, y_pred)
    test_recall = recall_score(y_test, y_pred)

    # Print evaluation metrics
    print(f"✅ Test Log Loss: {test_log_loss:.4f}")
    print(f"✅ Test Accuracy: {test_accuracy:.4f}")
    print(f"✅ Test AUC: {test_auc:.4f}")
    print(f"✅ Test Precision: {test_precision:.4f}")
    print(f"✅ Test Recall: {test_recall:.4f}")

    return test_accuracy, test_auc, test_precision, test_recall


eval_model(model, X_test, y_test)

✅ Test Log Loss: 0.7285
✅ Test Accuracy: 0.5662
✅ Test AUC: 0.6001
✅ Test Precision: 0.5645
✅ Test Recall: 0.5142


(0.5662211421628189,
 0.6000948115067498,
 0.5644504748982361,
 0.5142150803461063)

Extract Important Features from XGBoost Model.. create interacting features from statistitically most important features.

In [9]:
X_train_final, X_test_final = model_methods.shap_feature_engineering(model, X_train, X_test, bucket_name, s3_client)
display(X_train_final.head())
display(X_test_final.head())

print(X_train_final.dtypes)

Identified categorical columns: ['glucose_test', 'A1Ctest', 'age', 'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'change', 'diabetes_med', 'readmitted']
Category counts per categorical column: {'glucose_test': 3, 'A1Ctest': 3, 'age': 6, 'medical_specialty': 7, 'diag_1': 8, 'diag_2': 8, 'diag_3': 8, 'change': 2, 'diabetes_med': 2, 'readmitted': 2}


Top Features: ['n_inpatient', 'n_lab_procedures', 'n_medications', 'medical_specialty', 'n_outpatient', 'diag_1', 'diag_2', 'time_in_hospital', 'n_procedures', 'n_emergency', 'diabetes_med', 'age', 'diag_3', 'a1ctest', 'change', 'glucose_test']
Skipping interaction: n_inpatient × medical_specialty (Categorical × Numeric)
Skipping interaction: n_lab_procedures × medical_specialty (Categorical × Numeric)
Skipping interaction: n_medications × medical_specialty (Categorical × Numeric)
Skipping interaction: medical_specialty × n_outpatient (Categorical × Numeric)
Interaction features saved to s3://group3-project-bucket/config/interaction_features.json


Unnamed: 0,n_inpatient,n_lab_procedures,n_medications,medical_specialty,n_outpatient,diag_1,diag_2,time_in_hospital,n_procedures,n_emergency,...,diag_3,a1ctest,change,glucose_test,n_inpatient_x_n_lab_procedures,n_inpatient_x_n_medications,n_inpatient_x_n_outpatient,n_lab_procedures_x_n_medications,n_lab_procedures_x_n_outpatient,n_medications_x_n_outpatient
1603,1,44,14,4,0,0,0,2,5,0,...,2,1,0,1,44,14,0,616,0,0
8014,2,62,30,4,11,7,0,6,2,0,...,1,1,1,1,124,60,22,1860,682,330
4065,0,38,4,3,0,2,1,3,0,0,...,6,1,0,1,0,0,0,152,0,0
1294,0,2,14,4,0,0,0,2,0,0,...,1,1,1,1,0,0,0,28,0,0
15494,1,46,10,4,0,6,6,4,1,0,...,6,1,0,1,46,10,0,460,0,0


Unnamed: 0,n_inpatient,n_lab_procedures,n_medications,medical_specialty,n_outpatient,diag_1,diag_2,time_in_hospital,n_procedures,n_emergency,...,diag_3,a1ctest,change,glucose_test,n_inpatient_x_n_lab_procedures,n_inpatient_x_n_medications,n_inpatient_x_n_outpatient,n_lab_procedures_x_n_medications,n_lab_procedures_x_n_outpatient,n_medications_x_n_outpatient
5064,3,65,21,4,4,7,0,4,0,0,...,0,1,1,1,195,63,12,1365,260,84
6561,0,53,19,5,0,2,1,1,1,0,...,2,1,1,1,0,0,0,1007,0,0
14458,0,31,14,4,5,0,1,2,6,0,...,0,1,0,1,0,0,0,434,155,70
13293,0,48,11,3,0,7,6,4,0,0,...,0,1,0,1,0,0,0,528,0,0
10114,2,63,9,1,2,6,6,3,1,0,...,6,1,0,1,126,18,4,567,126,18


n_inpatient                         int64
n_lab_procedures                    int64
n_medications                       int64
medical_specialty                   int64
n_outpatient                        int64
diag_1                              int64
diag_2                              int64
time_in_hospital                    int64
n_procedures                        int64
n_emergency                         int64
diabetes_med                        int64
age                                 int64
diag_3                              int64
a1ctest                             int64
change                              int64
glucose_test                        int64
n_inpatient_x_n_lab_procedures      int64
n_inpatient_x_n_medications         int64
n_inpatient_x_n_outpatient          int64
n_lab_procedures_x_n_medications    int64
n_lab_procedures_x_n_outpatient     int64
n_medications_x_n_outpatient        int64
dtype: object


## Save Production Data to an s3 bucket for Later

In [10]:
X_prod = prod_df.drop(columns=[target_column])
y_prod = prod_df[target_column]

X_prod_final = model_methods.apply_interaction_features(X_prod, bucket_name, s3_client)
print(X_prod_final.columns)

Index(['age', 'time_in_hospital', 'n_lab_procedures', 'n_procedures',
       'n_medications', 'n_outpatient', 'n_inpatient', 'n_emergency',
       'medical_specialty', 'diag_1', 'diag_2', 'diag_3', 'glucose_test',
       'a1ctest', 'change', 'diabetes_med', 'n_inpatient_x_n_lab_procedures',
       'n_inpatient_x_n_medications', 'n_inpatient_x_n_outpatient',
       'n_lab_procedures_x_n_medications', 'n_lab_procedures_x_n_outpatient',
       'n_medications_x_n_outpatient'],
      dtype='object')


In [11]:
# Define S3 bucket and file path
def save_to_prod(df, name):
    s3_filename = f"production_data/{name}.csv"
    s3_uri = f"s3://{bucket_name}/{s3_filename}"
    
    # Convert DataFrame to CSV in memory
    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer, index=False)
    
    # Upload CSV file to S3
    s3_client.put_object(
        Bucket=bucket_name,
        Key=s3_filename,
        Body=csv_buffer.getvalue()
    )
    
    print(f"Data saved to S3: {s3_uri}")

save_to_prod(X_prod_final, "X_prod_final")
save_to_prod(y_prod, "y_prod")
save_to_prod(X_train_final, "X_train_final")
save_to_prod(y_train, "y_train")

Data saved to S3: s3://group3-project-bucket/production_data/X_prod_final.csv


Data saved to S3: s3://group3-project-bucket/production_data/y_prod.csv


Data saved to S3: s3://group3-project-bucket/production_data/X_train_final.csv


Data saved to S3: s3://group3-project-bucket/production_data/y_train.csv


Bayesian Optimization Procedure to find best XGB model

In [12]:
!pip install optuna





In [13]:
import optuna
from sklearn.metrics import roc_auc_score

def xgb_objective(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'min_child_weight': trial.suggest_float('min_child_weight', 1, 10),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'seed': 42
    }
    
    dtrain = xgb.DMatrix(X_train_final, label=y_train, enable_categorical=True)
    dtest = xgb.DMatrix(X_test_final, label=y_test, enable_categorical=True)
    model = xgb.train(params, dtrain, num_boost_round=100)
    preds = model.predict(dtest)
    
    return roc_auc_score(y_test, preds)

study = optuna.create_study(direction="maximize")
study.optimize(xgb_objective, n_trials=N_TRIALS)

best_params = study.best_params
best_params['max_depth'] = int(best_params['max_depth'])

[I 2025-03-01 23:32:30,219] A new study created in memory with name: no-name-f2e1180f-b150-4545-9527-6eb89a27a408


[I 2025-03-01 23:32:30,425] Trial 0 finished with value: 0.6319039243398269 and parameters: {'learning_rate': 0.10880750423205379, 'max_depth': 3, 'min_child_weight': 9.289093558360852, 'colsample_bytree': 0.5799636704556652, 'subsample': 0.8585775986235836}. Best is trial 0 with value: 0.6319039243398269.


[I 2025-03-01 23:32:30,744] Trial 1 finished with value: 0.6360980782209699 and parameters: {'learning_rate': 0.024129725134068247, 'max_depth': 7, 'min_child_weight': 9.961013136461482, 'colsample_bytree': 0.4126921202775962, 'subsample': 0.6086506024074798}. Best is trial 1 with value: 0.6360980782209699.


In [14]:
# Train the best model
dmatrix_train = xgb.DMatrix(X_train_final, label=y_train, enable_categorical=True)
model = xgb.train(best_params, dmatrix_train, num_boost_round=500)

In [15]:
# Visualize Optuna Trials
fig = optuna.visualization.matplotlib.plot_optimization_history(study)
plt.savefig("figures/optuna_optimization_history.png")
plt.close()

fig = optuna.visualization.matplotlib.plot_param_importances(study)
plt.savefig("figures/optuna_param_importance.png")
plt.close()

  fig = optuna.visualization.matplotlib.plot_optimization_history(study)


  fig = optuna.visualization.matplotlib.plot_param_importances(study)


In [16]:
explainer = shap.Explainer(model, X_test_final)
shap_values = explainer(X_test_final)

# Save SHAP summary plot
plt.figure()
shap.summary_plot(shap_values, X_test_final, show=False)
plt.savefig("figures/final_shap_summary.png")
plt.close()

# Save SHAP dependence plot for the first feature
plt.figure()
shap.dependence_plot(0, shap_values.values, X_test_final, show=False)
plt.savefig("figures/final_shap_dependence_0.png")
plt.close()

 15%|===                 | 249/1646 [00:11<01:01]       

 16%|===                 | 271/1646 [00:12<01:00]       

 18%|====                | 303/1646 [00:13<00:57]       

 20%|====                | 334/1646 [00:14<00:54]       

 22%|====                | 364/1646 [00:15<00:52]       

 24%|=====               | 394/1646 [00:16<00:50]       

 25%|=====               | 416/1646 [00:17<00:50]       

 27%|=====               | 437/1646 [00:18<00:49]       































































































<Figure size 640x480 with 0 Axes>

In [17]:
print("TEST")
acc, roc, precision, recall = eval_model(model, X_test_final, y_test)

print("TRAIN")
acc_train, roc_train, precision_train, recall_train = eval_model(model, X_train_final, y_train)

TEST
✅ Test Log Loss: 0.6762
✅ Test Accuracy: 0.5832
✅ Test AUC: 0.6243
✅ Test Precision: 0.5922
✅ Test Recall: 0.4883
TRAIN


✅ Test Log Loss: 0.4868
✅ Test Accuracy: 0.8174
✅ Test AUC: 0.9077
✅ Test Precision: 0.8802
✅ Test Recall: 0.7145


In [18]:
# Save the model
model.save_model("models/tuned_xgboost_model.model")
print("Model saved to models/tuned_xgboost_model.model")

Model saved to models/tuned_xgboost_model.model




## PUT XGBOOST MODEL IN MODEL STORE

In [19]:
import tarfile
import json

region = boto3.Session().region_name
role = get_execution_role()
sagemaker_session = sagemaker.Session()

best_params = study.best_params

# ✅ SageMaker client
sm_client = boto3.client("sagemaker", region_name=region)
s3_client = boto3.client("s3", region_name=region)

# ✅ **Model Package Group Name (Unique Identifier)**
model_package_group_name = "xgboost-hospital-readmissions-" + str(int(time.time()))
with open("pipeline_model_package.json", "w") as json_file:
        json.dump({
            "model_package": model_package_group_name
        },json_file, indent=4)

model_package_group_description = "XGBoost model for predicting hospital readmissions."


# ✅ **Create Model Package Group**
create_model_package_group_response = sm_client.create_model_package_group(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageGroupDescription=model_package_group_description,
)

print("✅ ModelPackageGroup Arn:", create_model_package_group_response["ModelPackageGroupArn"])

# ✅ **Save the trained XGBoost model**
joblib.dump(model, "model.joblib")

# ✅ **Save the model as a compressed `.tar.gz` file**
tar_filename = "model.tar.gz"
with tarfile.open(tar_filename, "w:gz") as tar:
    tar.add("model.joblib")

# ✅ **Upload model to S3**
timestamp = int(time.time())
prefix = "hospital-readmissions-xgboost"
s3_key = f"{prefix}/model-{timestamp}.tar.gz"
model_s3_uri = f"s3://{bucket_name}/{s3_key}"

s3_client.upload_file(tar_filename, bucket_name, s3_key)
print(f"✅ Model artifact uploaded to: {model_s3_uri}")

# ✅ **Define SageMaker Model Package**
model_package_name = "xgboost-hospital-readmissions-package-" + str(int(time.time()))
xgboost_image_uri = "683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.5-1"

inference_spec = {
    "Containers": [
        {
            "Image": xgboost_image_uri,
            "ModelDataUrl": model_s3_uri,
        }
    ],
    "SupportedTransformInstanceTypes": ["ml.m5.large", "ml.m5.xlarge"],
    "SupportedRealtimeInferenceInstanceTypes": ["ml.m5.large", "ml.m5.xlarge"],
    "SupportedContentTypes": ["text/csv"],
    "SupportedResponseMIMETypes": ["text/csv", "application/json"],
}

create_model_package_response = sm_client.create_model_package(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageDescription="XGBoost model for predicting hospital readmissions.",
    InferenceSpecification=inference_spec,
    ModelApprovalStatus="PendingManualApproval",
)

print("✅ Model Package ARN:", create_model_package_response["ModelPackageArn"])

# ✅ **Describe the Model Package**
response = sm_client.describe_model_package(
    ModelPackageName=create_model_package_response["ModelPackageArn"]
)

print("Model Package Details:", response)

# ✅ **Create a Model Card**
model_card_name = f"xgboost-hospital-readmissions-card-{int(time.time())}"

# ✅ Update Model Card with **full evaluation details**
model_card_content = {
    "model_overview": {
        "model_description": "XGBoost model predicting hospital readmissions.",
        "model_creator": "Your Team",
        "model_artifact": [model_s3_uri],
        "algorithm_type": "XGBoost",
        "problem_type": "Binary Classification",
        "model_owner": "Group 3",
    },
    "intended_uses": {
        "purpose_of_model": "Predict likelihood of hospital readmission within 30 days.",
        "intended_uses": "Hospital decision support, optimizing patient care.",
        "risk_rating": "Medium",
        "explanations_for_risk_rating": "Incorrect predictions may lead to unnecessary interventions or missed readmissions.",
    },
    "business_details": {
        "business_problem": "Reducing hospital readmission rates.",
        "business_stakeholders": "Healthcare providers, insurers, hospital administrators.",
    },
    "training_details": {
        "training_observations": "Model trained on historical patient admission data.",
        "training_job_details": {
            "training_datasets": [f"sagemaker_featurestore.{latest_table['Name']}"],
            "training_environment": {
                "container_image": [xgboost_image_uri],
            },
            "training_metrics": [
                {"name": "accuracy", "value": acc_train},
                {"name": "roc_auc", "value": roc_train},
                {"name": "precision", "value": precision_train},
                {"name": "recall", "value": recall_train},
            ],
            "hyper_parameters": [
                {"name": k, "value": str(v)} for k, v in best_params.items()
            ],
        },
    },
    "evaluation_details": [
        {
            "name": "evaluation-1",
            "datasets": [f"sagemaker_featurestore.{latest_table['Name']}"],
            "metric_groups": [
                {
                    "name": "readmission-metrics",
                    "metric_data": [
                        {"name": "accuracy", "type": "number", "value": acc},
                        {"name": "roc_auc", "type": "number", "value": roc},
                        {"name": "precision", "type": "number", "value": precision},
                        {"name": "recall", "type": "number", "value": recall},
                    ],
                }
            ],
            "evaluation_observation": "Evaluated on a holdout test dataset.",
        }
    ],
    "additional_information": {
        "ethical_considerations": "Model should not be used to make final medical decisions.",
        "caveats_and_recommendations": "Should be used alongside physician assessment.",
    },
}


# ✅ **Create the Model Card**
response = sm_client.create_model_card(
    ModelCardName=model_card_name,
    Content=json.dumps(model_card_content),
    ModelCardStatus="Draft",
)

print("✅ Model Card ARN:", response["ModelCardArn"])
print("✅ Model Card Name:", model_card_name)

# ✅ **Describe the Model Card**
describe_response = sm_client.describe_model_card(ModelCardName=model_card_name)
print(describe_response)

✅ ModelPackageGroup Arn: arn:aws:sagemaker:us-east-1:321261761338:model-package-group/xgboost-hospital-readmissions-1740872021


✅ Model artifact uploaded to: s3://group3-project-bucket/hospital-readmissions-xgboost/model-1740872022.tar.gz


✅ Model Package ARN: arn:aws:sagemaker:us-east-1:321261761338:model-package/xgboost-hospital-readmissions-1740872021/1
Model Package Details: {'ModelPackageGroupName': 'xgboost-hospital-readmissions-1740872021', 'ModelPackageVersion': 1, 'ModelPackageArn': 'arn:aws:sagemaker:us-east-1:321261761338:model-package/xgboost-hospital-readmissions-1740872021/1', 'ModelPackageDescription': 'XGBoost model for predicting hospital readmissions.', 'CreationTime': datetime.datetime(2025, 3, 1, 23, 33, 43, 168000, tzinfo=tzlocal()), 'InferenceSpecification': {'Containers': [{'Image': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.5-1', 'ImageDigest': 'sha256:c764382b16cd0c921f1b2e66de8684fb999ccbd0c042c95679f0b69bc9cdd12c', 'ModelDataUrl': 's3://group3-project-bucket/hospital-readmissions-xgboost/model-1740872022.tar.gz', 'ModelDataETag': '74da3e90315ae74e5328674ac706ce4f'}], 'SupportedTransformInstanceTypes': ['ml.m5.large', 'ml.m5.xlarge'], 'SupportedRealtimeInferenceInstanceTyp

✅ Model Card ARN: arn:aws:sagemaker:us-east-1:321261761338:model-card/xgboost-hospital-readmissions-card-1740872023
✅ Model Card Name: xgboost-hospital-readmissions-card-1740872023


{'ModelCardArn': 'arn:aws:sagemaker:us-east-1:321261761338:model-card/xgboost-hospital-readmissions-card-1740872023', 'ModelCardName': 'xgboost-hospital-readmissions-card-1740872023', 'ModelCardVersion': 1, 'Content': '{"model_overview": {"model_description": "XGBoost model predicting hospital readmissions.", "model_creator": "Your Team", "model_artifact": ["s3://group3-project-bucket/hospital-readmissions-xgboost/model-1740872022.tar.gz"], "algorithm_type": "XGBoost", "problem_type": "Binary Classification", "model_owner": "Group 3"}, "intended_uses": {"purpose_of_model": "Predict likelihood of hospital readmission within 30 days.", "intended_uses": "Hospital decision support, optimizing patient care.", "risk_rating": "Medium", "explanations_for_risk_rating": "Incorrect predictions may lead to unnecessary interventions or missed readmissions."}, "business_details": {"business_problem": "Reducing hospital readmission rates.", "business_stakeholders": "Healthcare providers, insurers, ho