In [17]:
# --- Imports and Configuration ---

import sagemaker
import boto3
from sagemaker.inputs import TrainingInput
from sagemaker.xgboost import XGBoost
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.steps import TrainingStep

In [18]:
# --- Project-wide settings shared by the cells below ---
project_name = "titanic"
model_package_group_name = "TitanicModel"  # Model Registry group used when registering

# IAM role passed to the estimator
role = "arn:aws:iam::344809604964:role/sage-maker-full-acess-role"

# S3 locations: training CSV (input) and output prefix for the estimator
train_s3 = "s3://ml-pipeline-project-aniolmg/data/titanic_data.csv"
output_s3 = "s3://ml-pipeline-project-aniolmg/models/"

# --- AWS / SageMaker sessions ---
boto_session = boto3.Session()
region = boto_session.region_name  # region resolved from the local AWS configuration
sagemaker_session = sagemaker.Session()

# Echo the resolved settings so the run is self-describing
print(f"Using region: {region}")
print(f"Execution role: {role}")

Using region: eu-west-3
Execution role: arn:aws:iam::344809604964:role/sage-maker-full-acess-role


In [19]:
# --- XGBoost estimator that runs the train_model.py entry point ---

# Hyperparameters handed to the estimator.
xgb_hyperparameters = {
    "max_depth": 8,
    "eta": 0.3,
    "objective": "binary:logistic",
    "num_round": 200,
}

xgb_estimator = XGBoost(
    # What to run and in which framework container
    entry_point="train_model.py",
    framework_version="1.7-1",
    py_version="py3",
    # Who runs it and on what compute
    role=role,
    instance_type="ml.m5.large",
    instance_count=1,
    # Where output goes and how training is tuned
    output_path=output_s3,
    hyperparameters=xgb_hyperparameters,
    sagemaker_session=sagemaker_session,
)


INFO:sagemaker.image_uris:Ignoring unnecessary Python version: py3.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: ml.m5.large.


In [20]:
# --- Pipeline session for CI/CD integration ---
# NOTE(review): no cell visible in this section passes pipeline_session to an
# estimator, step, or pipeline — confirm whether the steps below should use it.
pipeline_session = PipelineSession()

# --- Training step: wraps the estimator with its "train" input channel ---
train_channel = TrainingInput(train_s3, content_type="csv")
train_step = TrainingStep(
    name="TrainTitanicModel",
    estimator=xgb_estimator,
    inputs={"train": train_channel},
)

# --- Registration step: registers the artifact produced by the training step ---
# Step property reference to the training step's model artifact location.
trained_artifact = train_step.properties.ModelArtifacts.S3ModelArtifacts
register_step = RegisterModel(
    name="RegisterTitanicModel",
    estimator=xgb_estimator,
    model_data=trained_artifact,
    model_package_group_name=model_package_group_name,
    approval_status="PendingManualApproval",  # Or "Approved" to allow auto-promotion
    content_types=["text/csv"],
    response_types=["text/csv"],
    inference_instances=["ml.m5.large", "ml.t2.medium"],
    transform_instances=["ml.m5.large"],
)

In [21]:
# --- Execute the training job manually, then register the result ---
# Calls fit() on the estimator directly (the same object the training step
# wraps), so this cell does not depend on the pipeline-definition cell.
xgb_estimator.fit({"train": TrainingInput(train_s3, content_type="csv")})

# After completion, register model directly.
# The inference image is resolved from the estimator's own framework and
# Python versions so it cannot drift from the container used for training.
inference_image = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version=xgb_estimator.framework_version,
    py_version=xgb_estimator.py_version,
    image_scope="inference",
)

# Pin the client to the same region the image URI was resolved for.
sm_client = boto3.client("sagemaker", region_name=region)

response = sm_client.create_model_package(
    ModelPackageGroupName=model_package_group_name,
    ModelPackageDescription="Auto-registration of Titanic model",
    InferenceSpecification={
        "Containers": [
            {
                "Image": inference_image,
                # S3 location of the artifact produced by the fit() call above
                "ModelDataUrl": xgb_estimator.model_data,
            }
        ],
        "SupportedContentTypes": ["text/csv"],
        "SupportedResponseMIMETypes": ["text/csv"],
    },
    # New versions are registered as pending manual approval.
    ModelApprovalStatus="PendingManualApproval",
)

print("✅ Model registered successfully!")
print("Model Package ARN:", response["ModelPackageArn"])


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-11-04-11-09-18-320


2025-11-04 11:09:20 Starting - Starting the training job...
2025-11-04 11:09:35 Starting - Preparing the instances for training...
2025-11-04 11:09:56 Downloading - Downloading input data...
  import pkg_resources[0m
[34m[2025-11-04 11:12:01.197 ip-10-0-248-97.eu-west-3.compute.internal:8 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2025-11-04 11:12:01.321 ip-10-0-248-97.eu-west-3.compute.internal:8 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2025-11-04:11:12:01:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2025-11-04:11:12:01:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2025-11-04:11:12:01:INFO] Invoking user training script.[0m
[34m[2025-11-04:11:12:02:INFO] Module train_model does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m[2025-11-04:11:12:02:INFO] Generating setup.cfg[0m
[34m[2025-11-04:11:12:02:INFO] Generating MANIFEST.in[0m
[34m[2025-11-04:11:12:02:INFO] Inst

INFO:sagemaker.image_uris:Ignoring unnecessary Python version: py3.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


Training seconds: 145
Billable seconds: 145
✅ Model registered successfully!
Model Package ARN: arn:aws:sagemaker:eu-west-3:344809604964:model-package/TitanicModel/4


In [22]:
# --- List registered versions in the Model Registry ---
# list_model_packages returns results one page at a time; the paginator follows
# NextToken so every version in the group is listed, not just the first page.
paginator = sm_client.get_paginator("list_model_packages")
package_summaries = [
    summary
    for page in paginator.paginate(ModelPackageGroupName=model_package_group_name)
    for summary in page["ModelPackageSummaryList"]
]

# Keep `packages` available with the same key the single-call response exposed.
packages = {"ModelPackageSummaryList": package_summaries}

for pkg in packages["ModelPackageSummaryList"]:
    print(pkg["ModelPackageArn"], "|", pkg["ModelApprovalStatus"])


arn:aws:sagemaker:eu-west-3:344809604964:model-package/TitanicModel/4 | PendingManualApproval
arn:aws:sagemaker:eu-west-3:344809604964:model-package/TitanicModel/3 | Rejected
