In [None]:
import logging

# Quiet the chattier SageMaker SDK loggers: only WARNING and above get through.
for _noisy_logger in ("sagemaker.config", "sagemaker.experiments.run"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)

In [None]:
import os
import boto3
import sagemaker
from datetime import datetime
from sagemaker.experiments.run import Run
from sagemaker.sklearn.estimator import SKLearn

In [None]:
# One shared SageMaker session; the default bucket, execution role and region
# are all resolved through it so every later call targets the same account/region.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role_arn = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)
region = sagemaker_session.boto_region_name

# Pin BOTH low-level clients to the session's region. Without region_name the
# SageMaker client falls back to the ambient boto3 configuration, which may be
# a different region (or missing entirely, raising NoRegionError).
s3_client = boto3.client('s3', region_name=region)
sagemaker_client = boto3.client('sagemaker', region_name=region)

In [None]:
# S3 locations of the train / validation / test splits.
# NOTE(review): presumably the outputs of an upstream processing job - confirm.
# Every URI must carry the "s3://" scheme; the validation URI previously lacked
# it and would have been rejected as a training input location.
_processing_output_uri = "s3://sagemaker-us-east-1-891377032677/customerriskrating/processingjob"

s3_train_uri = f"{_processing_output_uri}/train"
s3_validation_uri = f"{_processing_output_uri}/validation"
s3_test_uri = f"{_processing_output_uri}/test"

MODEL TRAINING: XGBoost

In [None]:
# Built-in algorithm and the container version used for training.
FRAMEWORK = 'xgboost'
FRAMEWORK_VERSION = '1.5-1'

# Resolve the training image in the SageMaker session's region (`region`)
# rather than boto3.Session().region_name, which can be None or point at a
# different region than the one the training job is launched in.
container = sagemaker.image_uris.retrieve(
    framework=FRAMEWORK,
    region=region,
    version=FRAMEWORK_VERSION,
)
print(f"Container: {container}")

In [None]:
# A timestamp suffix keeps the experiment-run name and training-job name unique
# per execution; both share the same "<prefix>-<timestamp>" value.
_datetime = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
_prefix = "training-xgboost1p"
job_name = f"{_prefix}-{_datetime}"

# Hyperparameters handed to the XGBoost container.
xgb_hyperparameters = {
    "max_depth": 5,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 6,
    "subsample": 0.8,
    "objective": 'binary:logistic',
    "num_round": 100,
}

# Launch the training job inside the experiment Run context.
with Run(
    experiment_name="CustomerRiskRating",
    run_name=job_name,
    sagemaker_session=sagemaker_session,
) as run:
    xgb = sagemaker.estimator.Estimator(
        image_uri=container,
        role=role_arn,
        instance_count=1,
        instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
    )
    xgb.set_hyperparameters(**xgb_hyperparameters)

    # CSV input channels for the training and validation splits.
    training_channels = {
        'train': sagemaker.inputs.TrainingInput(s3_data=s3_train_uri, content_type='csv'),
        'validation': sagemaker.inputs.TrainingInput(s3_data=s3_validation_uri, content_type='csv'),
    }

    # wait=False: submit the job and return immediately instead of blocking the cell.
    xgb.fit(training_channels, wait=False, job_name=job_name)

In [None]:
# Name and description of the model package group used by the cells below.
MODEL_PACKAGE_GROUP_NAME = "Customer-Risk-Rating"
MODEL_PACKAGE_GROUP_DESC = (
    "Models trained on customer risk rating dataset "
    "to detect risk"
)

In [None]:
# Request payload for creating the model package group.
model_package_group_input_dict = {
    "ModelPackageGroupName": MODEL_PACKAGE_GROUP_NAME,
    "ModelPackageGroupDescription": MODEL_PACKAGE_GROUP_DESC,
}

# Look the group up first so this cell is safe to re-run: calling
# create_model_package_group for a name that already exists raises an error,
# which would break a Restart & Run All of the notebook.
_group_pages = sagemaker_client.get_paginator("list_model_package_groups").paginate(
    NameContains=MODEL_PACKAGE_GROUP_NAME
)
model_package_group_arn = next(
    (
        _summary["ModelPackageGroupArn"]
        for _page in _group_pages
        for _summary in _page["ModelPackageGroupSummaryList"]
        # NameContains is a substring filter, so insist on an exact name match.
        if _summary["ModelPackageGroupName"] == MODEL_PACKAGE_GROUP_NAME
    ),
    None,
)

# Stays None when the group already existed and nothing had to be created.
create_model_package_group_response = None

if model_package_group_arn is None:
    create_model_package_group_response = sagemaker_client.create_model_package_group(
        **model_package_group_input_dict
    )
    model_package_group_arn = create_model_package_group_response["ModelPackageGroupArn"]
    print(f'Created ModelPackageGroup Arn : {model_package_group_arn}')
else:
    print(f'ModelPackageGroup already exists, Arn : {model_package_group_arn}')

REGISTER MODEL TO THE MODEL PACKAGE GROUP

In [None]:
# Name of the training job whose model is registered below. The value follows the
# "training-xgboost1p-<timestamp>" pattern produced by the training cell.
# NOTE(review): hard-coded - presumably picked by hand after comparing runs; update when a better job exists.
best_training_job = "training-xgboost1p-2024-02-20-11-26-02"

In [None]:
# Re-attach to the selected training job. Despite its name, `best_session` is
# an Estimator bound to that job, not a Session. The notebook's own session is
# passed explicitly so attach() does not build a separate default session.
best_session = sagemaker.estimator.Estimator.attach(
    best_training_job,
    sagemaker_session=sagemaker_session,
)

In [None]:
# Display the attached estimator's model_data as the cell output
# (presumably the S3 location of the trained model artifact - confirm).
best_session.model_data

In [None]:
# MIME types and instance types advertised for the registered model version.
_mime_types = ["application/json", "text/csv"]
_instance_types = ["ml.c5.xlarge", "ml.m5.xlarge"]

registration_kwargs = {
    # Separate list copies per argument, as in a plain literal call.
    "content_types": list(_mime_types),
    "response_types": list(_mime_types),
    "inference_instances": list(_instance_types),
    "transform_instances": list(_instance_types),
    "model_package_group_name": MODEL_PACKAGE_GROUP_NAME,
    # The new version is registered with a pending-manual-approval status.
    "approval_status": "PendingManualApproval",
    "description": "Customer Risk Rating model using xgboost classifier",
    "domain": "MACHINE_LEARNING",
    "framework": "XGBOOST",
    "framework_version": FRAMEWORK_VERSION,
    "nearest_model_name": "XGBoostClassification",
}

# Kept as the last expression so the returned object is shown as the cell output.
best_session.register(**registration_kwargs)


In [None]:
# fit() was launched with wait=False, so the training job may still be running
# when this cell executes. Block until it has finished: deploy() needs the
# job's model artifact, which only exists once training completes.
xgb.latest_training_job.wait(logs="None")

# Host the trained model on a single-instance real-time endpoint.
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)