# Train, debug & profile Machine Learning Models
## 1. Train using a built-in AWS SageMaker Algorithm

In [3]:
import os
import sagemaker
import logging
import boto3
import time
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError


# ========================== AWS clients and SageMaker session ==========================
# A single botocore Config is shared by both low-level clients so that each of
# them is created with the same extra user-agent tag.
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')

sm = boto3.client('sagemaker', config=config)
sm_runtime = boto3.client('sagemaker-runtime', config=config)

# High-level SageMaker session wrapping the two clients created above.
sess = sagemaker.Session(
    sagemaker_runtime_client=sm_runtime,
    sagemaker_client=sm,
)

# Values reused by later cells: the session's region and default S3 bucket,
# and the IAM role returned by sagemaker.get_execution_role().
region = sess.boto_region_name
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()


In [22]:
######## Compute resources & XGBoost framework version ########
FRAMEWORK_VERSION = '1.7-1'
instance_type, instance_count = 'ml.m5.large', 1


######## Tuner parameters ########
tuning_strategy = 'Bayesian'
# Must be the 'Name' of one of the metric definitions built below.
objective_metric = 'validation:accuracy'


######## Metric definitions ########
# One entry per validation metric. 'Name' is the metric name and 'Regex'
# captures the numeric value from a log line of the form "<log_tag>: <number>".
# NOTE(review): confirm the training container really prints lines such as
# "val_accuracy: 0.91"; otherwise these regexes never match.
metric_definitions = [
    {'Name': f'validation:{metric}', 'Regex': f'{log_tag}: ([0-9.]+)'}
    for metric, log_tag in (
        ('precision', 'val_precision'),
        ('recall', 'val_recall'),
        ('f1Score', 'val_f1score'),
        ('ROCAUC', 'val_roc_auc'),
        ('accuracy', 'val_accuracy'),
    )
]


In [5]:
from sagemaker.parameter import CategoricalParameter, IntegerParameter, ContinuousParameter


######## Static hyperparameters ########
# Settings passed unchanged to the estimator; they are not part of the
# search space declared in `hyperparameter_ranges`.
hyperparameters_static = dict(
    booster='gbtree',
    objective='binary:logistic',
    seed=2024,
)


######## Hyperparameter ranges ########
# Search space handed to the tuner, keyed by hyperparameter name.
# Each range is written as (min_value, max_value) followed by its scaling type.
hyperparameter_ranges = {
    # Integer-valued ranges
    'num_round': IntegerParameter(50, 400, scaling_type='Auto'),
    'max_depth': IntegerParameter(3, 7, scaling_type='Linear'),

    # Continuous ranges
    'alpha': ContinuousParameter(0, 0.2, scaling_type='Auto'),
    'eta': ContinuousParameter(0.1, 0.7, scaling_type='Auto'),
    'gamma': ContinuousParameter(0, 0.2, scaling_type='Auto'),
    'lambda': ContinuousParameter(0.3, 1.2, scaling_type='Auto'),
}


In [23]:
from sagemaker.estimator import Estimator


# ========================= Instantiate XGBoost estimator ========================
# Resolve the XGBoost training image for the session's region. The framework
# version is selected here, through the image URI.
xgboost_container = sagemaker.image_uris.retrieve(framework='xgboost', region=region, version=FRAMEWORK_VERSION)

# The generic Estimator has no `framework_version` parameter (an unknown keyword
# is absorbed by **kwargs and has no effect), so it is not passed: the version
# is already pinned by `image_uri`.
# `sagemaker_session=sess` routes the estimator's API calls through the session
# created at the top of the notebook instead of a freshly built default one.
xgboost_estimator = Estimator(
        image_uri=xgboost_container,
        instance_type=instance_type,
        instance_count=instance_count,
        role=role,
        sagemaker_session=sess,
        # NOTE(review): these regexes only produce metrics if the container logs
        # lines in the matching "val_<metric>: <number>" form -- confirm.
        metric_definitions=metric_definitions,
        hyperparameters=hyperparameters_static
)


In [24]:
from sagemaker.tuner import HyperparameterTuner


# =========================  Configure the hyperparameter tuner  ========================
# Wraps the XGBoost estimator: the tuner searches `hyperparameter_ranges` and
# ranks training jobs by `objective_metric`, which it tries to maximize.
xgboost_tuner = HyperparameterTuner(
    xgboost_estimator,
    objective_metric,
    hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type='Maximize',
    strategy=tuning_strategy,
    early_stopping_type='Auto',
    # Job budget: 10 training jobs in total, at most 2 at the same time.
    max_jobs=10,
    max_parallel_jobs=2,
)


In [26]:
from sagemaker.inputs import TrainingInput


# =========================  Configure data channels  ========================
# S3 locations of the train / validation splits inside the session's default bucket.
# NOTE(review): the prefix looks like the name of one specific earlier job;
# update it if the datasets are regenerated.
train_data_s3_uri = f's3://{bucket}/sagemaker-scikit-learn-2023-11-13-11-46-50-760/output/bank-churn-train/'
validation_data_s3_uri = f's3://{bucket}/sagemaker-scikit-learn-2023-11-13-11-46-50-760/output/bank-churn-validation/'

# One TrainingInput per channel name.
# NOTE(review): no content_type is set on the inputs -- confirm the container's
# default matches the format of the files under these prefixes.
data_channels = dict(
    train=TrainingInput(train_data_s3_uri),
    validation=TrainingInput(validation_data_s3_uri),
)


In [27]:
# =========================  Launch the hyperparameter tuning job  ========================
# `wait=False` returns as soon as the tuning job has been submitted instead of
# blocking the notebook until every training job has finished.
# `logs` is not a parameter of HyperparameterTuner.fit: with dict inputs the
# extra keyword is discarded, so it is not passed here.
xgboost_tuner.fit(
    inputs=data_channels,
    wait=False
)


No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config
No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


In [28]:
# Stop the hyperparameter tuning job launched by `xgboost_tuner.fit(...)` above.
# NOTE(review): that job was submitted with wait=False, so running this cell
# right afterwards may stop it before it has completed -- confirm this is intended.
xgboost_tuner.stop_tuning_job()