# Train, debug & profile Machine Learning Models
## 1. Train & Profile built-in AWS SageMaker Algorithms

In [2]:
import os
import sagemaker
import logging
import boto3
import time
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError


# ========================== Low-level boto3 clients and SageMaker session ==========================
# Botocore configuration shared by both clients; it only sets an extra user-agent token.
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')

# One client per service ('sagemaker' and 'sagemaker-runtime'), both built with the shared config.
sm = boto3.client('sagemaker', config=config)
sm_runtime = boto3.client('sagemaker-runtime', config=config)

# SageMaker SDK session wired to the two clients created above.
sess = sagemaker.Session(
    sagemaker_client=sm,
    sagemaker_runtime_client=sm_runtime,
)

# Values reused by the following cells.
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = sess.boto_region_name


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [28]:
######## Compute resources & XGBoost framework version ########
FRAMEWORK_VERSION = '1.7-1'
instance_type = 'ml.m5.large'
instance_count = 1


######## Metric definitions ########
# Each entry pairs a metric name ('validation:<name>') with a regex that captures
# the numeric value following '<log key>: '.
metric_definitions = [
    {'Name': f'validation:{metric_name}', 'Regex': f'{log_key}: ([0-9.]+)'}
    for metric_name, log_key in (
        ('precision', 'val_precision'),
        ('recall', 'val_recall'),
        ('f1Score', 'val_f1score'),
        ('ROCAUC', 'val_roc_auc'),
        ('accuracy', 'val_accuracy'),
    )
]


######## Tuner parameters ########
tuning_strategy = 'Bayesian'
objective_metric = 'validation:accuracy'


In [29]:
from sagemaker.parameter import CategoricalParameter, IntegerParameter, ContinuousParameter


######## Static hyperparameters ########
# Values held constant for every training job.
hyperparameters_static = dict(booster='gbtree', objective='binary:logistic', seed=2024)


######## Hyperparameter ranges ########
# Search space: one (min, max, scaling) range per hyperparameter.
hyperparameter_ranges = {
    'num_round': IntegerParameter(50, 400, scaling_type='Auto'),
    'max_depth': IntegerParameter(3, 7, scaling_type='Linear'),
    'alpha': ContinuousParameter(0, 0.2, scaling_type='Auto'),
    'eta': ContinuousParameter(0.1, 0.7, scaling_type='Auto'),
    'gamma': ContinuousParameter(0, 0.2, scaling_type='Auto'),
    'lambda': ContinuousParameter(0.3, 1.2, scaling_type='Auto'),
}


In [62]:
from sagemaker.debugger import Rule, ProfilerRule, rule_configs 
from sagemaker.debugger import DebuggerHookConfig
from sagemaker.debugger import ProfilerConfig, FrameworkProfile

# ==========================  Rules for XGBoost & system monitoring ==========================
rules = [
    ## Debugger - XGBoost ##
    Rule.sagemaker(rule_configs.confusion()),
    Rule.sagemaker(rule_configs.tree_depth()),
    Rule.sagemaker(rule_configs.feature_importance_overweight()),

    ## Profiler - System ##
    ProfilerRule.sagemaker(rule_configs.CPUBottleneck()),
    # NOTE(review): instance_type is 'ml.m5.large' in this notebook, which presumably has no GPU,
    # so this rule is not expected to report anything - confirm before keeping or removing it.
    ProfilerRule.sagemaker(rule_configs.LowGPUUtilization()),
    ProfilerRule.sagemaker(rule_configs.OverallSystemUsage())
]


# Default hook configuration (no explicit S3 output path or collection configs).
debugger_hook_config = DebuggerHookConfig()

# ==========================  Profiler configuration ==========================
# System metrics are sampled every 500 ms.
# The FrameworkProfile parameters were removed: framework profiling is deprecated in the
# SageMaker SDK (it triggered the deprecation warning printed by this cell) and is aimed at
# TensorFlow / PyTorch training scripts rather than the XGBoost container used here.
profiler_config = ProfilerConfig(
    system_monitor_interval_millis=500
)


See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [64]:
from sagemaker.estimator import Estimator


# ========================= Instantiate XGBoost estimator ========================
# Image URI of the XGBoost container for the configured region and framework version.
xgboost_container = sagemaker.image_uris.retrieve(framework='xgboost', region=region, version=FRAMEWORK_VERSION)

# Generic Estimator around that image.
# - `framework_version` is intentionally not passed: the generic Estimator has no such
#   parameter, and the version is already encoded in `image_uri`.
# - `sagemaker_session=sess` makes the estimator (and any tuner built on it) reuse the
#   session created at the top of the notebook instead of building a new default one.
# - `debugger_hook_config` passes the hook configuration defined alongside `rules`,
#   which was previously created but never used.
xgboost_estimator = Estimator(
        image_uri=xgboost_container,
        instance_type=instance_type,
        instance_count=instance_count,
        role=role,
        sagemaker_session=sess,
        metric_definitions=metric_definitions,
        hyperparameters=hyperparameters_static,
        rules=rules,
        debugger_hook_config=debugger_hook_config,
        profiler_config=profiler_config
)


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [31]:
from sagemaker.tuner import HyperparameterTuner


# =========================  Build the hyperparameter tuner  ========================
# Tunes `xgboost_estimator` over `hyperparameter_ranges`, maximizing `objective_metric`,
# with at most 10 training jobs in total and 2 running at the same time.
xgboost_tuner = HyperparameterTuner(
    xgboost_estimator,
    objective_metric,
    hyperparameter_ranges,
    metric_definitions=metric_definitions,
    strategy=tuning_strategy,
    objective_type='Maximize',
    max_jobs=10,
    max_parallel_jobs=2,
    early_stopping_type='Auto',
)


In [32]:
from sagemaker.inputs import TrainingInput


# S3 locations of the train / validation splits inside `bucket`.
train_data_s3_uri = f's3://{bucket}/sagemaker-scikit-learn-2024-06-07-12-22-31-123/output/bank-churn-train/'
validation_data_s3_uri = f's3://{bucket}/sagemaker-scikit-learn-2024-06-07-12-22-31-123/output/bank-churn-validation/'

# =========================  Configure data channels  ========================
# One CSV TrainingInput per channel name.
content_type = 'text/csv'
data_channels = {
    channel_name: TrainingInput(s3_data=s3_uri, content_type=content_type)
    for channel_name, s3_uri in (
        ('train', train_data_s3_uri),
        ('validation', validation_data_s3_uri),
    )
}


In [33]:
# =========================  Launch the hyperparameter tuning job  ========================
# Started with wait=False; the outcome is read from the tuner in a later cell.
xgboost_tuner.fit(data_channels, wait=False, logs='All')


INFO:sagemaker:Creating hyperparameter tuning job with name: sagemaker-xgboost-240607-1421


In [54]:
from pprint import pprint


# The tuning job is launched with fit(..., wait=False), so block here until it has
# finished; otherwise, on a fresh top-to-bottom run, the description may not contain
# a 'BestTrainingJob' entry yet.
xgboost_tuner.wait()

# Summary of the best training job found by the tuner (None if the key is absent).
best_training_job = xgboost_tuner.describe().get('BestTrainingJob')
pprint(best_training_job)

{'CreationTime': datetime.datetime(2024, 6, 7, 14, 21, 28, tzinfo=tzlocal()),
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'validation:accuracy',
                                                 'Value': 1.0},
 'ObjectiveStatus': 'Succeeded',
 'TrainingEndTime': datetime.datetime(2024, 6, 7, 14, 24, 17, tzinfo=tzlocal()),
 'TrainingJobArn': 'arn:aws:sagemaker:eu-west-3:668303144976:training-job/sagemaker-xgboost-240607-1421-001-f6c347fa',
 'TrainingJobName': 'sagemaker-xgboost-240607-1421-001-f6c347fa',
 'TrainingJobStatus': 'Completed',
 'TrainingStartTime': datetime.datetime(2024, 6, 7, 14, 22, 12, tzinfo=tzlocal()),
 'TunedHyperParameters': {'alpha': '0.10520007714476219',
                          'eta': '0.3653354300270232',
                          'gamma': '0.07565527678531392',
                          'lambda': '0.918232832083758',
                          'max_depth': '5',
                          'num_round': '328'}}
