# Train, debug & profile Machine Learning Models
## 1. Training with Hyperparameter Tuning Jobs

In this notebook, we train the same **SKLearn** `RandomForestClassifier` to predict bank customer churn, with the additional step of hyperparameter tuning.

In [2]:
import os
import sagemaker
import logging
import boto3
import time
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError


# ---------- boto3 service clients and the SageMaker session built on top of them ----------
# One botocore config is shared by both clients so they send the same extra
# User-Agent suffix.
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')

sm = boto3.client('sagemaker', config=config)
sm_runtime = boto3.client(service_name='sagemaker-runtime', config=config)

# Wrap the two pre-configured clients in a single SageMaker session.
sess = sagemaker.Session(sagemaker_runtime_client=sm_runtime, sagemaker_client=sm)

# Values reused by later cells: default S3 bucket, execution role and region.
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = sess.boto_region_name


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [60]:
####### Training & Validation Datasets #######
# Both URIs live in the session's default bucket, under the output prefix of
# what is presumably an earlier processing job (confirm the prefix still exists
# before re-running).
train_data_s3_uri = (
    's3://{}/sagemaker-scikit-learn-2024-03-06-21-05-51-569/output/bank-churn-train/'
    .format(bucket)
)
validation_data_s3_uri = (
    's3://{}/sagemaker-scikit-learn-2024-03-06-21-05-51-569/output/bank-churn-validation/'
    .format(bucket)
)


####### Job parameters #######
FRAMEWORK_VERSION = '1.0-1'
instance_count = 1
instance_type = 'ml.m5.large'


####### Metric definitions #######
# Each entry pairs a metric name with the regex used to pull its value out of
# the training logs; the single capture group is the numeric value.
metric_definitions = [
    dict(Name='validation:precision', Regex='val_precision: ([0-9.]+)'),
    dict(Name='validation:recall', Regex='val_recall: ([0-9.]+)'),
    dict(Name='validation:f1Score', Regex='val_f1score: ([0-9.]+)'),
    dict(Name='validation:ROCAUC', Regex='val_roc_auc: ([0-9.]+)'),
    dict(Name='validation:accuracy', Regex='val_accuracy: ([0-9.]+)'),
]

# Metric the tuner optimizes; it is the name of the last entry in the list above.
objective = 'validation:accuracy'


In [61]:
from sagemaker.parameter import CategoricalParameter, ContinuousParameter, IntegerParameter


####### Static hyperparameters #######
# Held constant for every training job; only the ranges below are searched.
static_hyperparameters = dict(random_state=2024)

####### Hyperparameter ranges #######
# Search space explored by the tuner: two integer ranges and one categorical choice.
hyperparameter_ranges = dict(
    n_estimators=IntegerParameter(10, 200, scaling_type='Logarithmic'),
    max_depth=IntegerParameter(3, 10, scaling_type='Linear'),
    criterion=CategoricalParameter(['gini', 'entropy']),
)


In [62]:
from sagemaker.sklearn.estimator import SKLearn


# Scikit-learn estimator that the tuner uses as the template for every training job.
# `sagemaker_session=sess` is passed explicitly so the estimator (and the tuner
# built on it) goes through the session created in the setup cell, i.e. through
# the boto3 clients that carry the custom botocore config. Without it the SDK
# creates a fresh default session and those clients are bypassed.
estimator = SKLearn(
        entry_point='src/training_hyperparameters.py',
        framework_version=FRAMEWORK_VERSION,
        instance_count=instance_count,
        instance_type=instance_type,
        role=role,
        hyperparameters=static_hyperparameters,   # fixed values, not tuned
        metric_definitions=metric_definitions,    # regexes that extract metrics from the logs
        sagemaker_session=sess
)


In [63]:
from sagemaker.tuner import HyperparameterTuner


# Bayesian search that maximizes the objective metric over `hyperparameter_ranges`,
# running the jobs one at a time.
tuner = HyperparameterTuner(
    estimator,
    objective,
    hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type='Maximize',
    strategy='Bayesian',
    # Budget: at most 12 training jobs in total, never more than 1 at a time.
    max_jobs=12,
    max_parallel_jobs=1,
    early_stopping_type='Auto',
)


In [65]:
from sagemaker.inputs import TrainingInput

# Map each input channel name to a TrainingInput pointing at its S3 prefix.
data_channels = {
    channel_name: TrainingInput(s3_data=s3_uri)
    for channel_name, s3_uri in (
        ('train', train_data_s3_uri),
        ('validation', validation_data_s3_uri),
    )
}


In [66]:
# Launch the hyperparameter tuning job.
# The data channels must be passed as `inputs`: `HyperparameterTuner.fit` has no
# `data_channels` parameter, so that keyword was swallowed by `**kwargs` and the
# job was started without any train/validation input.
# `wait=False` returns immediately; a later cell blocks on the job explicitly.
tuner.fit(
    inputs=data_channels,
    include_cls_metadata=False,
    wait=False,
)


No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config
No finished training job found associated with this estimator. Please make sure this estimator is only used for building workflow config


In [69]:
# Block until the tuning job started by `tuner.fit(...)` finishes.
# NOTE: despite its name, `training_job` holds the tuner's latest *tuning* job.
training_job = tuner.latest_tuning_job
training_job.wait()


...........................................................

KeyboardInterrupt: 

In [70]:
# Ask SageMaker to stop the tuner's latest tuning job.
# NOTE(review): presumably needed because interrupting `wait()` in the notebook
# does not stop the remote job — confirm.
tuner.stop_tuning_job()