# Train, debug & profile Machine Learning Models
## Model training with AWS Sagemaker Training Job

Hyperparameter tuning is performed in this notebook, and the resulting model artifacts are saved to the S3 bucket. As before, a custom script is used for hyperparameter tuning.
In the next notebook, we show training with the built-in SageMaker algorithms.


In [None]:
import os
import sagemaker
import logging
import boto3
import time
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError


# ---------------------------------------------------------------------------
# AWS clients and SageMaker session
# ---------------------------------------------------------------------------
# One botocore Config is shared by both low-level clients; `user_agent_extra`
# appends the given tag to the User-Agent header of every request they send.
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')

# Low-level boto3 clients: the SageMaker service API and the runtime API.
sm = boto3.client('sagemaker', config=config)
sm_runtime = boto3.client(service_name='sagemaker-runtime', config=config)

# High-level SageMaker session wrapping the two clients created above.
sess = sagemaker.Session(
    sagemaker_client=sm,
    sagemaker_runtime_client=sm_runtime,
)

# Values reused by the following cells.
bucket = sess.default_bucket()          # default S3 bucket of the session
role = sagemaker.get_execution_role()   # execution role handed to the estimator
region = sess.boto_region_name          # AWS region the session is bound to

In [None]:
# S3 locations of the train / validation CSV files, both under the session's
# default bucket.
# NOTE(review): the 'sagemaker-scikit-learn-2024-03-06-21-05-51-569' prefix looks
# like the output of one specific earlier job run -- confirm it still exists.
train_data_s3_uri = f's3://{bucket}/sagemaker-scikit-learn-2024-03-06-21-05-51-569/output/bank-churn-train/BankChurners_mon1.csv'
validation_data_s3_uri = f's3://{bucket}/sagemaker-scikit-learn-2024-03-06-21-05-51-569/output/bank-churn-validation/BankChurners_mon1.csv'


# Training-job infrastructure: framework container version and compute.
FRAMEWORK_VERSION = '1.0-1'
instance_type = 'ml.m5.large'
instance_count = 1


# Model hyperparameters (collected into the `hyperparameters` dict in a later cell).
n_estimators = 200
max_depth = 5
criterion = 'gini'
random_state = 2024


In [None]:
# Metric definitions handed to the estimator: each entry pairs a metric name
# ('validation:<metric>') with the regex whose capture group holds its value.
# NOTE(review): assumes the training script logs lines such as
# 'val_precision: 0.93' -- confirm against src/training.py.
metric_definitions = [
    {'Name': f'validation:{metric_name}', 'Regex': f'{log_key}: ([0-9.]+)'}
    for metric_name, log_key in (
        ('precision', 'val_precision'),
        ('recall', 'val_recall'),
        ('f1Score', 'val_f1score'),
        ('ROCAUC', 'val_roc_auc'),
        ('accuracy', 'val_accuracy'),
    )
]

In [None]:
# Bundle the model hyperparameter values defined in an earlier cell into the
# mapping that is passed to the estimator.
hyperparameters = dict(
    n_estimators=n_estimators,
    max_depth=max_depth,
    criterion=criterion,
    random_state=random_state,
)

In [None]:
from sagemaker.inputs import TrainingInput

# Map each input channel name to a TrainingInput pointing at its S3 data.
data_channels = {
    channel: TrainingInput(s3_data=s3_uri)
    for channel, s3_uri in (
        ('train', train_data_s3_uri),
        ('validation', validation_data_s3_uri),
    )
}

In [None]:
from sagemaker.sklearn.estimator import SKLearn

# Scikit-learn estimator that runs the custom training script as a SageMaker
# training job, using the compute settings, metric regexes and hyperparameters
# defined in the previous cells.
estimator = SKLearn(
    # Fixed keyword: was misspelled `entry_pint`, which left the required
    # `entry_point` argument unset and made the constructor fail.
    entry_point='src/training.py',
    framework_version=FRAMEWORK_VERSION,
    instance_count=instance_count,
    instance_type=instance_type,
    role=role,
    metric_definitions=metric_definitions,
    hyperparameters=hyperparameters,
)

In [None]:
# Launch the training job on the train / validation channels.
# wait=False: the call is asked not to block until the job finishes.
# NOTE(review): per the SageMaker SDK docs `logs` only takes effect when
# wait=True -- confirm whether log streaming was intended here.
estimator.fit(
    data_channels,
    wait=False,
    logs='All',
)