# Imports

In [43]:
import os
import pandas as pd

import sagemaker
import boto3

from sagemaker.sklearn.estimator import SKLearn

from secret.config import ROLE

# SageMaker Settings

In [44]:
# Read the access-key CSV stored in the "secret" folder of the current working directory.
_access_keys_csv = os.path.join(os.getcwd(), "secret", "AccessKeys.csv")
AWS_ACCESS_KEYS = pd.read_csv(_access_keys_csv)

In [45]:
# Set the region in the environment first; the boto3 session below reads it back from there.
os.environ["AWS_DEFAULT_REGION"] = 'ap-southeast-2'

# Credentials come from the row labelled 0 of the access-key table.
_key_id = AWS_ACCESS_KEYS.loc[0, 'Access key ID']
_secret_key = AWS_ACCESS_KEYS.loc[0, 'Secret access key']

boto_session = boto3.Session(
    aws_access_key_id=_key_id,
    aws_secret_access_key=_secret_key,
    region_name=os.environ['AWS_DEFAULT_REGION'],
)

# SageMaker LocalSession built on top of the boto3 session created above.
sagemaker_session = sagemaker.local.LocalSession(
    boto_session=boto_session,
    default_bucket="mlflow-artifacts-dir",
)

# Define & Train Estimator

In [46]:
# Estimator configuration; every key is named after the SKLearn constructor
# argument it is later passed to.
# NOTE(review): output_path points at a different bucket than the session's
# default_bucket ("mlflow-artifacts-dir") — confirm this is intentional.
ESTIMATOR_CFG = dict(
    entry_point="./script/train_sklearn.py",
    role=ROLE,
    framework_version='1.2-1',
    py_version='py3',
    instance_type='local',
    instance_count=1,
    output_path='s3://mlflow-tracking-dir/',
    sagemaker_session=sagemaker_session,
)

# Hyperparameter set handed to the estimator as its `hyperparameters` argument.
# NOTE(review): presumably read by ./script/train_sklearn.py (not visible here);
# confirm the script accepts every key, including the experiment/run names.
PARAMETERS = dict(
    n_estimators=200,
    max_depth=7,
    max_features=5,
    experiment_name='sklearn-estimator-local-lab',
    run_name='rf-clf-exp-1',
)

In [47]:
class RandomForestCLFEstimator:
    """Thin wrapper that builds a SageMaker ``SKLearn`` estimator and trains it.

    The wrapped estimator is exposed as ``self.estimator``.
    """

    # Keys read from ``estimator_cfg``; each one is forwarded to ``SKLearn``
    # as the keyword argument of the same name.
    _CFG_KEYS = (
        'entry_point',
        'role',
        'framework_version',
        'py_version',
        'instance_type',
        'instance_count',
        'sagemaker_session',
        'output_path',
    )

    def __init__(self, estimator_cfg, param_set):
        """Create the underlying ``SKLearn`` estimator.

        Args:
            estimator_cfg: Mapping that must contain every key listed in
                ``_CFG_KEYS``. Other keys are ignored.
            param_set: Mapping forwarded as the estimator's
                ``hyperparameters`` (may be ``None``).

        Raises:
            KeyError: If one or more required keys are absent from
                ``estimator_cfg``; the message lists all of them.
        """
        missing = [key for key in self._CFG_KEYS if key not in estimator_cfg]
        if missing:
            raise KeyError(f"estimator_cfg is missing required key(s): {missing}")

        # Pass a shallow copy so the estimator never aliases the caller's
        # (typically module-level, shared) hyperparameter dict.
        hyperparameters = dict(param_set) if param_set is not None else None

        self.estimator = SKLearn(
            **{key: estimator_cfg[key] for key in self._CFG_KEYS},
            hyperparameters=hyperparameters,
        )

    def fit(self, inputs=None, **fit_kwargs):
        """Run training on the wrapped estimator.

        Args:
            inputs: Training input forwarded to ``self.estimator.fit`` as
                ``inputs``. Defaults to ``None``, which matches the previous
                argument-less call.
            **fit_kwargs: Extra keyword arguments forwarded unchanged to
                ``self.estimator.fit`` (e.g. ``wait``, ``job_name``).
        """
        self.estimator.fit(inputs=inputs, **fit_kwargs)

In [50]:
# Build the estimator wrapper from the module-level config and hyperparameter dicts.
trainer = RandomForestCLFEstimator(ESTIMATOR_CFG, PARAMETERS)

In [51]:
# Launch training through the wrapper; no input channels are supplied here.
trainer.fit()

INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2025-08-23-07-34-43-900
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.local.image:'Docker Compose' found

 Container dbmgh86agn-algo-1-hd566  Creating
 algo-1-hd566 The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested 
 Container dbmgh86agn-algo-1-hd566  Created
Attaching to dbmgh86agn-algo-1-hd566
dbmgh86agn-algo-1-hd566  |   import pkg_resources
dbmgh86agn-algo-1-hd566  | 2025-08-23 07:34:45,419 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
dbmgh86agn-algo-1-hd566  | 2025-08-23 07:34:45,422 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
dbmgh86agn-algo-1-hd566  | 2025-08-23 07:34:45,424 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)
dbmgh86agn-algo-1-hd566  | 2025-08-23 07:34:45,428 sagemaker-training-toolkit INFO     instance_groups entry not present in resource_config
dbmgh86agn-algo-1-hd566  | 2025-08-23 07:34:45,451 sagemaker_sklearn_container.training INFO     Invoking user trai

INFO:sagemaker.local.image:===== Job Complete =====
