In [17]:
import time
import yaml
from datetime import timezone, datetime
import boto3
from pathlib import Path
import sagemaker
from sagemaker import get_execution_role
from sagemaker import Session
from sagemaker.local import LocalSession
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.tuner import ContinuousParameter, IntegerParameter, HyperparameterTuner
from sagemaker import HyperparameterTuningJobAnalytics, Session
from smexperiments.experiment import Experiment
from smexperiments.search_expression import Filter, Operator, SearchExpression
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent

In [18]:
# Project root, expressed as the working directory's parent (left unresolved, i.e. "<cwd>/..").
project_directory = Path.cwd().joinpath("../")

In [19]:
# Load the two YAML configuration files used by this notebook.
# Each file is opened in a `with` block so its handle is closed as soon as the
# document has been parsed (a bare `open(...)` would leave it open).

# ../code/params.yaml: hyperparameters, directories and datasets.
with open('../code/params.yaml') as params_file:
    params = yaml.safe_load(params_file)
hyperparameters = params["hyperparameters"]
directories = params["directories"]
datasets = params["datasets"]

# ../params.yaml: SageMaker settings; only the "train" section is used here.
with open('../params.yaml') as sagemaker_params_file:
    sagemaker_params = yaml.safe_load(sagemaker_params_file)
train_params = sagemaker_params["train"]

In [20]:
# A single boto3 session backs both the low-level SageMaker client and the SDK session.
boto_session = boto3.Session()
region = boto_session.region_name
sagemaker_client = boto_session.client("sagemaker")
sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)

# Execution role (later passed to the estimator) and the SDK session's default bucket.
role = get_execution_role()
sagemaker_bucket = sagemaker_session.default_bucket()

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [21]:
# S3 bucket and key prefix of the dataset used in this notebook.
bucket = 'cad-alok-singh'
folder = 'us_in_season_corn_yield/8_stages/V0/train_test_2020'

# The preprocessed train and test splits sit side by side under the same prefix.
dataset_prefix = f's3://{bucket}/{folder}'
preprocessed_training_data = f'{dataset_prefix}/train'
preprocessed_testing_data = f'{dataset_prefix}/test'

# Output location inside the SageMaker default bucket, mirroring the bucket/folder path.
output_path = f's3://{sagemaker_bucket}/{bucket}/{folder}/model/Tuning'

In [6]:
# Copy the preprocessed training split from S3 into ../data/train.
!aws s3 cp {preprocessed_training_data} ../data/train --recursive

download: s3://cad-alok-singh/us_in_season_corn_yield/8_stages/V0/train_test_2020/train/train.csv to ../data/train/train.csv


In [7]:
# Copy the preprocessed testing split from S3 into ../data/test.
!aws s3 cp {preprocessed_testing_data} ../data/test --recursive

download: s3://cad-alok-singh/us_in_season_corn_yield/8_stages/V0/train_test_2020/test/test.csv to ../data/test/test.csv


In [22]:
# Estimator options that are forwarded unchanged from the "train" section of
# ../params.yaml. max_wait is not passed.
# NOTE(review): managed spot training normally expects max_wait when
# use_spot_instances is true — confirm against the configured value.
forwarded_train_keys = (
    "entry_point",
    "framework_version",
    "instance_type",
    "instance_count",
    "tags",
    "base_job_name",
    "output_path",
    "container_log_level",
    "volume_size",
    "max_run",
    "enable_sagemaker_metrics",
    "metric_definitions",
    "use_spot_instances",
    "security_group_ids",
    "subnets",
)
forwarded_train_options = {key: train_params[key] for key in forwarded_train_keys}

sklearn_estimator = SKLearn(
    # The configured source_dir is taken relative to the project root.
    source_dir=str(project_directory / train_params["source_dir"]),
    role=role,
    hyperparameters=hyperparameters,
    **forwarded_train_options,
)


INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [23]:
# Display the metric name/regex pairs configured for the estimator.
train_params["metric_definitions"]

[{'Name': 'test:mae', 'Regex': 'mae: ([\\d*\\.?\\d*]+);'},
 {'Name': 'test:mape', 'Regex': 'mape: ([\\d*\\.?\\d*]+);'},
 {'Name': 'test:rmse', 'Regex': 'rmse: ([\\d*\\.?\\d*]+);'}]

In [24]:
# Search space for the tuner: learning_rate is sampled on a logarithmic scale,
# subsample on a linear scale; the integer ranges keep the SDK's default scaling.
hyperparameter_ranges = dict(
    learning_rate=ContinuousParameter(1e-5, 0.1, scaling_type="Logarithmic"),
    iterations=IntegerParameter(50, 250),
    max_depth=IntegerParameter(1, 10),
    l2_leaf_reg=IntegerParameter(1, 10),
    subsample=ContinuousParameter(0.1, 1.0, scaling_type="Linear"),
)

In [28]:
# Prefix used for the name of the tuning job created by this notebook.
base_tuning_job_name = 'gda-cad-yeild-tuning'

# Objective metric; the tuner is configured below to minimise it.
objective_metric_name = "test:mae"

tuner = HyperparameterTuner(
    sklearn_estimator,
    objective_metric_name,
    hyperparameter_ranges,
    # Raw string: "\d" and "\." are regex escapes, not Python string escapes,
    # so a plain literal would trigger an invalid-escape-sequence warning.
    metric_definitions=[{"Name": objective_metric_name, "Regex": r"mae: ([\d*\.?\d*]+);"}],
    max_jobs=10,
    max_parallel_jobs=2,
    objective_type="Minimize",
    base_tuning_job_name=base_tuning_job_name,
    tags=train_params["tags"],
)

In [29]:
# Start the tuning job using the inputs configured under train.inputs.
tuner.fit(inputs=train_params["inputs"])

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating hyperparameter tuning job with name: gda-cad-yeild-tuning-221128-1054


......................................................................................................................!


In [30]:
# Show the name of the best training job found by the tuner.
tuner.best_training_job()

'gda-cad-yeild-tuning-221128-1054-008-72c0067a'

In [31]:
# Analytics object for the tuning job; the following cells read its ranges and per-job table.
results = tuner.analytics()

In [32]:
# Display the hyperparameter ranges (min, max, scaling type) recorded for the tuning job.
results.tuning_ranges

{'iterations': {'Name': 'iterations',
  'MinValue': '50',
  'MaxValue': '250',
  'ScalingType': 'Auto'},
 'max_depth': {'Name': 'max_depth',
  'MinValue': '1',
  'MaxValue': '10',
  'ScalingType': 'Auto'},
 'l2_leaf_reg': {'Name': 'l2_leaf_reg',
  'MinValue': '1',
  'MaxValue': '10',
  'ScalingType': 'Auto'},
 'learning_rate': {'Name': 'learning_rate',
  'MinValue': '1e-05',
  'MaxValue': '0.1',
  'ScalingType': 'Logarithmic'},
 'subsample': {'Name': 'subsample',
  'MinValue': '0.1',
  'MaxValue': '1.0',
  'ScalingType': 'Linear'}}

In [33]:
# Display one row per training job: sampled hyperparameters, status, final objective value and timings.
results.dataframe()

Unnamed: 0,iterations,l2_leaf_reg,learning_rate,max_depth,subsample,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,164.0,7.0,0.013781,4.0,0.546979,gda-cad-yeild-tuning-221128-1054-010-007622c1,Completed,26.02,2022-11-28 11:03:50+00:00,2022-11-28 11:04:47+00:00,57.0
1,104.0,1.0,0.011841,7.0,0.186971,gda-cad-yeild-tuning-221128-1054-009-c2142add,Completed,26.51,2022-11-28 11:03:22+00:00,2022-11-28 11:04:18+00:00,56.0
2,162.0,6.0,0.016482,9.0,0.998799,gda-cad-yeild-tuning-221128-1054-008-72c0067a,Completed,25.389999,2022-11-28 11:02:23+00:00,2022-11-28 11:03:35+00:00,72.0
3,77.0,3.0,0.04107,1.0,0.758613,gda-cad-yeild-tuning-221128-1054-007-aa428cd6,Completed,28.059999,2022-11-28 11:01:54+00:00,2022-11-28 11:03:04+00:00,70.0
4,103.0,1.0,0.022519,4.0,0.1,gda-cad-yeild-tuning-221128-1054-006-b5c3e508,Completed,26.389999,2022-11-28 11:01:08+00:00,2022-11-28 11:02:05+00:00,57.0
5,121.0,1.0,9.8e-05,9.0,0.253468,gda-cad-yeild-tuning-221128-1054-005-fc8b0300,Completed,31.459999,2022-11-28 11:00:25+00:00,2022-11-28 11:01:32+00:00,67.0
6,185.0,4.0,0.010094,7.0,0.243931,gda-cad-yeild-tuning-221128-1054-004-8ad10e56,Completed,25.76,2022-11-28 10:59:07+00:00,2022-11-28 11:00:50+00:00,103.0
7,142.0,2.0,0.000476,8.0,0.184063,gda-cad-yeild-tuning-221128-1054-003-0efe622f,Completed,31.01,2022-11-28 10:59:06+00:00,2022-11-28 11:00:14+00:00,68.0
8,96.0,6.0,0.000836,8.0,0.669637,gda-cad-yeild-tuning-221128-1054-002-977934ac,Completed,30.889999,2022-11-28 10:57:06+00:00,2022-11-28 10:58:38+00:00,92.0
9,247.0,4.0,0.000389,1.0,0.221506,gda-cad-yeild-tuning-221128-1054-001-e586b12f,Completed,31.299999,2022-11-28 10:56:47+00:00,2022-11-28 10:58:24+00:00,97.0


In [34]:
# Ask SageMaker for hyperparameter tuning jobs, newest first, and keep the summaries.
# NOTE(review): only this single response is read; NextToken is not followed,
# so the count printed below may cover just the first page of results — confirm.
list_tuning_jobs_response = sagemaker_client.list_hyper_parameter_tuning_jobs(
    SortOrder="Descending",
    SortBy="CreationTime",
)
tuning_jobs = list_tuning_jobs_response["HyperParameterTuningJobSummaries"]
job_count = len(tuning_jobs)
print(f'Found {job_count} tuning jobs.')

Found 10 tuning jobs.


In [35]:
# The summaries were requested sorted by creation time, descending, so the
# first entry is the most recently created tuning job.
# NOTE(review): the listing is not filtered by name, so this may not be the
# job launched by `tuner` in this notebook — confirm.
if not tuning_jobs:
    # Same exception type as indexing an empty list, but with an explanation.
    raise IndexError("No hyperparameter tuning jobs were returned; cannot select the most recent one.")
most_recently_created_tuning_job = tuning_jobs[0]

tuning_job_name = most_recently_created_tuning_job["HyperParameterTuningJobName"]
print(tuning_job_name)

gda-cad-yeild-tuning-221128-1054
