In [1]:
import time
import yaml
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn import metrics
from pathlib import Path
from cloudpathlib import S3Path
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker import Session
from sagemaker.local import LocalSession
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import s3

from sagemaker.analytics import ExperimentAnalytics
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker

In [2]:
project_directory = Path.cwd() / "../"

In [3]:
params = yaml.safe_load(open('../code/params.yaml'))
hyperparameters = params["hyperparameters"]
directories = params["directories"]
datasets = params["datasets"]
sagemaker_params = yaml.safe_load(open('../params.yaml'))
train_params = sagemaker_params["train"]
model_params = sagemaker_params["model"]
inference_params = sagemaker_params["inference"]

In [4]:
boto_session = boto3.Session()
sagemaker_client = boto_session.client("sagemaker")
sagemaker_session = Session(
    boto_session=boto_session,
    sagemaker_client=sagemaker_client
)
region = boto_session.region_name
role = get_execution_role()
sagemaker_bucket = sagemaker_session.default_bucket()

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [5]:
train_s3_dir = train_params["inputs"]["train"]
test_s3_dir = train_params["inputs"]["test"]

In [6]:
!aws s3 cp $train_s3_dir ../data/train --recursive

download: s3://cad-alok-singh/us_in_season_corn_yield/8_stages/V0/train_test_2020/train/train.csv to ../data/train/train.csv


In [7]:
!aws s3 cp $test_s3_dir ../data/test --recursive

download: s3://cad-alok-singh/us_in_season_corn_yield/8_stages/V0/train_test_2020/test/test.csv to ../data/test/test.csv


In [8]:
# create the experiment if it doesn't exist
experiment_name = "gda-yield-experiment"
trial_name = "gda-yield-trial"
job_name = f"gda-yield-training-job"

In [9]:
# create the experiment if it doesn't exist
try:
    experiment = Experiment.load(experiment_name=experiment_name,)
except Exception as ex:
    if "ResourceNotFound" in str(ex):
        experiment = Experiment.create(
            experiment_name=experiment_name,
            description="Predict yield",
            tags=train_params["tags"],
            sagemaker_boto_client=sagemaker_client,
        )
    
for iterations in [50, 100, 150, 200, 250]:
    
    # create the trial if it doesn't exist
    try:
        trial = Trial.load(trial_name=f"{trial_name}-iterations-{iterations}")
    except Exception as ex:
        if "ResourceNotFound" in str(ex):
            trial = Trial.create(
                experiment_name=experiment.experiment_name, 
                trial_name=f"{trial_name}-iterations-{iterations}",
                tags=train_params["tags"],
                sagemaker_boto_client=sagemaker_client,
            )

    hyperparameters["iterations"] = iterations

    sklearn_estimator = SKLearn(
        source_dir =  str(project_directory / train_params['source_dir']),
        entry_point=train_params["entry_point"],
        framework_version=train_params["framework_version"], 
        instance_type=train_params["instance_type"],
        role=role,
        instance_count=train_params["instance_count"],
        tags=train_params["tags"],
        base_job_name=train_params["base_job_name"],
        output_path=train_params["output_path"],
        hyperparameters=hyperparameters,
        container_log_level=train_params["container_log_level"],
        volume_size=train_params["volume_size"],
        max_run=train_params["max_run"],
        # max_wait=train_params["max_wait"],
        enable_sagemaker_metrics=train_params["enable_sagemaker_metrics"],
        metric_definitions=train_params["metric_definitions"],
        use_spot_instances=train_params["use_spot_instances"],
        security_group_ids= train_params["security_group_ids"],
        subnets= train_params["subnets"],
    )
    
    time_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
    
    sklearn_estimator.fit(
        inputs = train_params["inputs"],
        experiment_config={
                "TrialName": trial.trial_name,
                "TrialComponentDisplayName": "Training",
            },
        wait=True
    )

    # Wait two seconds before dispatching the next training job
    time.sleep(2)

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: gda-yield-training-job-2022-11-28-11-16-39-869


2022-11-28 11:16:40 Starting - Starting the training job...
2022-11-28 11:17:07 Starting - Preparing the instances for trainingProfilerReport-1669634200: InProgress
............
2022-11-28 11:19:04 Downloading - Downloading input data...
2022-11-28 11:19:44 Training - Training image download completed. Training in progress...[34m2022-11-28 11:19:53,131 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-11-28 11:19:53,134 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-11-28 11:19:53,148 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-11-28 11:19:53,337 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting PyYAML
  Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: gda-yield-training-job-2022-11-28-11-21-00-276


2022-11-28 11:21:01 Starting - Starting the training job...
2022-11-28 11:21:29 Starting - Preparing the instances for trainingProfilerReport-1669634460: InProgress
............
2022-11-28 11:23:32 Downloading - Downloading input data
2022-11-28 11:23:32 Training - Downloading the training image...
2022-11-28 11:23:53 Training - Training image download completed. Training in progress..[34m2022-11-28 11:24:05,123 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-11-28 11:24:05,126 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-11-28 11:24:05,137 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-11-28 11:24:05,338 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting PyYAML
  Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manyl

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: gda-yield-training-job-2022-11-28-11-25-20-948


2022-11-28 11:25:21 Starting - Starting the training job...
2022-11-28 11:25:48 Starting - Preparing the instances for trainingProfilerReport-1669634721: InProgress
.........
2022-11-28 11:27:20 Downloading - Downloading input data...
2022-11-28 11:27:46 Training - Downloading the training image...
2022-11-28 11:28:26 Training - Training image download completed. Training in progress.[34m2022-11-28 11:28:17,395 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-11-28 11:28:17,398 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-11-28 11:28:17,408 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-11-28 11:28:17,602 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting PyYAML
  Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manyli

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: gda-yield-training-job-2022-11-28-11-29-42-543


2022-11-28 11:29:43 Starting - Starting the training job...
2022-11-28 11:30:08 Starting - Preparing the instances for trainingProfilerReport-1669634983: InProgress
.....................
2022-11-28 11:33:47 Downloading - Downloading input data...
2022-11-28 11:34:07 Training - Downloading the training image...
2022-11-28 11:34:47 Training - Training image download completed. Training in progress.[34m2022-11-28 11:34:35,822 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-11-28 11:34:35,825 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-11-28 11:34:35,836 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-11-28 11:34:36,037 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting PyYAML
  Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: gda-yield-training-job-2022-11-28-11-36-07-205


2022-11-28 11:36:07 Starting - Starting the training job...
2022-11-28 11:36:34 Starting - Preparing the instances for trainingProfilerReport-1669635367: InProgress
............
2022-11-28 11:38:38 Downloading - Downloading input data
2022-11-28 11:38:38 Training - Downloading the training image...
2022-11-28 11:39:11 Training - Training image download completed. Training in progress.[34m2022-11-28 11:39:03,520 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2022-11-28 11:39:03,524 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2022-11-28 11:39:03,536 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2022-11-28 11:39:03,781 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting PyYAML
  Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manyli

In [10]:
search_expression = {
    "Filters": [
        {
            "Name": "DisplayName",
            "Operator": "Equals",
            "Value": "Training",
        }
    ],
}

In [11]:
trial_component_analytics = ExperimentAnalytics(
    sagemaker_session=Session(boto_session, sagemaker_client),
    experiment_name=experiment.experiment_name,
    search_expression=search_expression,
    sort_by="metrics.test:mae.min",
    sort_order="Descending",
    metric_names=["test:mae"],
    parameter_names=[ "iterations"],
    # parameter_names=["learning_rate", "iterations", "max_depth", "l2_leaf_reg", "subsample", "random_state", "loss_function"],
)

In [12]:
trial_component_analytics.dataframe()

Unnamed: 0,TrialComponentName,DisplayName,SourceArn,iterations,test:mae - Min,test:mae - Max,test:mae - Avg,test:mae - StdDev,test:mae - Last,test:mae - Count,test - MediaType,test - Value,train - MediaType,train - Value,SageMaker.DebugHookOutput - MediaType,SageMaker.DebugHookOutput - Value,SageMaker.ModelArtifact - MediaType,SageMaker.ModelArtifact - Value,Trials,Experiments
0,gda-yield-training-job-2022-11-28-11-36-07-205...,Training,arn:aws:sagemaker:eu-central-1:226275233641:tr...,250.0,0.0,0.0,0.0,0.0,24.379999,0,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,[gda-yield-trial-iterations-250],[gda-yield-experiment]
1,gda-yield-training-job-2022-11-28-11-16-39-869...,Training,arn:aws:sagemaker:eu-central-1:226275233641:tr...,50.0,0.0,0.0,0.0,0.0,24.98,0,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,[gda-yield-trial-iterations-50],[gda-yield-experiment]
2,gda-yield-training-job-2022-11-28-11-25-20-948...,Training,arn:aws:sagemaker:eu-central-1:226275233641:tr...,150.0,0.0,0.0,0.0,0.0,24.219999,0,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,[gda-yield-trial-iterations-150],[gda-yield-experiment]
3,gda-yield-training-job-2022-11-28-11-29-42-543...,Training,arn:aws:sagemaker:eu-central-1:226275233641:tr...,200.0,0.0,0.0,0.0,0.0,24.290001,0,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,[gda-yield-trial-iterations-200],[gda-yield-experiment]
4,gda-yield-training-job-2022-11-28-11-21-00-276...,Training,arn:aws:sagemaker:eu-central-1:226275233641:tr...,100.0,0.0,0.0,0.0,0.0,24.360001,0,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://cad-alok-singh/us_in_season_corn_yield/8_...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,,s3://sagemaker-eu-central-1-226275233641/cad-a...,[gda-yield-trial-iterations-100],[gda-yield-experiment]


In [13]:
model_data = S3Path(trial_component_analytics.dataframe().iloc[0]['SageMaker.ModelArtifact - Value'])
output_data = model_data.parent / "output.tar.gz"

INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials


In [14]:
sklearn_inference = SKLearnModel(
    model_data=str(model_data),
    role=role,
    source_dir =str(project_directory / model_params['source_dir']),
    entry_point=model_params["entry_point"],
    framework_version=model_params["framework_version"], 
    name=model_params["name"],
    sagemaker_session=sagemaker_session
)

In [15]:
predictor = sklearn_inference.deploy(
    endpoint_name=inference_params["endpoint_name"],
    instance_type=inference_params["instance_type"],
    initial_instance_count=inference_params["initial_instance_count"],
    security_group_ids= inference_params["security_group_ids"],
    subnets= inference_params["subnets"],
    tags=inference_params["tags"],
)

INFO:sagemaker:Creating model with name: gda-yeild-infrence-provisioned
INFO:sagemaker:Creating endpoint-config with name gda-yeild-infrence-provisioned
INFO:sagemaker:Creating endpoint with name gda-yeild-infrence-provisioned


------!

In [16]:
df = pd.read_csv("../data/test/test.csv")
df = df.drop(datasets['drop_columns'], axis=1)

In [17]:
X_test = df.drop(datasets['y_column'], axis=1)
y_true = df[datasets['y_column']]

In [18]:
y_pred = predictor.predict(X_test)

In [19]:
mae = metrics.mean_absolute_error(y_true, y_pred)
rmse = (np.sqrt(metrics.mean_squared_error(y_true, y_pred)))
r2 = metrics.r2_score(y_true, y_pred)

print("Testing performance")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R2: {r2:.2f}")

Testing performance
MAE: 24.38
RMSE: 29.67
R2: 0.37


In [20]:
predictor.delete_model()
predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: gda-yeild-infrence-provisioned
INFO:sagemaker:Deleting endpoint configuration with name: gda-yeild-infrence-provisioned
INFO:sagemaker:Deleting endpoint with name: gda-yeild-infrence-provisioned


In [21]:
def cleanup_sme_sdk(experiment):
    for trial_summary in experiment.list_trials():
        trial = Trial.load(trial_name=trial_summary.trial_name)
        for trial_component_summary in trial.list_trial_components():
            tc = TrialComponent.load(
                trial_component_name=trial_component_summary.trial_component_name)
            trial.remove_trial_component(tc)
            try:
                # comment out to keep trial components
                tc.delete()
            except:
                # tc is associated with another trial
                continue
            # to prevent throttling
            time.sleep(.5)
        trial.delete()
        experiment_name = experiment.experiment_name
    experiment.delete()
    print(f"\nExperiment {experiment_name} deleted")

In [22]:
experiment

Experiment(sagemaker_boto_client=<botocore.client.SageMaker object at 0x14a1146a0>,experiment_name='gda-yield-experiment',description='Predict yield',tags=[{'Key': 'Application', 'Value': 'GDA'}, {'Key': 'Cost Center', 'Value': '62644'}],experiment_arn='arn:aws:sagemaker:eu-central-1:226275233641:experiment/gda-yield-experiment',response_metadata={'RequestId': 'c73cda57-aff0-4734-9686-cc98e5d0aad0', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c73cda57-aff0-4734-9686-cc98e5d0aad0', 'content-type': 'application/x-amz-json-1.1', 'content-length': '95', 'date': 'Mon, 28 Nov 2022 11:16:39 GMT'}, 'RetryAttempts': 0})

In [23]:
# cleanup_sme_sdk(experiment)