# Train and deploy the model

Train a model with the SageMaker Python SDK, then deploy it.

---

## Imports and setup

In [3]:
# Install the notebook's dependencies with the interpreter that runs this kernel:
# `sys.executable` is the path of the current Python binary, so `-m pip` installs
# into the kernel's own environment rather than whichever `pip` is first on PATH.
import sys
# SageMaker Experiments SDK (presumably the source of the `smexperiments` imports used later — confirm).
!{sys.executable} -m pip install sagemaker-experiments
# Upgrade the SageMaker Python SDK to the latest available release.
!{sys.executable} -m pip install --upgrade sagemaker

Collecting sagemaker-experiments
  Using cached sagemaker_experiments-0.1.45-py3-none-any.whl (42 kB)
Installing collected packages: sagemaker-experiments
Successfully installed sagemaker-experiments-0.1.45
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting sagemaker
  Using cached sagemaker-2.159.0-py2.py3-none-any.whl
Collecting attrs<24,>=23.1.0
  Using cached attrs-23.1.0-py3-none-any.whl (61 kB)
Collecting boto3<2.0,>=1.26.131
  Using cached boto3-1.26.140-py3-none-any.whl (135 kB)
Collecting importlib-metadata<5.0,>=1.4.0
  Using cached importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Collecting tblib==1.7.0
  Using cached tblib-1.7.0-py2.py3-none-any.whl (12 kB)
Collecting botocore<1.30.0,>=1.29.140
  Using cached botocore-1.29.140-py3-none-any.whl (10.8 MB)
Insta

In [33]:
import sagemaker

# SageMaker session used for region lookup and passed to the estimator.
sess = sagemaker.Session()
# S3 bucket and key prefix under which the data splits and training output live.
bucket = "test-sagemaker-examples-1357942113492"
prefix = "Exp_2"

# Object keys of the train / test / validation CSV splits, relative to
# `s3://{bucket}/{prefix}/`.
train_loc = "Train/Wine-Quality-2023-05-22T11-12-10/part-00000-5812d1e3-d6a0-4cc1-afd8-1f22f194c20b-c000.csv"
test_loc = "Test/Wine-Quality-2023-05-22T11-12-10/part-00000-272ca3b5-c289-4a25-ad9d-cfd7e98bd977-c000.csv"
validation_loc = "Validation/Wine-Quality-2023-05-22T11-12-10/part-00000-61ee30c8-9b7c-4e79-bad1-1277435f4268-c000.csv"

# Execution role of the current notebook environment; passed to the estimator as `role`.
role = sagemaker.get_execution_role()

In [34]:
import os
import pandas as pd
import numpy as np
import boto3
import time

from sagemaker.serializers import CSVSerializer
from sagemaker.inputs import TrainingInput

from time import strftime, gmtime
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker
from botocore.exceptions import ClientError

## Check that the CSV data can be read

In [35]:
# Read the test split straight from S3 and display it as a sanity check.
test_uri = f"s3://{bucket}/{prefix}/{test_loc}"
test = pd.read_csv(test_uri)
test

Unnamed: 0,quality,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,If_White_Then_1
0,3.0,7.0,0.270,0.36,20.7,0.045,45,170,1.00100,3.00,0.45,8.8,1
1,2.0,8.6,0.230,0.40,4.2,0.035,17,109,0.99470,3.14,0.53,9.7,1
2,5.0,6.2,0.660,0.48,1.2,0.029,29,75,0.98920,3.33,0.39,12.8,1
3,3.0,7.4,0.340,0.42,1.1,0.033,17,171,0.99170,3.12,0.53,11.3,1
4,4.0,7.2,0.320,0.36,2.0,0.033,37,114,0.99060,3.10,0.71,12.3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1255,3.0,6.7,0.670,0.02,1.9,0.061,26,42,0.99489,3.39,0.82,10.9,0
1256,3.0,6.7,0.160,0.64,2.1,0.059,24,52,0.99494,3.34,0.71,11.2,0
1257,2.0,6.2,0.560,0.09,1.7,0.053,24,32,0.99402,3.54,0.60,11.3,0
1258,2.0,6.1,0.715,0.10,2.6,0.053,13,27,0.99362,3.57,0.50,11.9,0


In [36]:
# Read the training split straight from S3 and display it as a sanity check.
train_uri = f"s3://{bucket}/{prefix}/{train_loc}"
train = pd.read_csv(train_uri)
train

Unnamed: 0,quality,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,If_White_Then_1
0,3.0,7.0,0.270,0.36,20.7,0.045,45,170,1.00100,3.00,0.45,8.8,1
1,3.0,6.3,0.300,0.34,1.6,0.049,14,132,0.99400,3.30,0.49,9.5,1
2,3.0,8.1,0.280,0.40,6.9,0.050,30,97,0.99510,3.26,0.44,10.1,1
3,3.0,7.2,0.230,0.32,8.5,0.058,47,186,0.99560,3.19,0.40,9.9,1
4,3.0,7.2,0.230,0.32,8.5,0.058,47,186,0.99560,3.19,0.40,9.9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4031,2.0,6.2,0.600,0.08,2.0,0.090,32,44,0.99490,3.45,0.58,10.5,0
4032,3.0,5.9,0.550,0.10,2.2,0.062,39,51,0.99512,3.52,0.76,11.2,0
4033,3.0,6.3,0.510,0.13,2.3,0.076,29,40,0.99574,3.42,0.75,11.0,0
4034,2.0,5.9,0.645,0.12,2.0,0.075,32,44,0.99547,3.57,0.71,10.2,0


## Create Experiments and Trials

In [37]:
# UTC timestamp of this run; kept because other cells may reference `create_date`.
create_date = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
exp_name = "Wine-Quality-Experiment"
exp_desc = "Practise Project"

# Reuse the experiment when it already exists, otherwise create it.
# Loading first guarantees that `experiment` is always bound after this cell,
# and lets us distinguish "does not exist yet" from genuine AWS failures.
try:
    experiment = Experiment.load(experiment_name=exp_name)
    print(f'{exp_name} already exists and will be reused.')
except ClientError as e:
    # Only a missing experiment is expected here; anything else (permissions,
    # throttling, bad region, ...) must surface instead of being swallowed.
    if e.response["Error"]["Code"] != "ResourceNotFound":
        raise
    experiment = Experiment.create(experiment_name=exp_name,
                                   description=exp_desc)

Wine-Quality-Experiment already exists and will be reused.


In [38]:
# A fresh UTC timestamp is baked into the name so that every run of this cell
# creates a new, distinctly named trial inside the experiment.
trial_name = "Wine-Quality-Trial-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# `trial_name` has no format placeholders, so it is passed through as-is.
demo_trial = Trial.create(trial_name=trial_name,
                          experiment_name=exp_name)

print("Trial Name, ", trial_name)

Trial Name,  Wine-Quality-Trial-2023-05-25-10-17-24


## Train

In [39]:
# Build one CSV input channel per data split (train first, then validation);
# both point at objects under the same bucket/prefix.
s3_input_train, s3_input_validation = (
    TrainingInput(s3_data=f"s3://{bucket}/{prefix}/{split_key}", content_type="csv")
    for split_key in (train_loc, validation_loc)
)

In [40]:
# Look up the XGBoost 1.5-1 training image URI for the session's region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=sess.boto_region_name,
    version='1.5-1',
)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [41]:
# Links the training job to the experiment and trial; passed to `fit` as
# `experiment_config`.
exp_config = {
    "ExperimentName": exp_name,
    "TrialName": trial_name,
    "TrialComponentDisplayName": "TrainingJob",
}

In [42]:
# Baseline XGBoost hyperparameters for the multi-class objective; values are
# passed as strings and evaluated with the multi-class error rate ("merror").
hyper_par = {
    "max_depth": "5",
    "eta": "0.2",
    "gamma": "4",
    "min_child_weight": "6",
    "subsample": "0.8",
    "objective": "multi:softmax",
    "eval_metric": "merror",
    "num_round": "100",
    "num_class": "10",
}

# Model artifacts are written below this S3 location.
output_path = f"s3://{bucket}/{prefix}/Output"

# Managed spot training settings. `max_wait` and `checkpoint_s3_uri` are only
# set when spot instances are used; otherwise both are None.
train_use_spot = True
train_max_run = 1800  # seconds
train_max_wait = 3600 if train_use_spot else None  # seconds
# Fixed: the fallback used to be the undefined name `none`, which raised a
# NameError as soon as `train_use_spot` was switched to False.
checkpoint_s3_uri = (
    f's3://{bucket}/{prefix}/checkpoints/{trial_name}' if train_use_spot else None
)

# Estimator for the built-in XGBoost container on a single ml.m4.xlarge instance.
xgb = sagemaker.estimator.Estimator(
    image_uri=container,
    hyperparameters=hyper_par,
    role=role,
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path=output_path,
    sagemaker_session=sess,
    use_spot_instances=train_use_spot,
    max_run=train_max_run,
    max_wait=train_max_wait,
    checkpoint_s3_uri=checkpoint_s3_uri
)

In [43]:
# Launch the training job on the train/validation channels and record it
# under the experiment trial described by `exp_config`.
training_channels = {"train": s3_input_train, "validation": s3_input_validation}
xgb.fit(training_channels, experiment_config=exp_config)

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-05-25-10-18-48-314


2023-05-25 10:18:48 Starting - Starting the training job...
2023-05-25 10:19:12 Starting - Preparing the instances for training......
2023-05-25 10:20:15 Downloading - Downloading input data...
2023-05-25 10:20:41 Training - Downloading the training image......
2023-05-25 10:21:31 Training - Training image download completed. Training in progress.[34m[2023-05-25 10:21:42.542 ip-10-0-159-72.eu-west-1.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-05-25 10:21:42.618 ip-10-0-159-72.eu-west-1.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-05-25:10:21:43:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-05-25:10:21:43:INFO] Failed to parse hyperparameter eval_metric value merror to Json.[0m
[34mReturning the value itself[0m
[34m[2023-05-25:10:21:43:INFO] Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34m[

In [44]:
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner

# Tuning budget: total number of training jobs and how many run concurrently.
n_jobs = 20
n_parallel_jobs = 3

# Search space explored by the tuner, one range per XGBoost hyperparameter.
hpt_ranges = dict(
    eta=ContinuousParameter(0.1, .5),
    min_child_weight=ContinuousParameter(0., 10.),
    max_depth=IntegerParameter(1, 10),
    gamma=IntegerParameter(2, 8),
    subsample=ContinuousParameter(0.1, 1.),
)

# Keyword arguments for HyperparameterTuner: Bayesian search that maximizes
# the validation accuracy reported by the training jobs.
tuner_parameters = dict(
    estimator=xgb,
    base_tuning_job_name='bayesian',
    objective_metric_name='validation:accuracy',
    objective_type='Maximize',
    hyperparameter_ranges=hpt_ranges,
    strategy='Bayesian',
    max_jobs=n_jobs,
    max_parallel_jobs=n_parallel_jobs,
)


In [45]:
# Start the tuning job without blocking, then report the name it was given.
tuning_channels = {"train": s3_input_train, "validation": s3_input_validation}

tuner = HyperparameterTuner(**tuner_parameters)
tuner.fit(tuning_channels, wait=False)

tuner_description = tuner.describe()
tuner_name = tuner_description['HyperParameterTuningJobName']
print(f'Tuning job in progress: {tuner_name}')

INFO:sagemaker:Creating hyperparameter tuning job with name: bayesian-230525-1025


Tuning job in progress: bayesian-230525-1025


In [46]:
# Block until the tuning job started above has finished; progress is printed
# as dots while waiting.
tuner.wait()

........................................................................................................................................................................................................................................................................................................!


In [56]:
# Allow wide frames to be shown in full, then display the first ten tuning results.
pd.set_option("display.max_columns", 500)
tuning_results = sagemaker.HyperparameterTuningJobAnalytics(tuner_name).dataframe()
tuning_results.head(10)

Unnamed: 0,eta,gamma,max_depth,min_child_weight,subsample,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,0.223843,2.0,8.0,5.498946,0.835093,bayesian-230525-1025-020-c6a3a2db,Completed,0.63429,2023-05-25 10:49:20+00:00,2023-05-25 10:51:12+00:00,112.0
1,0.413506,2.0,8.0,0.240721,0.882182,bayesian-230525-1025-019-dc869303,Completed,0.63925,2023-05-25 10:49:21+00:00,2023-05-25 10:51:08+00:00,107.0
2,0.474937,7.0,10.0,3.086998,0.368576,bayesian-230525-1025-018-8e7324ce,Completed,0.55699,2023-05-25 10:45:54+00:00,2023-05-25 10:47:41+00:00,107.0
3,0.403181,2.0,8.0,4.414952,0.983488,bayesian-230525-1025-017-f486fa0b,Completed,0.64817,2023-05-25 10:45:49+00:00,2023-05-25 10:47:41+00:00,112.0
4,0.187881,2.0,8.0,3.05934,0.648387,bayesian-230525-1025-016-5dba57d2,Completed,0.64024,2023-05-25 10:45:45+00:00,2023-05-25 10:47:37+00:00,112.0
5,0.425849,2.0,4.0,0.535063,0.979384,bayesian-230525-1025-015-32ad6dec,Completed,0.61943,2023-05-25 10:41:54+00:00,2023-05-25 10:43:42+00:00,108.0
6,0.22967,6.0,1.0,6.22976,0.986678,bayesian-230525-1025-014-5d124387,Completed,0.56194,2023-05-25 10:41:56+00:00,2023-05-25 10:43:44+00:00,108.0
7,0.133803,2.0,8.0,0.374678,0.972953,bayesian-230525-1025-013-00de1299,Completed,0.65015,2023-05-25 10:41:57+00:00,2023-05-25 10:44:02+00:00,125.0
8,0.492116,2.0,10.0,10.0,1.0,bayesian-230525-1025-012-a3e8078d,Completed,0.62438,2023-05-25 10:38:02+00:00,2023-05-25 10:39:54+00:00,112.0
9,0.272324,2.0,10.0,9.520856,0.938726,bayesian-230525-1025-011-3f1c7938,Completed,0.62339,2023-05-25 10:38:09+00:00,2023-05-25 10:39:56+00:00,107.0


In [52]:
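# NOTE(review): leftover scratch line. `tuner` is the HyperparameterTuner created
# earlier, not a DataFrame, so `.rename` would presumably fail here — confirm and
# consider deleting this cell.
# tuner.rename(columns = {"validation:accuracy": "validation_accuracy"}, inplace=True)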

In [60]:
# Ask the SageMaker API for the tuning job's description and show the best training job.
sm_client = boto3.client('sagemaker')
tuning_job_description = sm_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner_name
)
tuning_job_description['BestTrainingJob']

{'TrainingJobName': 'bayesian-230525-1025-013-00de1299',
 'TrainingJobArn': 'arn:aws:sagemaker:eu-west-1:790592228004:training-job/bayesian-230525-1025-013-00de1299',
 'CreationTime': datetime.datetime(2023, 5, 25, 10, 40, 26, tzinfo=tzlocal()),
 'TrainingStartTime': datetime.datetime(2023, 5, 25, 10, 41, 57, tzinfo=tzlocal()),
 'TrainingEndTime': datetime.datetime(2023, 5, 25, 10, 44, 2, tzinfo=tzlocal()),
 'TrainingJobStatus': 'Completed',
 'TunedHyperParameters': {'eta': '0.1338033352961009',
  'gamma': '2',
  'max_depth': '8',
  'min_child_weight': '0.3746783273156751',
  'subsample': '0.9729526987805376'},
 'FinalHyperParameterTuningJobObjectiveMetric': {'MetricName': 'validation:accuracy',
  'Value': 0.6501500010490417},
 'ObjectiveStatus': 'Succeeded'}

In [54]:
!pip install -Uq pip altair

[0m

In [67]:
# Project-local helper (scripts/visualization6.py) that reports on the tuning job.
# NOTE(review): `trials_only=True` presumably restricts the output to per-trial
# views — confirm against the helper's implementation.
from scripts.visualization6 import visualize_tuning_job
visualize_tuning_job(tuner, trials_only=True)

Setting altair renderer to default.
Tuning job bayesian-230525-1025      status: Completed

Number of training jobs with valid objective: 20
Lowest: 0.5421199798583984 Highest 0.6501500010490417


Unnamed: 0,eta,gamma,max_depth,min_child_weight,subsample,TrainingJobName,TrainingJobStatus,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds,TuningJobName,validation_accuracy
7,0.133803,2.0,8.0,0.374678,0.972953,bayesian-230525-1025-013-00de1299,Completed,2023-05-25 10:41:57+00:00,2023-05-25 10:44:02+00:00,125.0,bayesian-230525-1025,0.65015
3,0.403181,2.0,8.0,4.414952,0.983488,bayesian-230525-1025-017-f486fa0b,Completed,2023-05-25 10:45:49+00:00,2023-05-25 10:47:41+00:00,112.0,bayesian-230525-1025,0.64817
4,0.187881,2.0,8.0,3.05934,0.648387,bayesian-230525-1025-016-5dba57d2,Completed,2023-05-25 10:45:45+00:00,2023-05-25 10:47:37+00:00,112.0,bayesian-230525-1025,0.64024
1,0.413506,2.0,8.0,0.240721,0.882182,bayesian-230525-1025-019-dc869303,Completed,2023-05-25 10:49:21+00:00,2023-05-25 10:51:08+00:00,107.0,bayesian-230525-1025,0.63925
0,0.223843,2.0,8.0,5.498946,0.835093,bayesian-230525-1025-020-c6a3a2db,Completed,2023-05-25 10:49:20+00:00,2023-05-25 10:51:12+00:00,112.0,bayesian-230525-1025,0.63429
8,0.492116,2.0,10.0,10.0,1.0,bayesian-230525-1025-012-a3e8078d,Completed,2023-05-25 10:38:02+00:00,2023-05-25 10:39:54+00:00,112.0,bayesian-230525-1025,0.62438
9,0.272324,2.0,10.0,9.520856,0.938726,bayesian-230525-1025-011-3f1c7938,Completed,2023-05-25 10:38:09+00:00,2023-05-25 10:39:56+00:00,107.0,bayesian-230525-1025,0.62339
5,0.425849,2.0,4.0,0.535063,0.979384,bayesian-230525-1025-015-32ad6dec,Completed,2023-05-25 10:41:54+00:00,2023-05-25 10:43:42+00:00,108.0,bayesian-230525-1025,0.61943
12,0.439848,2.0,6.0,8.999163,0.697583,bayesian-230525-1025-008-b9b2848f,Completed,2023-05-25 10:34:30+00:00,2023-05-25 10:36:17+00:00,107.0,bayesian-230525-1025,0.61744
19,0.140372,2.0,6.0,9.203041,0.643044,bayesian-230525-1025-001-4c118f9d,Completed,2023-05-25 10:27:19+00:00,2023-05-25 10:29:06+00:00,107.0,bayesian-230525-1025,0.61546


['max_depth', 'gamma', 'eta', 'min_child_weight', 'subsample']
['max_depth', 'gamma', 'eta', 'min_child_weight', 'subsample']
max_depth
gamma
eta
min_child_weight
subsample
