In [1]:
import sagemaker
from IPython.display import FileLink, FileLinks, display
from sagemaker.debugger import Rule, rule_configs
from sagemaker.serializers import CSVSerializer
from sagemaker.session import TrainingInput

In [2]:
# Read the AWS region from a default SageMaker session and echo it so the
# reader can see which region the notebook is running against.
region = sagemaker.Session().boto_region_name
print(f"AWS Region: {region}")

# IAM role ARN the notebook executes under; it is handed to the estimator later.
role = sagemaker.get_execution_role()
print(f"RoleArn: {role}")

AWS Region: us-east-2
RoleArn: arn:aws:iam::257056996471:role/cb-sagemaker


In [3]:
# Notebook-wide configuration: S3 key prefix, model name, and bucket locations.
inputs_bucket_name = "cb-analytics-us-east-2-prd"  # bucket the training/validation CSVs are read from
model_name = "xgboost_model_v4_wide_tc_ddos"
prefix = "sagemaker"  # key prefix shared by the input and output S3 paths
# Model artifacts and rule output are written beneath this S3 location.
s3_model_output_location = f"s3://cb-analytics-exports-us-east-2-prd/{prefix}/xgboost_model"

In [4]:
# Look up the image URI of the built-in XGBoost container (version 1.2-1) for
# the current region and print it for reference.
container = sagemaker.image_uris.retrieve(framework="xgboost", region=region, version="1.2-1")
print(container)

# Debugger rule that generates the XGBoost training report.
xgboost_report_rule = Rule.sagemaker(rule_configs.create_xgboost_report())

# Estimator for the training job: a single ml.m4.xlarge instance, with model
# artifacts written under s3_model_output_location.
xgb_model = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type="ml.m4.xlarge",
    volume_size=10,
    output_path=s3_model_output_location,
    sagemaker_session=sagemaker.Session(),
    rules=[xgboost_report_rule],
)

257758044811.dkr.ecr.us-east-2.amazonaws.com/sagemaker-xgboost:1.2-1


In [6]:
# Hyperparameters for the XGBoost training job, gathered in one dict so they
# can be reviewed at a glance and passed through in a single call.
xgb_hyperparameters = {
    "max_depth": 6,
    "objective": "reg:squarederror",
    "num_round": 100,
    "alpha": 0.5,  # l1
    "eta": 0.3,  # step size
    "gamma": 10,  # min gain for split
}
# l2 regularisation (`lambda`) is not set here. `lambda` is a reserved word in
# Python and cannot be written as a plain keyword argument; to set it, add a
# "lambda" key to the dict above.
xgb_model.set_hyperparameters(**xgb_hyperparameters)

In [7]:
# S3 URIs of the CSV training and validation files.
train_s3_uri = f"s3://{inputs_bucket_name}/{prefix}/data/train_sm_v4.csv"
validation_s3_uri = f"s3://{inputs_bucket_name}/{prefix}/data/val_sm_v4.csv"

# Wrap each URI as a training-job input channel with CSV content type.
train_input = TrainingInput(train_s3_uri, content_type="csv")
validation_input = TrainingInput(validation_s3_uri, content_type="csv")

In [None]:
# Launch the training job with the two data channels; wait=True blocks the
# cell and streams the job log until training finishes.
data_channels = {"train": train_input, "validation": validation_input}
xgb_model.fit(data_channels, wait=True)

2022-02-23 21:06:31 Starting - Starting the training job...
2022-02-23 21:06:57 Starting - Preparing the instances for trainingCreateXgboostReport: InProgress
ProfilerReport-1645650391: InProgress
.........
2022-02-23 21:08:15 Downloading - Downloading input data.........
2022-02-23 21:09:55 Training - Downloading the training image...
2022-02-23 21:10:27 Training - Training image download completed. Training in progress..[34m[2022-02-23 21:10:31.780 ip-10-0-191-36.us-east-2.compute.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined deli

In [None]:
rule_output_path = xgb_model.output_path + "/" + xgb_model.latest_training_job.name + "/rule-output"
! aws s3 ls {rule_output_path} --recursive

In [12]:
# Recursively copy everything under rule_output_path into the current working
# directory, so the generated report files can be opened locally.
! aws s3 cp {rule_output_path} ./ --recursive

download: s3://cb-analytics-exports-us-east-2-prd/sagemaker/xgboost_model/sagemaker-xgboost-2022-02-23-21-06-31-404/rule-output/CreateXgboostReport/xgboost-reports/FeatureImportance.json to CreateXgboostReport/xgboost-reports/FeatureImportance.json
download: s3://cb-analytics-exports-us-east-2-prd/sagemaker/xgboost_model/sagemaker-xgboost-2022-02-23-21-06-31-404/rule-output/CreateXgboostReport/xgboost-reports/AbsoluteValidationErrorPerLabelBins.json to CreateXgboostReport/xgboost-reports/AbsoluteValidationErrorPerLabelBins.json
download: s3://cb-analytics-exports-us-east-2-prd/sagemaker/xgboost_model/sagemaker-xgboost-2022-02-23-21-06-31-404/rule-output/CreateXgboostReport/xgboost-reports/LossData.json to CreateXgboostReport/xgboost-reports/LossData.json
download: s3://cb-analytics-exports-us-east-2-prd/sagemaker/xgboost_model/sagemaker-xgboost-2022-02-23-21-06-31-404/rule-output/CreateXgboostReport/xgboost-reports/ResidualDistribution.json to CreateXgboostReport/xgboost-reports/Residu

In [13]:
# Show a clickable link to the XGBoost training report.
# The path is relative to the notebook's working directory; it presumably exists
# once the rule output has been copied locally — confirm before relying on it.
# FileLink and display come from IPython.display, imported in the first cell.
xgboost_report_path = "CreateXgboostReport/xgboost_report.html"
display("Click link below to view the XGBoost Training report", FileLink(xgboost_report_path))

'Click link below to view the XGBoost Training report'

In [14]:
# Pick the first rule of the latest training job whose configuration name
# contains "Profiler"; that name is also the local directory holding the
# profiler report.
profiler_report_name = next(
    (
        rule["RuleConfigurationName"]
        for rule in xgb_model.latest_training_job.rule_job_summary()
        if "Profiler" in rule["RuleConfigurationName"]
    ),
    None,
)
# Fail with a clear message instead of an opaque IndexError when the training
# job has no profiler rule.
if profiler_report_name is None:
    raise LookupError(
        "No rule with 'Profiler' in its RuleConfigurationName was found "
        "for the latest training job"
    )

profiler_report_path = profiler_report_name + "/profiler-output/profiler-report.html"
display("Click link below to view the profiler report", FileLink(profiler_report_path))

'Click link below to view the profiler report'

In [24]:
# Display the estimator's model_data attribute; the recorded output is an S3
# URI ending in output/model.tar.gz.
xgb_model.model_data

's3://cb-analytics-exports-us-east-2-prd/sagemaker/xgboost_model/sagemaker-xgboost-2022-02-21-23-13-06-261/output/model.tar.gz'

In [25]:

# Deploy the trained model to a real-time endpoint backed by one ml.t2.medium
# instance. Requests sent through the returned predictor are serialized as CSV.
csv_serializer = CSVSerializer()
xgb_predictor = xgb_model.deploy(
    serializer=csv_serializer,
    instance_type="ml.t2.medium",
    initial_instance_count=1,
)

-------------!

In [26]:
# Display the name of the endpoint behind xgb_predictor.
xgb_predictor.endpoint_name

'sagemaker-xgboost-2022-02-21-23-34-51-319'