In [None]:
!pip3 install -U sagemaker

In [None]:
import sagemaker
import boto3
from sagemaker import image_uris
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput

# --- Hyperparameters --------------------------------------------------------
# Pairwise learning-to-rank with the histogram tree builder.
# `num_round` is a required hyperparameter of the SageMaker built-in XGBoost
# algorithm; without it the training job is rejected during validation.
# The value below is a starting point and should be tuned.
# NOTE(review): `lambdarank_pair_method` was added in XGBoost 2.0 — confirm the
# "1.7-1" container selected below accepts it, or bump the container version.
hyperparameters = {
    "tree_method": "hist",
    "objective": "rank:pairwise",
    "lambdarank_pair_method": "topk",
    "num_round": "100",
}

# --- S3 locations -----------------------------------------------------------
# UPDATE BUCKET NAME EACH TIME A NEW BUCKET IS CREATED
# if CDK was used, the name will most likely be "amzn-s3-slotify-sagemaker"
bucket = "sagemaker-eu-west-2-971422692105"  # REMEMBER TO CHECK S3 BUCKET NAME
prefix = "MeetingImportance/xgboost-scripts"

# Where the trained model artifact will be written.
output_path = f"s3://{bucket}/{prefix}/xgb-built-in/output"

# --- Execution context ------------------------------------------------------
# IAM role handed to the estimator, and the region of the default session.
role = "sagemaker_local_user"
region = sagemaker.Session().boto_region_name

# Look up the image URI of the built-in XGBoost container for this region.
# Change `version` to select a different container release.
xgboost_container = sagemaker.image_uris.retrieve("xgboost", region, version="1.7-1")

# --- Estimator --------------------------------------------------------------
# Generic estimator that runs the XGBoost container on a single instance.
estimator = sagemaker.estimator.Estimator(
    image_uri=xgboost_container,
    hyperparameters=hyperparameters,
    role=role,
    instance_count=1,
    instance_type="ml.m5.large",
    volume_size=5,  # 5 GB
    output_path=output_path,
)

# --- Input channels ---------------------------------------------------------
# Both channels are read as CSV from sibling prefixes under `prefix`.
content_type = "csv"
train_input = TrainingInput(
    f"s3://{bucket}/{prefix}/train/", content_type=content_type
)
validation_input = TrainingInput(
    f"s3://{bucket}/{prefix}/validation/", content_type=content_type
)

In [None]:
# Train the model
# Start the training job, feeding the estimator the "train" and "validation"
# channels built in the configuration cell.
estimator.fit(
    inputs={
        "train": train_input,
        "validation": validation_input,
    }
)

In [None]:
# Deploy the model
# Deploy the trained estimator under a fixed endpoint and model name on one
# ml.m5.large instance; the returned object is kept in `xgb_predictor`.
xgb_predictor = estimator.deploy(
    endpoint_name="importantAI",
    model_name="ImportanceModel",
    instance_type="ml.m5.large",
    initial_instance_count=1,
)

In [None]:
# Example invocation of the deployed endpoint (kept commented out).
# from sagemaker.serializers import CSVSerializer
# xgb_predictor.serializer = CSVSerializer()

# predictions = xgb_predictor.predict([1,
# "(""Weekly Meeting"",2,""90 minutes"")",
# "(""Weekly Meeting"",2,""60 minutes"")",
# "(""Weekly Meeting"",2,""45 minutes"")",
# "(""Weekly Meeting"",2,""30 minutes"")",
# "(""Weekly Meeting"",2,""15 minutes"")"]).decode('utf-8')