In [1]:
# 1: creating initial variables

# Imports; the SageMaker execution role and session are created right after them
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
import boto3
import os

# IAM role, SageMaker session and that session's default S3 bucket.
role = sagemaker.get_execution_role()
sess = sagemaker.Session()
bucket = sess.default_bucket()

# S3 layout used by this notebook:
#   key prefix : script-mode-workflow/pickle
#   URI        : s3://<bucket>/script-mode-workflow/pickle
#   train URI  : s3://<bucket>/script-mode-workflow/pickle/train
s3_prefix = "script-mode-workflow"
pickle_s3_prefix = "/".join([s3_prefix, "pickle"])
pickle_s3_uri = f"s3://{bucket}/{pickle_s3_prefix}"
pickle_train_s3_uri = pickle_s3_uri + "/train"

# Current working directory; joining with "" leaves a trailing path separator.
train_dir = os.path.join(os.getcwd(), "")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# 2: uploading training data to S3

# S3 object keys always use "/" as the separator, so the key is built with an
# f-string instead of os.path.join (which would emit "\" on Windows).
train_pickle_key = f"{pickle_s3_prefix}/train.pickle"

# os.path.join copes with train_dir's trailing separator, so the local path no
# longer contains the "//" that plain string concatenation produced.
train_pickle_path = os.path.join(train_dir, "train.pickle")

# Upload <train_dir>/train.pickle to s3://<bucket>/<pickle_s3_prefix>/train.pickle.
s3_resource_bucket = boto3.Session().resource("s3").Bucket(bucket)
s3_resource_bucket.Object(train_pickle_key).upload_file(train_pickle_path)

In [3]:
# 3: creating hyperparameters

# Optional: according to the original author these values are the defaults, so
# passing them is not required.
# NOTE(review): the names look like scikit-learn LinearRegression arguments —
# confirm against the training script.
hyperparameters = dict(
    copy_X=True,
    fit_intercept=True,
    normalize=False,
)

In [4]:
# 4: configuring estimator parameters

# NOTE: change this to match the file name of your own training script.
entry_point = "regressionScript.py"

# Instance type / size for the training job; adjust to your needs.
train_instance_type = "ml.m5.large"

# Keyword arguments for the SKLearn estimator, kept in a dict so they can be
# inspected or tweaked before the estimator is built.
estimator_parameters = dict(
    entry_point=entry_point,
    source_dir="script",
    framework_version="0.23-1",
    py_version="py3",
    instance_type=train_instance_type,
    instance_count=1,
    hyperparameters=hyperparameters,
    role=role,
    base_job_name="linearregression-model",
)

estimator = SKLearn(**estimator_parameters)

In [5]:
# 5: running the training job

# Map the "train" input to the S3 location of the training data.
# NOTE(review): cell 2 uploads to ".../pickle/train.pickle" while this URI is
# ".../pickle/train" — presumably this relies on S3 key-prefix matching; confirm.
inputs = dict(train=pickle_train_s3_uri)

# Start the training job; its log is streamed into the cell output.
estimator.fit(inputs)

INFO:sagemaker:Creating training-job with name: linearregression-model-2024-11-22-05-26-08-544


2024-11-22 05:26:10 Starting - Starting the training job...
2024-11-22 05:26:25 Starting - Preparing the instances for training...
2024-11-22 05:26:50 Downloading - Downloading input data...
2024-11-22 05:27:21 Downloading - Downloading the training image...
2024-11-22 05:28:12 Training - Training image download completed. Training in progress.
2024-11-22 05:28:12 Uploading - Uploading generated training model.[34m2024-11-22 05:28:05,825 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2024-11-22 05:28:05,828 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-11-22 05:28:05,867 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2024-11-22 05:28:06,048 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024-11-22 05:28:06,059 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2024

In [6]:
# 6: deploying a model to an endpoint

# Host the trained model on a single ml.m5.large instance under a fixed
# endpoint name.
sklearn_predictor = estimator.deploy(
    endpoint_name="linearregression-endpoint",
    instance_type="ml.m5.large",
    initial_instance_count=1,
)

# Smoke-test the endpoint with four single-feature rows (0, 1, 2, 3); being the
# last expression of the cell, the prediction is shown as the cell output.
sklearn_predictor.predict([[feature] for feature in range(4)])

INFO:sagemaker:Creating model with name: linearregression-model-2024-11-22-05-28-56-103
INFO:sagemaker:Creating endpoint-config with name linearregression-endpoint
INFO:sagemaker:Creating endpoint with name linearregression-endpoint


-----!

array([-0.81191577, 19.18455459, 39.18102495, 59.17749531])

In [7]:
# 7: deleting the endpoint

# Tear down the endpoint together with its endpoint configuration.
sklearn_predictor.delete_endpoint(delete_endpoint_config=True)

INFO:sagemaker:Deleting endpoint configuration with name: linearregression-endpoint
INFO:sagemaker:Deleting endpoint with name: linearregression-endpoint
