In [26]:
# Step 1: SageMaker and Boto3 Setup
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
import boto3
import os

# Set up the SageMaker session, execution role, and bucket.
# `session` and `role` are handed to the SKLearn estimator in the next step.
session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = 'ucicustomerdata'  # Replace with your bucket name
# Region of the default boto3 session; not referenced by the other cells shown here.
region = boto3.Session().region_name

# Input/Output paths in S3.
# Both URIs are derived from `bucket`, so editing the bucket name above is the
# only change needed to retarget the notebook (previously the input URI
# hard-coded the bucket and would have kept pointing at the old one).
input_s3_uri = f"s3://{bucket}/raw/Online_Retail.csv"
output_s3_uri = f"s3://{bucket}/output/"


In [21]:
# Step 2: Configure the scikit-learn estimator used for the training job
sklearn_estimator = SKLearn(
    # --- what to run ---
    entry_point="train.py",        # training script executed inside the container
    source_dir=".",                # directory holding train.py ('.' = current working directory)
    # NOTE(review): "0.23-1" is an older scikit-learn container version — confirm it is intended.
    framework_version="0.23-1",
    # --- how to run it ---
    instance_type="ml.m5.large",
    role=role,
    sagemaker_session=session,
    # --- where the results go ---
    output_path=output_s3_uri,
)


In [22]:
# Step 3: Launch the Training Job
# Starts a SageMaker training job with a single input channel named 'raw' that
# points at the CSV in S3. The job log is streamed into the cell output while it runs.
# NOTE(review): train.py presumably reads this channel via the SM_CHANNEL_RAW
# location — confirm against the script.
sklearn_estimator.fit(inputs={"raw": input_s3_uri})


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2025-05-29-21-56-40-699


2025-05-29 21:56:47 Starting - Starting the training job...
2025-05-29 21:57:00 Starting - Preparing the instances for training...
2025-05-29 21:57:24 Downloading - Downloading input data...
2025-05-29 21:57:54 Downloading - Downloading the training image...
2025-05-29 21:58:46 Training - Training image download completed. Training in progress.
2025-05-29 21:58:46 Uploading - Uploading generated training model
2025-05-29 21:58:39,003 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2025-05-29 21:58:39,008 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-05-29 21:58:39,052 sagemaker_sklearn_container.training INFO     Invoking user training script.
2025-05-29 21:58:39,658 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-05-29 21:58:39,671 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[34m2025-

In [23]:
# Step 4: View the Output Model Artifacts
# `model_data` is read from the estimator after training; per the output below it
# is the S3 URI of the model.tar.gz written under the estimator's output path.
print("Model saved to:", sklearn_estimator.model_data)


Model saved to: s3://ucicustomerdata/output/sagemaker-scikit-learn-2025-05-29-21-56-40-699/output/model.tar.gz
