In [81]:
import sagemaker
import boto3
from sagemaker.processing import ScriptProcessor
from sagemaker import get_execution_role
from sagemaker import image_uris

role = get_execution_role()
sm_session = sagemaker.Session()

boto_session = boto3.Session()
region = boto_session.region_name

constants = {
    "bucket_name": "wildfires",
    "sklearn_image_uri_version": "1.2-1",
    "region": boto_session.region_name
}

In [82]:
script_path = sm_session.upload_data(
    path='data_processing.py',
    bucket=constants["bucket_name"],
    key_prefix="scripts"
)

sklearn_image_uri = image_uris.retrieve(
        framework="sklearn",
        region=constants["region"],
        version=constants["sklearn_image_uri_version"],
    )

INFO:sagemaker.image_uris:Defaulting to only available Python version: py3
INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.


In [83]:
script_processor = ScriptProcessor(
    role=role,
    image_uri=sklearn_image_uri,
    command=['python3'],
    instance_count=1,
    instance_type='ml.m5.large'
)

script_processor.run(
    job_name='wildfire-data-processing-job-018',
    code=script_path,
    arguments=[
        '--src_bucket', constants["bucket_name"],
        '--src_prefix', 'download/',
        '--dest_bucket', constants["bucket_name"],
        '--dest_prefix', 'data/raw_data/',
        '--region', region,
        '--seed', '42'
    ]
)

INFO:sagemaker:Creating processing-job with name wildfire-data-processing-job-018


.............[34mCollecting sagemaker
  Downloading sagemaker-2.224.1-py3-none-any.whl.metadata (15 kB)[0m
[34mCollecting attrs<24,>=23.1.0 (from sagemaker)
  Downloading attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)[0m
[34mCollecting boto3
  Downloading boto3-1.34.132-py3-none-any.whl.metadata (6.6 kB)[0m
[34mCollecting cloudpickle==2.2.1 (from sagemaker)
  Downloading cloudpickle-2.2.1-py3-none-any.whl.metadata (6.9 kB)[0m
[34mCollecting google-pasta (from sagemaker)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)[0m
[34mCollecting smdebug-rulesconfig==1.0.1 (from sagemaker)
  Downloading smdebug_rulesconfig-1.0.1-py2.py3-none-any.whl.metadata (943 bytes)[0m
[34mCollecting importlib-metadata<7.0,>=1.4.0 (from sagemaker)
  Downloading importlib_metadata-6.11.0-py3-none-any.whl.metadata (4.9 kB)[0m
[34mCollecting packaging>=20.0 (from sagemaker)
  Downloading packaging-24.1-py3-none-any.whl.metadata (3.2 kB)[0m
[34mCollecting pathos (from sagem