# Train a SKLearn Model using Script Mode

- Reference: https://sagemaker-examples.readthedocs.io/en/latest/sagemaker-script-mode/sklearn/sklearn_byom_outputs.html

---

In [2]:
!pip install -U sagemaker

Collecting sagemaker
  Using cached sagemaker-2.156.0-py2.py3-none-any.whl
Collecting importlib-metadata<5.0,>=1.4.0
  Using cached importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Collecting tblib==1.7.0
  Using cached tblib-1.7.0-py2.py3-none-any.whl (12 kB)
Collecting attrs<24,>=23.1.0
  Using cached attrs-23.1.0-py3-none-any.whl (61 kB)
Collecting boto3<2.0,>=1.26.131
  Using cached boto3-1.26.135-py3-none-any.whl (135 kB)
Collecting botocore<1.30.0,>=1.29.135
  Using cached botocore-1.29.135-py3-none-any.whl (10.7 MB)
Installing collected packages: tblib, importlib-metadata, botocore, attrs, boto3, sagemaker
  Attempting uninstall: tblib
    Found existing installation: tblib 1.6.0
    Uninstalling tblib-1.6.0:
      Successfully uninstalled tblib-1.6.0
  Attempting uninstall: importlib-metadata
    Found existing installation: importlib-metadata 6.3.0
    Uninstalling importlib-metadata-6.3.0:
      Successfully uninstalled importlib-metadata-6.3.0
  Attempting uninstall: botoc

In [3]:
import boto3
import pandas as pd
import numpy as np

# Download the Iris CSV from the sample-data bucket into the working directory.
s3 = boto3.client("s3")
s3.download_file("sagemaker-sample-files", "datasets/tabular/iris/iris.data", "iris.data")

# The file is read with header=None, so the column names are supplied here.
iris_columns = ["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
df = pd.read_csv("iris.data", header=None, names=iris_columns)
df.head()

Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Build the categorical view of the label column once and reuse it for both
# the integer codes and the code -> label lookup table.
class_as_category = df["class"].astype("category")

df["class-cat"] = class_as_category.cat.codes
categories_map = dict(enumerate(class_as_category.cat.categories))
print(categories_map)
df.head()

{0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}


Unnamed: 0,sepal_len,sepal_wid,petal_len,petal_wid,class,class-cat
0,5.1,3.5,1.4,0.2,Iris-setosa,0
1,4.9,3.0,1.4,0.2,Iris-setosa,0
2,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5.0,3.6,1.4,0.2,Iris-setosa,0


In [5]:
# 80/20 train/test split.
#
# The source file appears to be ordered by class (the first rows shown above are
# all Iris-setosa), so slicing the frame positionally would leave the held-out
# rows with a single species. Shuffle the rows with a fixed seed first so both
# partitions contain a mix of classes and the split is reproducible on re-run.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

num_samples = df.shape[0]
split = round(num_samples * TRAIN_FRACTION)  # number of rows in the training partition

shuffled = df.sample(frac=1, random_state=SPLIT_SEED)  # full-length random permutation of the rows
train = shuffled.iloc[:split, :]
test = shuffled.iloc[split:, :]
print("{} train, {} test".format(split, num_samples - split))

120 train, 30 test


In [6]:
# Write both partitions to CSV files in the working directory, omitting the
# pandas index column.
for partition, csv_name in ((train, "train.csv"), (test, "test.csv")):
    partition.to_csv(csv_name, index=False)

In [7]:
import sagemaker

# Create a SageMaker session and upload train.csv under "<prefix>/training".
# No bucket is named here; the value returned by upload_data is what the
# estimator's fit() call later receives as the "train" channel.
sagemaker_session = sagemaker.Session()

prefix = "DEMO-sklearn-iris"
training_key_prefix = f"{prefix}/training"
training_input_path = sagemaker_session.upload_data("train.csv", key_prefix=training_key_prefix)

In [8]:
# Look up the execution role for this notebook; it is passed to the SKLearn
# estimator below as `role`.
role = sagemaker.get_execution_role()
# Note: the printed value is a full IAM role ARN, which includes the AWS account
# id -- consider clearing this output before sharing the notebook.
print(role)

arn:aws:iam::790592228004:role/service-role/AmazonSageMaker-ExecutionRole-20230510T135961


In [9]:
from sagemaker.sklearn import SKLearn

# Settings for the scikit-learn training job: one ml.c5.xlarge instance running
# the local train.py entry point with a single hyperparameter.
# NOTE(review): `script_mode` does not appear to be a documented SKLearn
# estimator argument in SDK v2 -- confirm it is still needed.
estimator_settings = dict(
    entry_point="train.py",
    role=role,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    py_version="py3",
    framework_version="1.2-1",
    script_mode=True,
    hyperparameters={"estimators": 20},
)
sk_estimator = SKLearn(**estimator_settings)

# Launch the training job with the uploaded CSV as the "train" channel.
# NOTE(review): the recorded run of this cell failed with ExecuteUserScriptError
# raised from the user script; train.py is not visible from this notebook.
sk_estimator.fit({"train": training_input_path})

INFO:sagemaker:Creating training-job with name: sagemaker-scikit-learn-2023-05-17-12-49-46-709


Using provided s3_resource
2023-05-17 12:49:47 Starting - Starting the training job...
2023-05-17 12:50:01 Starting - Preparing the instances for training......
2023-05-17 12:51:14 Downloading - Downloading input data
2023-05-17 12:51:14 Training - Downloading the training image...
2023-05-17 12:51:45 Uploading - Uploading generated training model
2023-05-17 12:51:39,736 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
2023-05-17 12:51:39,739 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-05-17 12:51:39,746 sagemaker_sklearn_container.training INFO     Invoking user training script.
2023-05-17 12:51:39,936 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-05-17 12:51:39,947 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2023-05-17 12:51:39,957 sagemaker-training-toolkit INFO     No GPUs d

UnexpectedStatusException: Error for Training job sagemaker-scikit-learn-2023-05-17-12-49-46-709: Failed. Reason: AlgorithmError: framework error: 
Traceback (most recent call last):
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_containers/_trainer.py", line 84, in train
    entrypoint()
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_sklearn_container/training.py", line 39, in main
    train(environment.Environment())
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_sklearn_container/training.py", line 31, in train
    entry_point.run(uri=training_environment.module_dir,
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_training/entry_point.py", line 99, in run
    return runner.get(runner_type, user_entry_point, args, env_vars, extra_opts).run(
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_training/process.py", line 286, in run
    process = check_error(
  File "/miniconda3/lib/python3.8/site-packages/sagemaker_training/process.py", line 204, in check_error
    raise error_class(
sagemaker_training.errors.ExecuteUserScriptError: ExecuteUserScriptError:
ExitCode 1
Error