In [1]:
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.tuner import HyperparameterTuner, ContinuousParameter, IntegerParameter
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

In [2]:
# Scikit-learn estimator that runs `train.py` from `./training` using
# SageMaker local mode (instance_type="local") on a single instance.
estimator = SKLearn(
    role="arn:aws:iam::111111111111:role/service-role/AmazonSageMaker-ExecutionRole-20200101T000001",
    base_job_name="training_tests",
    framework_version="0.23-1",
    source_dir="./training",
    entry_point="train.py",
    instance_count=1,
    instance_type="local",
)


In [3]:
# Train the estimator. Every key of `inputs` is an input channel name and
# every value is the S3 object (under the processing output prefix) that
# feeds that channel.
base_uri = "s3://sagemaker-us-east-1-681301531115/processing_tests-2021-12-07-02-39-45-682/output/"
estimator.fit(
    inputs={
        "train": f"{base_uri}train_data/train.csv",
        "dev": f"{base_uri}dev_data/dev.csv",
        "test": f"{base_uri}test_data/test.csv",
        "feats": f"{base_uri}features_info/features_info.json",
        "txtml": f"{base_uri}text_model/text_model.joblib",
    }
)

Creating 53303rdge8-algo-1-obdbm ... 
Creating 53303rdge8-algo-1-obdbm ... done
Attaching to 53303rdge8-algo-1-obdbm
[36m53303rdge8-algo-1-obdbm |[0m 2021-12-10 03:10:39,949 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training
[36m53303rdge8-algo-1-obdbm |[0m 2021-12-10 03:10:39,951 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36m53303rdge8-algo-1-obdbm |[0m 2021-12-10 03:10:39,960 sagemaker_sklearn_container.training INFO     Invoking user training script.
[36m53303rdge8-algo-1-obdbm |[0m 2021-12-10 03:10:40,989 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:
[36m53303rdge8-algo-1-obdbm |[0m /miniconda3/bin/python -m pip install -r requirements.txt
[36m53303rdge8-algo-1-obdbm |[0m Collecting imbalanced-learn==0.7.*
[36m53303rdge8-algo-1-obdbm |[0m   Downloading imbalanced_learn-0.7.0-py3-none-any.whl (167 kB)
[K     |████████████████████████████████| 167 kB 252 kB/

In [8]:
# Hyperparameter search around `estimator`, limited to 50 training jobs run
# one at a time. The objective is the `f1_1_dev` value scraped from the
# training logs with the regex in `metric_definitions`.
#
# NOTE(review): `estimator` is configured with instance_type="local".
# Hyperparameter tuning jobs are executed by the managed SageMaker service,
# so this probably needs an estimator with a real `ml.*` instance type --
# confirm before running.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name="f1_1_dev",
    hyperparameter_ranges={
        # NOTE(review): if train.py feeds this to LightGBM, num_leaves must
        # be > 1, so the lower bound of 1 would be rejected -- confirm.
        "num_leaves": IntegerParameter(1, 30, "Linear"),
        "learning_rate": ContinuousParameter(0.0001, 0.5, "Logarithmic"),
        "n_estimators": IntegerParameter(50, 5000, "Logarithmic"),
        "min_child_samples": IntegerParameter(1, 5000, "Logarithmic"),
        "colsample_bytree": ContinuousParameter(0.2, 1.0, "Linear"),
        "subsample": ContinuousParameter(0.2, 1.0, "Linear"),
        "subsample_freq": IntegerParameter(1, 10, "Linear"),
        # Logarithmic scaling is only valid for ranges whose values are all
        # strictly greater than 0. These ranges start at 0.0, so they are
        # searched on a linear scale instead.
        "reg_alpha": ContinuousParameter(0.0, 1.0, "Linear"),
        "reg_lambda": ContinuousParameter(0.0, 1.0, "Linear"),
    },
    # Raw string: `\d` and `\.` must reach the regex engine unchanged rather
    # than being parsed as (invalid) Python string escape sequences.
    metric_definitions=[{"Name": "f1_1_dev", "Regex": r"f1_1_dev: (\d\.\d+)"}],
    max_jobs=50,
    max_parallel_jobs=1,
)

In [None]:
# Start the tuning job on the same input channels as the single training run.
# The processing output prefix and the text-model artifact name match the
# training cell (`...-2021-12-07-02-39-45-682` and `text_model.joblib`), so
# tuning and training read the same data and `base_uri` is not silently
# rebound to a different run.
# NOTE(review): confirm that this is the processing run intended for tuning.
base_uri = "s3://sagemaker-us-east-1-681301531115/processing_tests-2021-12-07-02-39-45-682/output/"
tuner.fit(
    inputs={
        "train": base_uri + "train_data/train.csv",
        "dev": base_uri + "dev_data/dev.csv",
        "test": base_uri + "test_data/test.csv",
        "feats": base_uri + "features_info/features_info.json",
        "txtml": base_uri + "text_model/text_model.joblib",
    }
)

In [4]:
# Deploy the trained estimator to a single local-mode endpoint. The returned
# predictor serializes requests and deserializes responses as JSON.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="local",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)


Attaching to svt0lb071h-algo-1-25gzt
[36msvt0lb071h-algo-1-25gzt |[0m 2021-12-10 03:11:06,683 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36msvt0lb071h-algo-1-25gzt |[0m 2021-12-10 03:11:06,685 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36msvt0lb071h-algo-1-25gzt |[0m 2021-12-10 03:11:06,686 INFO - sagemaker-containers - nginx config: 
[36msvt0lb071h-algo-1-25gzt |[0m worker_processes auto;
[36msvt0lb071h-algo-1-25gzt |[0m daemon off;
[36msvt0lb071h-algo-1-25gzt |[0m pid /tmp/nginx.pid;
[36msvt0lb071h-algo-1-25gzt |[0m error_log  /dev/stderr;
[36msvt0lb071h-algo-1-25gzt |[0m 
[36msvt0lb071h-algo-1-25gzt |[0m worker_rlimit_nofile 4096;
[36msvt0lb071h-algo-1-25gzt |[0m 
[36msvt0lb071h-algo-1-25gzt |[0m events {
[36msvt0lb071h-algo-1-25gzt |[0m   worker_connections 2048;
[36msvt0lb071h-algo-1-25gzt |[0m }
[36msvt0lb071h-algo-1-25gzt |[0m 
[36msvt0lb071h-algo-1-25gzt |[0m http {
[36msvt0lb07

In [5]:
# Single example record used as the request payload for the endpoint.
exp = {
    "id": "d3d56eed-8a0c-4bd7-b163-39d17b4aa3ea",
    "a": 4,
    "b": 0.5879,
    "c": 246212.65,
    "d": 27.0,
    "e": 1.627860465116279,
    "f": 0.0,
    "g": "AR",
    "h": 41,
    "i": "Cable Micro Hdmi A Hdmi Kolke 1.8mts C/ Doble Filtro Full Hd",
    "j": "cat_efed068",
    "k": 0.8372673560737631,
    "l": 1641.0,
    "m": 38.0,
    "n": 1,
    "o": "N",
    "p": "N",
    "fecha": "2020-04-07 17:28:24",
    "monto": 3.33,
}

In [6]:
# Send the example record to the endpoint; the deserialized JSON response is
# displayed as the cell output.
predictor.predict(data=exp)

[36msvt0lb071h-algo-1-25gzt |[0m 2021-12-10 03:11:37,645 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)
[36msvt0lb071h-algo-1-25gzt |[0m 172.18.0.1 - - [10/Dec/2021:03:11:39 +0000] "POST /invocations HTTP/1.1" 200 54 "-" "python-urllib3/1.26.7"


{'fraude': 'sim', 'probabilidade': 0.8561303035221127}