In [40]:
import pandas as pd
from sklearn.datasets import load_iris

In [39]:
# Build the training CSV from the iris dataset: label column first,
# followed by the feature columns.
import os

X, y = load_iris(return_X_y=True, as_frame=True)
dados = pd.concat([y, X], axis=1)

# DataFrame.to_csv does not create missing directories, so make sure the
# target folder exists before writing (otherwise a fresh checkout fails here).
os.makedirs("data", exist_ok=True)

# Written without a header row or index column; the file is later uploaded
# to S3 and fed to the built-in XGBoost container as "csv" content.
dados.to_csv("data/dados.csv", header=False, index=False)

In [None]:
import boto3
import sagemaker

In [41]:
from sagemaker import image_uris
from sagemaker.session import Session

In [42]:
# Resolve the default S3 bucket of the current SageMaker session; it is used
# below for the uploaded training data and for the model output location.
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

In [44]:
# Look up the URI of the built-in XGBoost 1.2-1 training image.
# The region is taken from the active SageMaker session instead of being
# hard-coded, so the image always comes from the same region in which the
# training job (and the default bucket) live.
region = sagemaker.Session().boto_region_name
xgboost_container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-1",
)

In [46]:
# IAM execution role handed to the estimator. The ARN is specific to one AWS
# account, so it must be replaced when running this notebook elsewhere.
role = (
    "arn:aws:iam::885248014373:role/service-role/"
    "AmazonSageMaker-ExecutionRole-20210305T230941"
)

In [59]:
# XGBoost hyperparameters forwarded to the training container.
# All values are kept as strings, exactly as they are passed to the estimator.
hyperparameters = dict(
    max_depth="5",
    eta="0.2",
    gamma="4",
    min_child_weight="6",
    subsample="0.7",
    objective="multi:softmax",
    num_round="2",
    num_class="3",
)

In [60]:
    # Configure the training job: built-in XGBoost image, a single
    # ml.m5.2xlarge instance, and model artifacts written to the root of the
    # session's default bucket.
    model_output_location = f"s3://{bucket}"
    estimator = sagemaker.estimator.Estimator(
        image_uri=xgboost_container,
        role=role,
        instance_count=1,
        instance_type='ml.m5.2xlarge',
        volume_size=5,
        output_path=model_output_location,
        hyperparameters=hyperparameters,
    )

In [63]:
# Upload the local "data" directory to the session bucket and keep the
# resulting S3 URI; the trailing expression displays it as the cell output.
upload_session = sagemaker.Session()
input_data = upload_session.upload_data(path="data", bucket=bucket)
input_data

's3://sagemaker-us-east-1-885248014373/data'

In [64]:
    # TrainingInput was never imported anywhere in this notebook, so on a
    # fresh kernel this cell raised NameError; import it right where it is used.
    from sagemaker.inputs import TrainingInput

    # Describe the uploaded S3 data as CSV input for the training job.
    train_input = TrainingInput(input_data, content_type="csv")

In [65]:
# Start the training job, supplying the CSV input under the "train" channel.
training_channels = {"train": train_input}
estimator.fit(training_channels)

2021-03-09 02:53:32 Starting - Starting the training job...
2021-03-09 02:53:56 Starting - Launching requested ML instancesProfilerReport-1615258411: InProgress
......
2021-03-09 02:54:57 Starting - Preparing the instances for training...
2021-03-09 02:55:37 Downloading - Downloading input data...
2021-03-09 02:55:57 Training - Downloading the training image......
2021-03-09 02:57:18 Uploading - Uploading generated training model[34m[2021-03-09 02:57:11.854 ip-10-0-130-56.ec2.internal:1 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of C

In [None]:
# Deploy the trained model to a real-time endpoint with data capture enabled.
# The original cell left `data_capture_config = ` without a value, which is a
# syntax error; an explicit DataCaptureConfig is supplied instead.
from sagemaker.model_monitor import DataCaptureConfig

# Sampling percentage and S3 destination are left at the SDK defaults.
data_capture_config = DataCaptureConfig(enable_capture=True)

# Keep the returned predictor so the endpoint can be invoked and deleted later.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.t2.medium",
    data_capture_config=data_capture_config,
)

In [15]:
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

In [5]:
# Synthetic classification dataset: 1000 samples, 4 features (2 informative,
# 0 redundant), generated without shuffling and with a fixed seed.
# Note: this rebinds X and y, replacing the iris data loaded earlier.
X, y = make_classification(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    n_redundant=0,
    shuffle=False,
    random_state=0,
)

In [6]:
# Shallow random forest (depth 2) with a fixed seed for repeatable fits.
forest_params = {"max_depth": 2, "random_state": 0}
clf = RandomForestClassifier(**forest_params)

In [7]:
# Train the forest on the synthetic data; the fitted estimator is displayed
# as the cell output.
clf.fit(X=X, y=y)

RandomForestClassifier(max_depth=2, random_state=0)

In [10]:
# Smoke test: predict the class of a single all-zero sample.
zero_sample = [[0, 0, 0, 0]]
clf.predict(zero_sample)

array([1])

In [16]:
# Persist the fitted classifier to disk in the current working directory.
joblib.dump(value=clf, filename="modelo.joblib")

['modelo.joblib']

In [17]:
# Reload the classifier that was just written to disk.
# NOTE: joblib.load unpickles the file, so only load files you trust.
modelo = joblib.load(filename="modelo.joblib")

In [20]:
# Show the Python type of the nested-list sample used for predictions.
type([[0] * 4])

list

In [23]:
# Predict with the reloaded model and show the result as a plain Python list.
reloaded_prediction = modelo.predict([[0, 0, 0, 0]])
list(reloaded_prediction)

[1]