#### 라이브러리 Import

In [1]:
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker import get_execution_role
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import display
from time import gmtime, strftime
from sagemaker.predictor import csv_serializer

In [2]:
role = get_execution_role()

role

'arn:aws:iam::860981721775:role/service-role/AmazonSageMaker-ExecutionRole-20220118T115216'

In [3]:
prefix = 'sagemaker/DEMO-xgboost-dm'

In [4]:
my_region = boto3.session.Session().region_name

my_region

'ap-northeast-1'

In [5]:
container = get_image_uri(my_region ,'xgboost',repo_version='0.90-2')

container

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


'354813040037.dkr.ecr.ap-northeast-1.amazonaws.com/sagemaker-xgboost:0.90-2-cpu-py3'

In [6]:
bucket_name = 'sagemaker-bucket-jaedong' # <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET
s3 = boto3.resource('s3')

In [7]:
predictor = sagemaker.predictor.Predictor(
    endpoint_name='cori-endpoint',
    serializer=sagemaker.serializers.CSVSerializer(),
    deserializer=sagemaker.deserializers.CSVDeserializer()
)

In [8]:
try:
    urllib.request.urlretrieve ("https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv", "bank_clean.csv")
    print('Success: downloaded bank_clean.csv.')
except Exception as e:
    print('Data load error: ',e)

try:
    model_data = pd.read_csv('./bank_clean.csv',index_col=0)
    print('Success: Data loaded into dataframe.')
except Exception as e:
    print('Data load error: ',e)

Success: downloaded bank_clean.csv.
Success: Data loaded into dataframe.


In [9]:
train_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.7 * len(model_data))])
print(train_data.shape, test_data.shape)

(28831, 61) (12357, 61)


In [10]:
pd.concat([train_data['y_yes'], train_data.drop(['y_no', 'y_yes'], axis=1)], axis=1).to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
s3_input_train = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')

In [11]:
s3_input_train

<sagemaker.inputs.TrainingInput at 0x7f0bf7ff8fd0>

In [12]:
test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values

In [13]:
predictor.serializer = csv_serializer # set the serializer type

In [23]:
# predictions = predictor.predict(test_data_array).decode('utf-8') # predict!
predictions = predictor.predict(test_data_array)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [19]:
np.shape(predictions)

(1, 12357)

In [20]:
predictions

[['0.06372358649969101',
  '0.08991933614015579',
  '0.04901290312409401',
  '0.32814913988113403',
  '0.11094612628221512',
  '0.06898301094770432',
  '0.03412386775016785',
  '0.030153866857290268',
  '0.033666107803583145',
  '0.02613597922027111',
  '0.10095620155334473',
  '0.04858618602156639',
  '0.038228295743465424',
  '0.06188351288437843',
  '0.08098895847797394',
  '0.03026619926095009',
  '0.08061674982309341',
  '0.023862453177571297',
  '0.0916794016957283',
  '0.0820452868938446',
  '0.040649473667144775',
  '0.04021625593304634',
  '0.029627172276377678',
  '0.0718991681933403',
  '0.057991500943899155',
  '0.0617767758667469',
  '0.06239861249923706',
  '0.03391808643937111',
  '0.04446716606616974',
  '0.03145658224821091',
  '0.11199136078357697',
  '0.012473742477595806',
  '0.10638505220413208',
  '0.03214266151189804',
  '0.05845409631729126',
  '0.04668162763118744',
  '0.2278384566307068',
  '0.03256712481379509',
  '0.45274725556373596',
  '0.07685521990060806

In [None]:
for data in test_data_array:
    print(type(data))