# Importing Important liabraries:

Steps:
 - Import Libraries.
 - Create S3 Bucket.
 - mapping train and test data in S3.
 - Mapping the path of the models  in S3.

In [1]:
import sagemaker
import boto3
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.session import s3_input, Session

In [2]:
bucket_name = 'chrn'
my_region = boto3.session.Session().region_name
print(my_region)

us-east-1


In [3]:
s3 = boto3.resource('s3')
try: 
    if my_region == 'us-east-1':
        s3.create_bucket(Bucket= bucket_name)
    print('S3 Bucket Created Successfully')
except Exception as e:
    print('S3 Error', e)

S3 Bucket Created Successfully


In [4]:
# Set an output path to save my model in the s3 bucket:
prefix = 'xgboost-as-an-bulit-in-algo'
output_path = 's3://{}/{}/output'.format(bucket_name, prefix)
print(output_path)

s3://chrn/xgboost-as-an-bulit-in-algo/output


# Downloading Dataset and storing it in s3:

In [5]:
import pandas as pd
model_data = pd.read_csv('churndata.csv', index_col=0)

In [6]:
model_data = model_data.dropna()

In [7]:
model_data.isna().sum()

churned                                  0
age                                      0
gender                                   0
loyalty_level                            0
how_long_a_customer                      0
how_far_away                             0
feel_valued_as_customer                  0
get_often_inspiration                    0
good_product_overview                    0
pick_up_flexibility                      0
fair_prices                              0
looking_forward_to_the_next_12_months    0
see_improvements_recently                0
informed_about_new_products              0
find_productst_i_want                    0
i_enjoy_buying                           0
dtype: int64

In [8]:
model_data.columns

Index(['churned', 'age', 'gender', 'loyalty_level', 'how_long_a_customer',
       'how_far_away', 'feel_valued_as_customer', 'get_often_inspiration',
       'good_product_overview', 'pick_up_flexibility', 'fair_prices',
       'looking_forward_to_the_next_12_months', 'see_improvements_recently',
       'informed_about_new_products', 'find_productst_i_want',
       'i_enjoy_buying'],
      dtype='object')

In [9]:
# Train Test Split:
import numpy as np 
train_data, test_data = np.split(model_data.sample(frac=1, random_state = 1729),[int(0.7*len(model_data))])
print(train_data.shape, test_data.shape)

(5224, 16) (2239, 16)


In [10]:
# Saving Training and Testing datasets in S3:

import os
train_data = pd.DataFrame(train_data)
train_data= train_data.to_csv('train.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')
s3_input_train = sagemaker.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket_name,prefix), content_type='csv')

In [11]:
test_data = pd.DataFrame(test_data)
test_data= test_data.to_csv('test.csv', index=False, header=False)
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'test/test.csv')).upload_file('test.csv')
s3_input_test = sagemaker.TrainingInput(s3_data='s3://{}/{}/test'.format(bucket_name,prefix), content_type='csv')

# Model Building XGBoost:

In [13]:
container = get_image_uri(boto3.Session().region_name,'xgboost',repo_version= '1.0-1')

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [24]:
# Hyperparameter Tuning:
hyperparameters = {
        "max_depth":"5",
        "eta":"0.2",
        "gamma":"4",
        "min_child_weight":"6",
        "subsample":"0.7",
        "objective":"binary:logistic",
        "num_round":50
        }

In [25]:
# Construction of Sagemaker estimator that will call xgboost-container:
estimator = sagemaker.estimator.Estimator(image_uri=container, 
                                          hyperparameters=hyperparameters,
                                          role=sagemaker.get_execution_role(),
                                          train_instance_count=1, 
                                          train_instance_type='ml.m5.2xlarge', 
                                          train_volume_size=5, # 5 GB 
                                          output_path=output_path,
                                          train_use_spot_instances=True,
                                          train_max_run=300,
                                          train_max_wait=600
                                         )

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_run has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_use_spot_instances has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_wait has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_volume_size has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [26]:
estimator.fit({'train':s3_input_train, 'validation':s3_input_test})

2021-02-18 22:32:27 Starting - Starting the training job...
2021-02-18 22:32:51 Starting - Launching requested ML instancesProfilerReport-1613687547: InProgress
......
2021-02-18 22:33:51 Starting - Preparing the instances for training...
2021-02-18 22:34:27 Downloading - Downloading input data
2021-02-18 22:34:27 Training - Downloading the training image......
2021-02-18 22:35:23 Uploading - Uploading generated training model[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of 

# Deploying ML Model:

In [27]:
xgb_predictor = estimator.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')

---------------!