In [23]:
import sagemaker
import boto3
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.session import s3_input, Session

In [86]:
bucket_name = 'churnbnking1'
my_region = boto3.session.Session().region_name
print(my_region)

us-east-1


In [27]:
s3 = boto3.resource('s3')
try:
    if my_region == 'us-east-1':
        s3.create_bucket(Bucket=bucket_name)
    print("s3 bucket created successfully")
except Exception as e:
    print("s3 error", e)

s3 bucket created successfully


In [87]:
prefix = "xgboost_model"
op_path = 's3://{}/{}/output4'.format(bucket_name,prefix)

In [32]:
import pandas as pd

In [35]:
df = pd.read_csv("churn.csv")

In [62]:
df.columns

Index(['CustomerId', 'Surname', 'CreditScore', 'Geography', 'Gender', 'Age',
       'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember',
       'EstimatedSalary', 'Exited'],
      dtype='object')

In [63]:
df.drop(['CustomerId', 'Surname','Geography'],axis=1,inplace=True)

In [90]:
df.drop(['Gender'],axis=1,inplace=True)

In [64]:
import numpy as np

In [91]:
train_data,test_data = np.split(df.sample(frac=1,random_state=2),[int(0.8*len(df))])

In [92]:
import os
pd.concat([train_data['Exited'],train_data.drop('Exited',axis=1)],axis=1).to_csv('train.csv', index=False, header=False)

In [93]:
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'train/train.csv')).upload_file('train.csv')
s3_input_train = sagemaker.session.s3_input(s3_data = 's3://{}/{}/train'.format(bucket_name,prefix), content_type='csv')

The class sagemaker.session.s3_input has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [95]:
pd.concat([test_data['Exited'],test_data.drop('Exited',axis=1)],axis=1).to_csv('test.csv', index=False, header=False)

In [96]:
boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'test/test.csv')).upload_file('test.csv')
s3_input_test = sagemaker.session.s3_input(s3_data = 's3://{}/{}/test'.format(bucket_name,prefix), content_type='csv')

The class sagemaker.session.s3_input has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [97]:
container = get_image_uri(boto3.Session().region_name, 'xgboost', repo_version='1.0-1')

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [98]:
container

'683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3'

In [99]:
hp = {'max_depth':'5','eta':'0.2','gamma':'4','min_child_weight':'5','subsample':'0.7','objective':'binary:logistic','num_round' : '1000'}

In [100]:
estimator = sagemaker.estimator.Estimator(image_uri = container,hyperparameters=hp,role=sagemaker.get_execution_role(),
                                         train_instance_count=1,train_instance_type='ml.m5.2xlarge',
                                         train_volume_size=5,output_path=op_path, train_use_spot_instances=True,train_max_run=200,
                                         train_max_wait=500)

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_run has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_use_spot_instances has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_max_wait has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_volume_size has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [101]:
estimator.fit({'train':s3_input_train, 'validation':s3_input_test})

2021-11-26 05:16:50 Starting - Starting the training job...
2021-11-26 05:17:13 Starting - Launching requested ML instancesProfilerReport-1637903810: InProgress
...
2021-11-26 05:17:50 Starting - Preparing the instances for training.........
2021-11-26 05:19:21 Downloading - Downloading input data...
2021-11-26 05:19:39 Training - Downloading the training image..[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value binary:logistic to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)[0m
[34mINFO:sagemaker_xgboost_container.training:Running XGBoost Sagemaker in algorithm mode[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34mINFO:root:Determined delimiter of CSV input is ','[0m
[34m[05:19:57] 8000x8 matrix with 64000 ent

In [102]:
xgb_predictor = estimator.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')

----------!

In [106]:
from sagemaker.predictor import csv_serializer
test_data_array = test_data.drop(['Exited'],axis=1).values
#xgb_predictor.content_type = 'text/csv'
xgb_predictor.serializer = csv_serializer
predictions = xgb_predictor.predict(test_data_array, initial_args={'ContentType': 'text/csv'}).decode('utf-8')
predictions_array = np.fromstring(predictions[1:],sep=',')
print(predictions_array.shape)

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


(2000,)


In [107]:
predictions_array

array([0.22591969, 0.84921354, 0.0093817 , ..., 0.00319957, 0.95756274,
       0.00371034])

In [115]:
cm=pd.crosstab(index=test_data['Exited'],columns=np.round(predictions_array),rownames=['Observed'],colnames=['Predicted'])
tn = cm.iloc[0,0]; fn = cm.iloc[1,0]; tp = cm.iloc[1,1]; fp = cm.iloc[0,1]; p = (tp+tn)/(tp+tn+fp+fn)*100
print("\n{0:<20}{1:<4.1f}%\n".format("overall classification rate:",p))
print("{0:<15}{1:<15}{2:>8}".format("Predicted","Not Exited","Exited"))
print("Observed")
print("{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})".format("Not Exited", tn/(tn+fn)*100,tn, fp/(tp+fp)*100,fp))
print("{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \n".format("Exited", fn/(tn+fn)*100,fn, fp/(tp+fp)*100,tp))


overall classification rate:84.5%

Predicted      Not Exited       Exited
Observed
Not Exited     88% (1505)    35% (101)
Exited          12% (208)     35% (186) 



In [118]:
# Deleteing the endpoints

In [None]:
sagemaker.session.Session().delete_endpoint(xgb_predictor.endpoint)
bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)
bucket_to_delete.objects.all().delete()