# Development of a Random Forest model with SageMaker

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from time import time
import sagemaker
from sagemaker import get_execution_role
import json
import boto3

## Initializations

In [2]:
# SageMaker session and the IAM role the notebook executes under.
sess = sagemaker.Session()
role = get_execution_role()

# S3 bucket used by this notebook.
bucket = 'sagemaker-churns-prediction'

# Region and account id are needed to build the ECR image URI.
region_name = boto3.Session().region_name
sts_client = sess.boto_session.client('sts')
account = sts_client.get_caller_identity()['Account']

# URI of the custom random-forest image in this account's ECR registry.
image = f'{account}.dkr.ecr.{region_name}.amazonaws.com/sagemaker-random-forest:latest'

# Echo the resolved values, one per line.
print(f'Account: {account}', f'Role: {role}', f'Image: {image}', sep='\n')

Account: 254464376720
Role: arn:aws:iam::254464376720:role/service-role/AmazonSageMaker-ExecutionRole-20200522T014344
Image: 254464376720.dkr.ecr.us-east-1.amazonaws.com/sagemaker-random-forest:latest


## Build and push the Docker container

In [3]:
%%sh
# Log out of Docker's default registry.
# NOTE(review): presumably done to clear any cached credentials before the ECR
# login in the build cell — confirm this is still needed.
docker logout

Not logged in to https://index.docker.io/v1/


In [4]:
%%sh

# Build the Docker image found in ./container and push it to ECR so that it is
# ready for use by SageMaker.

# Abort on the first failing command or unset variable, so that a failed build
# or login can never be followed by a push of a stale image.
set -eu

# The image name is fixed here (the script takes no arguments). It is used as the
# image name on the local machine and combined with the account and region to
# form the repository name for ECR.
algorithm_name=sagemaker-random-forest

cd container

# The container entry points must be executable.
chmod +x random_forest/train
chmod +x random_forest/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined).
# `aws configure get` exits non-zero when the key is unset, hence `|| true` under `set -e`.
region=$(aws configure get region || true)
region=${region:-us-west-2}

registry="${account}.dkr.ecr.${region}.amazonaws.com"
fullname="${registry}/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it. The region is passed
# explicitly so the us-west-2 fallback also applies to these calls.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${algorithm_name}" > /dev/null 2>&1
then
    aws ecr create-repository --region "${region}" --repository-name "${algorithm_name}" > /dev/null
fi

# Log in to ECR. `aws ecr get-login` is deprecated (removed in AWS CLI v2); piping
# the password via stdin also keeps it out of the process list and shell history.
aws ecr get-login-password --region "${region}" \
    | docker login --username AWS --password-stdin "${registry}"

# Build the docker image locally with the image name and then push it to ECR
# with the full name.
docker build -t "${algorithm_name}" .
docker tag "${algorithm_name}" "${fullname}"

docker push "${fullname}"


Login Succeeded
Sending build context to Docker daemon  2.134MB
Step 1/9 : FROM ubuntu:16.04
 ---> 005d2078bdfa
Step 2/9 : MAINTAINER Amazon AI <sage-learner@amazon.com>
 ---> Using cache
 ---> eb343bceaa44
Step 3/9 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          python          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 0484954b4436
Step 4/9 : RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py &&     pip install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.20.2 pandas flask gevent gunicorn &&         (cd /usr/local/lib/python2.7/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) &&         rm -rf /root/.cache
 ---> Using cache
 ---> 6228845cd650
Step 5/9 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> b66cae485a7b
Step 6/9 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> c3c8090cfe46
Step 7/9 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> 24b3dc2

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



## Model training

In [5]:
# Estimator that trains with the custom container image on a single
# ml.c4.2xlarge instance.
clf = sagemaker.estimator.Estimator(
    image_name=image,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.c4.2xlarge',
    # Use the shared `bucket` variable rather than repeating the bucket name, so
    # the output location and the training data location cannot drift apart.
    output_path='s3://{}/output'.format(bucket),
    sagemaker_session=sess,
)

In [6]:
# S3 URI of the training CSV inside the project bucket.
data_location = f's3://{bucket}/raw_data/churn_train.csv'
data_location

's3://sagemaker-churns-prediction/raw_data/churn_train.csv'

In [7]:
# Fit the estimator on the training CSV at `data_location`; %time reports the
# CPU and wall-clock time spent in the call.
%time clf.fit(data_location)

2020-06-02 21:36:46 Starting - Starting the training job...
2020-06-02 21:36:48 Starting - Launching requested ML instances.........
2020-06-02 21:38:30 Starting - Preparing the instances for training......
2020-06-02 21:39:47 Downloading - Downloading input data...
2020-06-02 21:39:53 Training - Downloading the training image..[34mStarting the training.[0m
[34mTraining complete.[0m

2020-06-02 21:40:35 Uploading - Uploading generated training model
2020-06-02 21:40:35 Completed - Training job completed
Training seconds: 48
Billable seconds: 48
CPU times: user 481 ms, sys: 20.9 ms, total: 502 ms
Wall time: 4min 12s
