# Imports and setup

In [6]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.processing import SKLearnProcessor

# Execution role of this notebook; handed to the processor below.
role = get_execution_role()

# Region of the default boto3 session.
region = boto3.Session().region_name

# scikit-learn processor handle configured for a single ml.m5.xlarge instance.
sklearn_processor = SKLearnProcessor(
    role=role,
    framework_version='0.20.0',
    instance_count=1,
    instance_type='ml.m5.xlarge',
)

# Script for Custom model

In [16]:
%%writefile script.py
import argparse
import pandas as pd
import os
# joblib is imported as a standalone package (not via sklearn.externals).
import joblib
from sklearn.linear_model import LinearRegression
import numpy as np
import subprocess
import sys

def install(package):
    """Install ``package`` with pip into the interpreter running this script.

    Runs ``<sys.executable> -m pip install <package>`` as a child process.
    A failed install does not raise: a warning is written to stderr and the
    exit status is returned so the caller can decide what to do.

    Args:
        package: Requirement string handed to ``pip install``.

    Returns:
        int: pip's exit status (0 on success).
    """
    exit_code = subprocess.call([sys.executable, "-m", "pip", "install", package])
    if exit_code != 0:
        # Surface the failure instead of silently dropping the status.
        print("pip install %s failed with exit code %s" % (package, exit_code),
              file=sys.stderr)
    return exit_code

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Command-line arguments for the training run. Only --model-dir takes its
    # default from the environment (SM_MODEL_DIR); the other two default to
    # fixed S3 locations.
    parser.add_argument('--output-data-dir', type=str, default='s3://medicalcost-bucket/output_data')
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default='s3://medicalcost-bucket/data.csv')

    args = parser.parse_args()

    # s3fs is only needed so pandas can read an s3:// URL. Install it here,
    # inside the __main__ guard and only when the path is on S3, so that merely
    # importing this module (e.g. to reach model_fn) never shells out to pip.
    # NOTE(review): SageMaker presumably also stages the 'train' channel on
    # local disk (SM_CHANNEL_TRAIN); reading from there would remove the need
    # for s3fs entirely -- confirm before switching.
    if args.train.startswith('s3://'):
        install('s3fs')

    # Load the training data into a pandas DataFrame.
    train_data = pd.read_csv(args.train)

    # 'charges' is the regression target; every other column is a feature.
    train_y = train_data['charges']
    train_X = train_data.drop('charges', axis=1)

    # Fit an ordinary least-squares linear regression.
    linear = LinearRegression()
    model_linear = linear.fit(train_X, train_y)
    print('model has been fitted')

    # Persist the fitted model into the model directory under the file name
    # that model_fn loads.
    joblib.dump(model_linear, os.path.join(args.model_dir, "model.joblib"))
def model_fn(model_dir):
    """Load the fitted regressor back from ``model_dir``.

    The file name must stay identical to the one used when the model is
    dumped in the ``__main__`` section of this script ("model.joblib").
    """
    model_path = os.path.join(model_dir, "model.joblib")
    return joblib.load(model_path)

Overwriting script.py


# Estimator for custom model

In [17]:
from sagemaker.sklearn.estimator import SKLearn

# Entry-point script (written by the %%writefile cell) and the scikit-learn
# framework version passed to the estimator.
script_path = 'script.py'
FRAMEWORK_VERSION = "0.23-1"

# Estimator that runs script.py on an ml.c4.xlarge instance under `role`.
sklearn = SKLearn(
    role=role,
    instance_type="ml.c4.xlarge",
    framework_version=FRAMEWORK_VERSION,
    entry_point=script_path,
)

# Train

In [18]:
# Start training; the 'train' channel points at the CSV on S3.
sklearn.fit(inputs={'train': 's3://medicalcost-bucket/data.csv'})

2020-12-14 05:52:29 Starting - Starting the training job...
2020-12-14 05:52:53 Starting - Launching requested ML instancesProfilerReport-1607925149: InProgress
......
2020-12-14 05:53:53 Starting - Preparing the instances for training......
2020-12-14 05:54:54 Downloading - Downloading input data
2020-12-14 05:54:54 Training - Downloading the training image...
2020-12-14 05:55:25 Uploading - Uploading generated training model[34m2020-12-14 05:55:15,005 sagemaker-training-toolkit INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-12-14 05:55:15,006 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-12-14 05:55:15,016 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-12-14 05:55:15,347 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-12-14 05:55:15,362 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus in

# Deploy

In [19]:
# Deploy the trained estimator to an endpoint on a single ml.m5.xlarge instance.
predictor = sklearn.deploy(instance_type="ml.m5.xlarge", initial_instance_count=1)

-------------!

# Test Dataset

In [None]:
#Test endpoint
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.processing import SKLearnProcessor

# Execution role of this notebook.
role = get_execution_role()

# Region of the default boto3 session.
region = boto3.Session().region_name

In [None]:
import itertools
import pandas as pd

# CSV used to exercise the endpoint. The path has no placeholders, so it is
# used as a plain string rather than passed through str.format.
input_data = 's3://medicalcost-bucket/deploy_test.csv'
test_data = pd.read_csv(input_data)

# Split off the 'charges' target so only the remaining columns are sent to the endpoint.
test_y = test_data['charges']
test_X = test_data.drop(columns='charges')

In [None]:
# Endpoint predictions on the first line, true 'charges' values on the next.
print(predictor.predict(test_X.values), test_y.values, sep='\n')

# Delete endpoint

In [None]:
# Tear down the endpoint behind `predictor` (created by the deploy cell) once testing is done.
predictor.delete_endpoint()