# Instructor Do: Create and Deploy a Machine Learning Model in Amazon SageMaker

In [1]:
# Initial imports
import os
import io
import json
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


# Amazon SageMaker and related imports
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
import boto3  # AWS Python sdk

%matplotlib inline

  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## Reading Data

In [2]:
# Read the weather features data
file_path = Path("X_data.csv")
X = pd.read_csv(file_path)

# Transforming the "TempAvgF" column to a vector
X = X.to_numpy()

In [3]:
# Read the target data (precipitation sum inches)
file_path = Path("y_data.csv")
y = pd.read_csv(file_path)

# Transforming y into a vector
y = y.iloc[:, 0].values

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Creating the Linear Regression Model

### Initial Configurations

In [5]:
# Define Amazon S3 Bucket to store model data
bucket = "fel-final-project-west"  #'<your_s3_bucket_name_here>'

# Define prefix for data files
prefix = "covid2020-regression"

# AWS IAM role
role = get_execution_role()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


### Uploading Training Data to Amazon S3

In [6]:
# Encode the training data as Protocol Buffer
buf = io.BytesIO()
vectors = np.array(X_train).astype("float32")
labels = np.array(y_train).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
buf.seek(0)

# Upload encoded training data to Amazon S3
key = "linear_train.data"
boto3.resource("s3").Bucket(bucket).Object(
    os.path.join(prefix, "train", key)
).upload_fileobj(buf)
s3_train_data = "s3://{}/{}/train/{}".format(bucket, prefix, key)
print("Training data uploaded to: {}".format(s3_train_data))

Training data uploaded to: s3://fel-final-project-west/covid2020-regression/train/linear_train.data


### Upload Test Data to Amazon S3

In [7]:
# Encode the testing data as Protocol Buffer
buf = io.BytesIO()
vectors = np.array(X_test).astype("float32")
labels = np.array(y_test).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
buf.seek(0)

# Upload encoded testing data to Amazon S3
key = "linear_test.data"
boto3.resource("s3").Bucket(bucket).Object(
    os.path.join(prefix, "test", key)
).upload_fileobj(buf)
s3_test_data = "s3://{}/{}/test/{}".format(bucket, prefix, key)
print("Testing data uploaded to: {}".format(s3_test_data))

Testing data uploaded to: s3://fel-final-project-west/covid2020-regression/test/linear_test.data


### Creating Model Instance

In [8]:
# Create an instance of the linear learner algorithm
container = get_image_uri(boto3.Session().region_name, "linear-learner")

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [9]:
# Start the Amazon SageMaker session
sess = sagemaker.Session()

# Create an instance of the linear learner estimator
linear = sagemaker.estimator.Estimator(
    container,
    role,
    train_instance_count=1,
    train_instance_type="ml.m4.xlarge",
    output_path="s3://{}/{}/output".format(bucket, prefix),
    sagemaker_session=sess,
)

# Define linear learner hyperparameters
linear.set_hyperparameters(
    feature_dim=28,
    mini_batch_size=100,
    predictor_type="binary_classifier",
    epochs=10,
    num_models=32,
    loss="logistic",
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


## Training the Machine Learning Model

In [10]:
# Fitting the linear learner model with the training data
linear.fit({"train": s3_train_data, "test": s3_test_data})

INFO:sagemaker:Creating training-job with name: linear-learner-2023-11-16-03-37-58-753


2023-11-16 03:37:58 Starting - Starting the training job...
2023-11-16 03:38:23 Starting - Preparing the instances for training.........
2023-11-16 03:39:40 Downloading - Downloading input data...
2023-11-16 03:40:15 Training - Downloading the training image......
2023-11-16 03:41:26 Training - Training image download completed. Training in progress....[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[11/16/2023 03:41:45 INFO 140453937297216] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'o

## Deploying the Model to Make Predictions

In [11]:
# An instance of the linear-learner predictor is created
linear_predictor = linear.deploy(initial_instance_count=1, instance_type="ml.t2.medium")

INFO:sagemaker:Creating model with name: linear-learner-2023-11-16-03-56-35-657
INFO:sagemaker:Creating endpoint-config with name linear-learner-2023-11-16-03-56-35-657
INFO:sagemaker:Creating endpoint with name linear-learner-2023-11-16-03-56-35-657


-------------------!

In [12]:
# Linear predictor configurations
linear_predictor.serializer = CSVSerializer()
linear_predictor.deserializer = JSONDeserializer()

In [15]:
# Making some predictions using the test data.
result = linear_predictor.predict(X_test)
y_predictions = np.array([r["predicted_label"] for r in result["predictions"]])
y_predictions[:5]

array([1, 0, 0, 1, 0])

## Model Evaluation

In [16]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# Calculating the confusion matrix
cm = confusion_matrix(y_test, y_predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, y_predictions)

In [18]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test,y_predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,39928,2140
Actual 1,2330,40110


Accuracy Score : 0.9471055994698727
Classification Report
              precision    recall  f1-score   support

           0       0.94      0.95      0.95     42068
           1       0.95      0.95      0.95     42440

    accuracy                           0.95     84508
   macro avg       0.95      0.95      0.95     84508
weighted avg       0.95      0.95      0.95     84508



## Delete Amazon SageMaker end-point

In [19]:
sagemaker.Session().delete_endpoint(linear_predictor.endpoint)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker:Deleting endpoint with name: linear-learner-2023-11-16-03-56-35-657
