# Set up the environment

In [1]:
# Standard library
import os
import sys

# Third-party AWS SDKs
import boto3
import sagemaker
from sagemaker import get_execution_role

# Put the parent directory first on sys.path so modules stored there can be
# imported. The path is resolved against the current working directory, so
# this only works when the notebook is run from its own folder.
sys.path.insert(0, os.path.abspath(".."))

# IAM role used by this SageMaker session. It is intentionally not echoed:
# only a cell's last expression is displayed, and the role identifier does
# not need to be stored in the notebook output.
role = get_execution_role()

# S3 client and the project bucket used by the notebook.
s3 = boto3.client("s3")
bucket = "capstone-bucket-4-friends"

# Show the working directory, since the relative sys.path entry depends on it.
print(os.getcwd())

# Local helper module; this import must stay below the sys.path change above
# (presumably file_utilities lives in the parent directory -- confirm).
from file_utilities import s3_download

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
/home/sagemaker-user/capstone-2024-summer/src/rachel/try_deploy


In [2]:
# Import libraries and packages
from sklearn import datasets
import numpy as np
from sklearn.model_selection import train_test_split
import joblib

import xgboost as xgb
# Record the installed xgboost version in the notebook output.
print(xgb.__version__)

1.7.6


#### Load Iris data set

In [3]:
# Load the Iris dataset that ships with scikit-learn (no download needed).
iris = datasets.load_iris()

In [4]:
# Separate the feature matrix (X) from the class labels (y).
X, y = iris.data, iris.target

In [5]:
# Feature matrix: print its dimensions, then display the first five rows.
print(X.shape)
X[:5]

(150, 4)


array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [6]:
# Target vector: print its dimensions, then display the first five labels.
print(y.shape)
y[:5]

(150,)


array([0, 0, 0, 0, 0])

#### Split the data into train and test sets for modeling

In [7]:
# Hold out 20% of the rows for testing. The split is stratified on y so both
# sets keep the same class proportions, and the seed is fixed so the split is
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
    stratify=y,
)

In [8]:
# Sanity check on the split: with test_size=0.2 the training set should hold
# 80% of the rows.
X_train.shape

(120, 4)

#### Train an XGBoost Classifier Model

In [9]:
# Configure a small boosted-tree classifier: 10 trees of depth <= 5, using
# the multi-class softmax objective.
bt = xgb.XGBClassifier(
    max_depth=5,
    learning_rate=0.2,
    n_estimators=10,
    objective="multi:softmax",
)

# Fit on the training split only, without per-iteration logging.
bt.fit(X_train, y_train, verbose=False)

In [10]:
# Predict class labels for the held-out test set. This cell only shows the
# predictions; compare them by eye with the true labels (y_test) shown in the
# following cell.
bt.predict(X_test)

array([1, 1, 1, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 0, 0, 1, 0, 2, 0, 1,
       1, 0, 1, 0, 0, 1, 2, 1], dtype=int32)

In [11]:
# True labels for the held-out test set, for comparison with the predictions.
y_test

array([1, 1, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 0, 2, 0, 0, 1, 0, 2, 0, 1,
       1, 0, 2, 0, 0, 2, 2, 1])

#### Save the model to a file using `joblib.dump`

In [12]:
# Name of the local file that will hold the serialized model.
model_file_name = "DEMO-local-xgboost-model"

# Serialize the fitted classifier to disk; the call returns the list of
# file names it wrote, which is what the cell displays.
joblib.dump(value=bt, filename=model_file_name)

['DEMO-local-xgboost-model']

#### Try loading the saved model and test it to make sure everything is fine for deployment

In [13]:
# Use the first five test rows as a small sample payload.
point_X = X_test[:5]
print(point_X)

# Write the sample to disk as a comma-separated file.
np.savetxt(fname="test_point.csv", X=point_X, delimiter=",")

[[5.4 3.  4.5 1.5]
 [5.6 3.  4.1 1.3]
 [6.3 2.8 5.1 1.5]
 [6.  3.  4.8 1.8]
 [5.1 3.3 1.7 0.5]]


In [14]:
# Path of the CSV payload written in the previous step.
file_name = "test_point.csv"

# Read the payload back into a float array; np.loadtxt accepts the path
# directly, so no explicit file handle is needed.
mypayload = np.loadtxt(file_name, delimiter=",")

print(mypayload)

[[5.4 3.  4.5 1.5]
 [5.6 3.  4.1 1.3]
 [6.3 2.8 5.1 1.5]
 [6.  3.  4.8 1.8]
 [5.1 3.3 1.7 0.5]]


In [15]:
# Reload the serialized classifier from disk into a separate variable so it
# can be checked independently of the in-memory model `bt`.
# NOTE: joblib.load is pickle-based and can execute arbitrary code; only load
# files that were produced by a trusted source (here, this notebook itself).
bt1 = joblib.load(model_file_name)

In [16]:
# Predict with the reloaded model on the payload that was read back from CSV.
reloaded_preds = bt1.predict(mypayload)

# Make the round-trip check explicit instead of relying on visual comparison:
# the reloaded model fed the CSV payload must reproduce exactly what the
# in-memory model predicts for the same five rows.
np.testing.assert_array_equal(reloaded_preds, bt.predict(point_X))

# Keep the predictions as the cell's displayed result.
reloaded_preds

array([1, 1, 1, 2, 0], dtype=int32)