In [2]:
!pip3 install -q -U 'sagemaker>=2.126.0'

In [3]:
!pip3 install autogluon --no-cache-dir

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [4]:
import sagemaker
import pandas as pd
import os
import boto3

In [5]:
# Helper wrappers referred to earlier (imported from the ag_model module)
from ag_model import (
    AutoGluonSagemakerEstimator,
    AutoGluonNonRepackInferenceModel,
    AutoGluonSagemakerInferenceModel,
    AutoGluonRealtimePredictor,
)

from sagemaker import utils
from sagemaker.serializers import CSVSerializer

# Execution role and SDK session shared by the cells below.
role = sagemaker.get_execution_role()
session = sagemaker.Session()
# Read the region through the public attribute instead of the private
# `_region_name`, which is an implementation detail of the SDK.
region = session.boto_region_name

# S3 locations: the source archive lives under `s3_data_prefix` in `bucket`;
# each run stages its inputs under a fresh timestamped prefix.
bucket = "mle-capstone"
s3_prefix = utils.sagemaker_timestamp()
s3_data_prefix = "final-data/Final-Capstone-Data.zip"
# NOTE(review): output_path is not referenced by any of the visible cells;
# confirm whether it was meant to be passed to the estimator.
output_path = f"s3://{bucket}/{s3_prefix}/output/"

In [38]:
# Download the zipped dataset from S3 into the local "data" directory.
# This only needs to be executed once; the archive is extracted in the following cell.
session.download_data(path="data", bucket=bucket, key_prefix=s3_data_prefix)

In [39]:
# Extract the downloaded archive into the working directory;
# -o overwrites files from a previous extraction without prompting.
!unzip -o data/Final-Capstone-Data.zip

Archive:  data/Final-Capstone-Data.zip
   creating: Final-Capstone-Data/
  inflating: Final-Capstone-Data/valid.csv  
  inflating: Final-Capstone-Data/test.csv  
  inflating: Final-Capstone-Data/train.csv  


In [46]:
# Estimator describing the training job: which script to run, on what
# hardware, and with which AutoGluon DLC container / Python version.
ag = AutoGluonSagemakerEstimator(
    entry_point="scripts/training_script.py",
    base_job_name="autogluon-train",
    role=role,
    region=region,
    # Replace this with the AutoGluon DLC container version you want to use
    framework_version="0.7",
    py_version="py39",
    # You might want to use GPU instances for Text/Image/MultiModal Predictors etc
    instance_type="ml.m5.2xlarge",
    instance_count=1,
    # Disable torch profiler instrumentation to avoid deserialization issues during deployment
    disable_profiler=True,
    debugger_hook_config=False,
)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [47]:
def _stage(*path_parts):
    """Upload one local file under the run's S3 prefix and return what upload_data returns."""
    return ag.sagemaker_session.upload_data(
        path=os.path.join(*path_parts), bucket=bucket, key_prefix=s3_prefix
    )


# Stage the three data splits, the training config and the serving script,
# in the same order as before.
train_input = _stage("Final-Capstone-Data", "train.csv")
valid_input = _stage("Final-Capstone-Data", "valid.csv")
eval_input = _stage("Final-Capstone-Data", "test.csv")
config_input = _stage("config", "config-med.yaml")
inference_script = _stage("scripts", "inference_script.py")

# Each key becomes a named input channel of the training job.
training_channels = {
    "config": config_input,
    "train": train_input,
    "valid": valid_input,
    "test": eval_input,
    "serving": inference_script,
}

job_name = utils.unique_name_from_base("test-autogluon-image")
ag.fit(training_channels, job_name=job_name)

INFO:sagemaker:Creating training-job with name: test-autogluon-image-1681329605-8167


2023-04-12 20:00:09 Starting - Starting the training job...
2023-04-12 20:00:24 Starting - Preparing the instances for training...
2023-04-12 20:01:14 Downloading - Downloading input data...
2023-04-12 20:01:34 Training - Downloading the training image.........
2023-04-12 20:03:05 Training - Training image download completed. Training in progress....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-04-12 20:03:33,954 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-04-12 20:03:33,956 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-04-12 20:03:33,958 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2023-04-12 20:03:33,968 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-04-12 20:03:33,970 sagem

# Deployment

In [50]:
# Copy the trained model artifact (location interpolated from ag.model_data)
# into the current directory.
!aws s3 cp {ag.model_data} .

download: s3://sagemaker-us-east-1-004538843871/test-autogluon-image-1681329605-8167/output/model.tar.gz to ./model.tar.gz


In [51]:
# Confirm the artifact is present locally and check its size.
!ls -alF model.tar.gz

-rw-rw-r-- 1 ec2-user ec2-user 406665479 Apr 12 20:16 model.tar.gz


In [52]:
# Unique name for this deployment attempt. In the visible cells it is only
# used as the S3 key prefix for the model artifact below.
endpoint_name = utils.unique_name_from_base("autogluon-serving-trained-model")

# Upload the local model.tar.gz; no bucket is passed, so the session picks
# the destination bucket.
model_data = session.upload_data(
    key_prefix=f"{endpoint_name}/models",
    path=os.path.join(".", "model.tar.gz"),
)

In [24]:
# Hosting instance type, reused when the model is deployed.
# You might want to use GPU instances, i.e. ml.g4dn.2xlarge for Text/Image/MultiModal Predictors etc
instance_type = "ml.m5.2xlarge"

# Inference model built from the uploaded artifact and the serving script in ./scripts.
model = AutoGluonNonRepackInferenceModel(
    model_data=model_data,
    source_dir="scripts",
    entry_point="inference_script.py",  # example: "tabular_serve.py"
    role=role,
    region=region,
    instance_type=instance_type,
    # Replace this with the AutoGluon DLC container version you want to use
    framework_version="0.7",
    py_version="py39",
)

In [25]:
# Deploy the model to a single instance of `instance_type`; request payloads are serialized as CSV.
# NOTE(review): no endpoint clean-up appears in the visible cells; delete the endpoint
# once finished to avoid ongoing charges (confirm whether this happens elsewhere).
model.deploy(initial_instance_count=1, serializer=CSVSerializer(), instance_type=instance_type)

------!

In [27]:
# Build the real-time predictor helper from the name of the endpoint the model was deployed to.
predictor = AutoGluonRealtimePredictor(model.endpoint_name)

In [26]:
# Display the name of the endpoint created by the deploy call.
model.endpoint_name

'autogluon-inference-2023-04-13-00-11-13-621'

In [28]:
# Load the test split that was extracted from the archive.
df_test = pd.read_csv("Final-Capstone-Data/test.csv")

In [41]:
# Send the test rows to the endpoint without the customer_ID, S_2 and target columns.
# `columns=` already selects the axis, so no separate axis argument is needed.
feature_frame = df_test.drop(columns=["customer_ID", "S_2", "target"])
preds = predictor.predict(feature_frame)

In [45]:
# Put each prediction next to the true label (joined on the row index) for comparison.
p = (
    preds[["pred"]]
    .join(df_test["target"])
    .rename(columns={"target": "actual"})
)
p.head()

Unnamed: 0,pred,actual
0,1,1
1,1,1
2,0,0
3,1,1
4,1,1


In [44]:
# Count the rows where the prediction matches the label and report it against the total.
n_correct = (p["pred"] == p["actual"]).sum()
print(f"{n_correct}/{len(p)} are correct")

4982/6000 are correct
