# Direct Marketing with Amazon SageMaker Autopilot

Own code to learn from the other sagemaker_autopilot_direct_marketing.ipynb

- [Link to source](https://sagemaker-examples.readthedocs.io/en/latest/autopilot/sagemaker_autopilot_direct_marketing.html)
---

## Normal Conf

In [12]:
import sagemaker
import boto3
from sagemaker import get_execution_role

region = boto3.Session().region_name
sess = sagemaker.Session()
bucket = "test-sagemaker-examples-1357942113492"
prefix = "DEMO_AutoPilot"

role = get_execution_role()

sm = boto3.Session().client(service_name="sagemaker", region_name=region)

---

## Download the dataset

In [13]:
!apt-get install unzip
!wget -N https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
!unzip -o bank-additional.zip

local_data_path = "./bank-additional/bank-additional-full.csv"

Reading package lists... Done
Building dependency tree       
Reading state information... Done
unzip is already the newest version (6.0-23+deb10u3).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
--2023-05-24 09:18:53--  https://sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com/autopilot/direct_marketing/bank-additional.zip
Resolving sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com (sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com)... 52.218.252.185, 3.5.77.162, 52.92.179.50, ...
Connecting to sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com (sagemaker-sample-data-us-west-2.s3-us-west-2.amazonaws.com)|52.218.252.185|:443... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘bank-additional.zip’ not modified on server. Omitting download.

Archive:  bank-additional.zip
  inflating: bank-additional/bank-additional-names.txt  
  inflating: bank-additional/bank-additional.csv  
  inflating: bank-additional/bank-a

In [14]:
# Check that all the columns and rows seem okay
import pandas as pd

data = pd.read_csv(local_data_path)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 10)
data

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,261,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,149,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,226,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,151,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,307,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41183,73,retired,married,professional.course,no,yes,no,cellular,nov,fri,334,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,yes
41184,46,blue-collar,married,professional.course,no,no,no,cellular,nov,fri,383,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,no
41185,56,retired,married,university.degree,no,yes,no,cellular,nov,fri,189,2,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,no
41186,44,technician,married,professional.course,no,no,no,cellular,nov,fri,442,1,999,0,nonexistent,-1.1,94.767,-50.8,1.028,4963.6,yes


---

## Split the data to get batch inference at the end

In [15]:
train_data = data.sample(frac=0.8, random_state=200)
test_data = data.drop(train_data.index)
test_data_no_target = test_data.drop(columns=["y"])

---

## Upload to s3

In [16]:
train_file = "train_data.csv"
train_data.to_csv(train_file, index=False, header=True)
train_data_s3_path = sess.upload_data(
    path=train_file, 
    bucket=bucket,
    key_prefix=prefix + "/train")

test_file = "test_data.csv"
test_data_no_target.to_csv(test_file, index=False, header=False)
test_data_s3_path = sess.upload_data(
    path=test_file,
    bucket=bucket,
    key_prefix=prefix + "/test")
print("Data uploaded.")

Data uploaded.


---

## Setting up SageMaker Autopilot

In [17]:
auto_ml_job_config = {"CompletionCriteria": {"MaxCandidates": 5}}

input_data_config = [
    {
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://{}/{}/train".format(bucket, prefix),
            }
        },
        "TargetAttributeName": "y",
    }
]

output_data_config = {"S3OutputPath": "s3://{}/{}/output".format(bucket, prefix)}

---

## Launch Autopilot

In [18]:
from time import gmtime, strftime, sleep

timestamp_suffix = strftime("%d-%H-%M-%S", gmtime())

auto_ml_job_name = "automl-banking-" + timestamp_suffix
print("AutoMLJobName" + auto_ml_job_name)

sm.create_auto_ml_job(
    AutoMLJobName=auto_ml_job_name,
    InputDataConfig=input_data_config,
    OutputDataConfig=output_data_config,
    AutoMLJobConfig=auto_ml_job_config,
    RoleArn=role,
)

AutoMLJobNameautoml-banking-24-09-19-15


{'AutoMLJobArn': 'arn:aws:sagemaker:eu-west-1:790592228004:automl-job/automl-banking-24-09-19-15',
 'ResponseMetadata': {'RequestId': '5acc0747-1c4c-4f83-b6dc-c92eca9d76c5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5acc0747-1c4c-4f83-b6dc-c92eca9d76c5',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '97',
   'date': 'Wed, 24 May 2023 09:19:15 GMT'},
  'RetryAttempts': 0}}

In [19]:
print("JobStatus - Secondary Status")
print("------------------------------")


describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
print(describe_response["AutoMLJobStatus"] + " - " + describe_response["AutoMLJobSecondaryStatus"])
job_run_status = describe_response["AutoMLJobStatus"]

while job_run_status not in ("Failed", "Completed", "Stopped"):
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response["AutoMLJobStatus"]

    print(
        describe_response["AutoMLJobStatus"] + " - " + describe_response["AutoMLJobSecondaryStatus"]
    )
    sleep(30)

JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - Model

In [20]:
best_candidate = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)["BestCandidate"]
best_candidate_name = best_candidate["CandidateName"]
print(best_candidate)
print("\n")
print("CandidateName: " + best_candidate_name)
print(
    "FinalAutoMLJobObjectiveMetricName: "
    + best_candidate["FinalAutoMLJobObjectiveMetric"]["MetricName"]
)
print(
    "FinalAutoMLJobObjectiveMetricValue: "
    + str(best_candidate["FinalAutoMLJobObjectiveMetric"]["Value"])
)

{'CandidateName': 'automl-banking-24-09-19-15rxu7Fa-003-510a817f', 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:f1_binary', 'Value': 0.6088600158691406, 'StandardMetricName': 'F1'}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob', 'CandidateStepArn': 'arn:aws:sagemaker:eu-west-1:790592228004:processing-job/automl-banking-24-09-19-15-db-1-84269eba4a594924938bd56ad5eb3a4', 'CandidateStepName': 'automl-banking-24-09-19-15-db-1-84269eba4a594924938bd56ad5eb3a4'}, {'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'arn:aws:sagemaker:eu-west-1:790592228004:training-job/automl-banking-24-09-19-15-dpp1-1-681d02f39cd948d3bc80d90c8098d', 'CandidateStepName': 'automl-banking-24-09-19-15-dpp1-1-681d02f39cd948d3bc80d90c8098d'}, {'CandidateStepType': 'AWS::SageMaker::TransformJob', 'CandidateStepArn': 'arn:aws:sagemaker:eu-west-1:790592228004:transform-job/automl-banking-24-09-19-15-dpp1-csv-1-1a64e888dfa64d

In [21]:
model_name = "automl-banking-model-" + timestamp_suffix

model = sm.create_model(
    Containers=best_candidate["InferenceContainers"],
    ModelName=model_name,
    ExecutionRoleArn=role
)

print("Model ARN corresponding to the best candidate is : {}".format(model["ModelArn"]))

Model ARN corresponding to the best candidate is : arn:aws:sagemaker:eu-west-1:790592228004:model/automl-banking-model-24-09-19-15


In [23]:
transform_job_name = "automl-banking-transform-" + timestamp_suffix

transform_input = {
    "DataSource": {"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": test_data_s3_path}},
    "ContentType": "text/csv",
    "CompressionType": "None",
    "SplitType": "Line",
}

transform_output = {
    "S3OutputPath": "s3://{}/{}/inference-results".format(bucket, prefix),
}

transform_resources = {"InstanceType": "ml.m5.4xlarge", "InstanceCount": 1}

sm.create_transform_job(
    TransformJobName=transform_job_name,
    ModelName=model_name,
    TransformInput=transform_input,
    TransformOutput=transform_output,
    TransformResources=transform_resources,
)

{'TransformJobArn': 'arn:aws:sagemaker:eu-west-1:790592228004:transform-job/automl-banking-transform-24-09-19-15',
 'ResponseMetadata': {'RequestId': 'bf35e594-1d70-4f6c-b923-e27557a4561d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'bf35e594-1d70-4f6c-b923-e27557a4561d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '113',
   'date': 'Wed, 24 May 2023 10:18:45 GMT'},
  'RetryAttempts': 0}}

In [24]:
print("JobStatus")
print("----------")


describe_response = sm.describe_transform_job(TransformJobName=transform_job_name)
job_run_status = describe_response["TransformJobStatus"]
print(job_run_status)

while job_run_status not in ("Failed", "Completed", "Stopped"):
    describe_response = sm.describe_transform_job(TransformJobName=transform_job_name)
    job_run_status = describe_response["TransformJobStatus"]
    print(job_run_status)
    sleep(30)

JobStatus
----------
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
Completed


In [25]:
s3_output_key = "{}/inference-results/test_data.csv.out".format(prefix)
local_inference_results_path = "inference_results.csv"

s3 = boto3.resource("s3")
inference_results_bucket = s3.Bucket(bucket)

inference_results_bucket.download_file(s3_output_key, local_inference_results_path)

data = pd.read_csv(local_inference_results_path, sep=";")
pd.set_option("display.max_rows", 10)
data

Unnamed: 0,no
0,no
1,no
2,no
3,no
4,no
...,...
8232,yes
8233,yes
8234,no
8235,yes


---

## View other candidates explored by SageMaker Autopilot

In [26]:
candidates = sm.list_candidates_for_auto_ml_job(
    AutoMLJobName=auto_ml_job_name, 
    SortBy="FinalObjectiveMetricValue")["Candidates"]
index = 1
for candidate in candidates:
    print(
        str(index)
        + " "
        + candidate["CandidateName"]
        + " "
        + str(candidate["FinalAutoMLJobObjectiveMetric"]["Value"])
    )
    index += 1

1 automl-banking-24-09-19-15rxu7Fa-003-510a817f 0.6088600158691406
2 automl-banking-24-09-19-15rxu7Fa-001-7de0dc3a 0.6081100106239319
3 automl-banking-24-09-19-15rxu7Fa-004-3ac3ab35 0.6073399782180786
4 automl-banking-24-09-19-15rxu7Fa-002-d66b653f 0.6068099737167358
5 automl-banking-24-09-19-15rxu7Fa-005-33f00766 0.26718953251838684


---

## Candidate Generation Notebook

In [27]:
sm.describe_auto_ml_job(
    AutoMLJobName=auto_ml_job_name)["AutoMLJobArtifacts"][
    "CandidateDefinitionNotebookLocation"
]

's3://test-sagemaker-examples-1357942113492/DEMO_AutoPilot/output/automl-banking-24-09-19-15/sagemaker-automl-candidates/automl-banking-24-09-19-15-pr-1-cda543afc1fd445d82a3e9babd81d18/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb'

In [28]:
sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)["AutoMLJobArtifacts"][
    "DataExplorationNotebookLocation"
]

's3://test-sagemaker-examples-1357942113492/DEMO_AutoPilot/output/automl-banking-24-09-19-15/sagemaker-automl-candidates/automl-banking-24-09-19-15-pr-1-cda543afc1fd445d82a3e9babd81d18/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb'

---

## Cleanup

In [29]:
# s3 = boto3.resource('s3')
# bucket = s3.Bucket(bucket)

# job_outputs_prefix = '{}/output/{}'.format(prefix,auto_ml_job_name)
# bucket.objects.filter(Prefix=job_outputs_prefix).delete()

[{'ResponseMetadata': {'RequestId': '7FBPAV6Q603X7XV8',
   'HostId': 'tPQTEKtTrNXfLVcFwJylU32U+6ZFqwOlNuUnVm8ytuNw92NqYQJlZj6AMyqziSYY4hoDLPqxHgc=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'tPQTEKtTrNXfLVcFwJylU32U+6ZFqwOlNuUnVm8ytuNw92NqYQJlZj6AMyqziSYY4hoDLPqxHgc=',
    'x-amz-request-id': '7FBPAV6Q603X7XV8',
    'date': 'Wed, 24 May 2023 10:48:14 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'DEMO_AutoPilot/output/automl-banking-24-09-19-15/transformed-data/dpp3/csv/train/chunk_3.csv.out'},
   {'Key': 'DEMO_AutoPilot/output/automl-banking-24-09-19-15/transformed-data/dpp1/csv/train/chunk_5.csv.out'},
   {'Key': 'DEMO_AutoPilot/output/automl-banking-24-09-19-15/transformed-data/dpp3/csv/validation/chunk_11.csv.out'},
   {'Key': 'DEMO_AutoPilot/output/automl-banking-24-09-19-15/transformed-data/dpp2/rpb/train/chunk_55.csv.out'},
   {