In [1]:
import pandas as pd
import numpy as np
import boto3
import sagemaker
import os, sys

print (sagemaker.__version__)

sess   = sagemaker.Session()
bucket = sess.default_bucket()                     
prefix = 'sagemaker/automl-dm'
region = boto3.Session().region_name

# Role when working on a notebook instance
role = "arn:aws:iam::388295382521:role/service-role/AmazonSageMaker-ExecutionRole-20201029T114207"

sm = boto3.Session().client(service_name='sagemaker',region_name=region)
sm_rt = boto3.Session().client('runtime.sagemaker', region_name=region)

2.16.1


In [2]:
data = pd.read_csv('../finalCSVforTraining2.csv', sep=',')
data.set_index('indice', inplace=True)
pd.set_option('display.max_columns', 500)     # Make sure we can see all of the columns
pd.set_option('display.max_rows', 50)         # Keep the output on one page
data[:10] # Show the first 10 lines

data.shape # (number of lines, number of columns)

train_data, test_data, _ = np.split(data.sample(frac=1, random_state=123), 
                                                  [int(0.80 * len(data)), int(len(data))])  

# Save to CSV files
train_data.to_csv('automl-train.csv', index=False, header=True, sep=',') # Need to keep column names
test_data.to_csv('automl-test.csv', index=False, header=True, sep=',')

In [None]:
sess.upload_data(path="automl-test.csv", key_prefix=prefix + "/input")

In [3]:
job_config = {
    'CompletionCriteria': {
      'MaxRuntimePerTrainingJobInSeconds': 600,
      # 'MaxCandidates': 10,
      'MaxAutoMLJobRuntimeInSeconds': 3600
    },
}

input_data_config = [{
      'DataSource': {
        'S3DataSource': {
          'S3DataType': 'S3Prefix',
          'S3Uri': 's3://{}/{}/input'.format(bucket,prefix)
        }
      },
      'TargetAttributeName': 'classification'  # the column we want to predict
    }
]

output_data_config = { 'S3OutputPath': 's3://{}/{}/output'.format(bucket,prefix) }

# Optional parameters

problem_type = 'BinaryClassification'

job_objective = { 'MetricName': 'F1' }

In [4]:
from time import gmtime, strftime, sleep
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())

auto_ml_job_name = 'automl-dm-' + timestamp_suffix
print('AutoMLJobName: ' + auto_ml_job_name)

sm.create_auto_ml_job(AutoMLJobName=auto_ml_job_name,
                      InputDataConfig=input_data_config,
                      OutputDataConfig=output_data_config,
                      AutoMLJobConfig=job_config,
                      AutoMLJobObjective=job_objective,
                      ProblemType=problem_type,
                      RoleArn=role)

AutoMLJobName: automl-dm-30-02-15-18


{'AutoMLJobArn': 'arn:aws:sagemaker:sa-east-1:388295382521:automl-job/automl-dm-30-02-15-18',
 'ResponseMetadata': {'RequestId': '76e9f94e-81d8-48fc-9dc0-7796f0ef260e',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '76e9f94e-81d8-48fc-9dc0-7796f0ef260e',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '92',
   'date': 'Fri, 30 Oct 2020 02:15:20 GMT'},
  'RetryAttempts': 0}}

In [None]:
#auto_ml_job_name = 'automl-dm-29-17-21-27'

In [5]:
%%time
job_run_status = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)['AutoMLJobStatus']

print(job_run_status)

while job_run_status not in ('Failed', 'Completed', 'Stopped'):
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    
    print (describe_response['AutoMLJobStatus'] + " - " + describe_response['AutoMLJobSecondaryStatus'])
    sleep(60)

InProgress
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - ModelTuning
InProgress - Mo

In [6]:
job = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
job_candidate_notebook = job['AutoMLJobArtifacts']['CandidateDefinitionNotebookLocation']
job_data_notebook = job['AutoMLJobArtifacts']['DataExplorationNotebookLocation']

print(job_candidate_notebook)
print(job_data_notebook)

s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/sagemaker-automl-candidates/pr-1-8cccb9c53b2a4336b9dca924f6f2bfc7811d25cf2f314d68b034fd9562/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb
s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/sagemaker-automl-candidates/pr-1-8cccb9c53b2a4336b9dca924f6f2bfc7811d25cf2f314d68b034fd9562/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb


In [7]:
%%sh -s $job_candidate_notebook $job_data_notebook
aws s3 cp $1 .
aws s3 cp $2 .

download: s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/sagemaker-automl-candidates/pr-1-8cccb9c53b2a4336b9dca924f6f2bfc7811d25cf2f314d68b034fd9562/notebooks/SageMakerAutopilotCandidateDefinitionNotebook.ipynb to ./SageMakerAutopilotCandidateDefinitionNotebook.ipynb
download: s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/sagemaker-automl-candidates/pr-1-8cccb9c53b2a4336b9dca924f6f2bfc7811d25cf2f314d68b034fd9562/notebooks/SageMakerAutopilotDataExplorationNotebook.ipynb to ./SageMakerAutopilotDataExplorationNotebook.ipynb


In [8]:
from sagemaker.analytics import ExperimentAnalytics

analytics = ExperimentAnalytics(
    sagemaker_session=sess, 
    experiment_name=auto_ml_job_name+'-aws-auto-ml-job'
)

df = analytics.dataframe()
df

Unnamed: 0,TrialComponentName,DisplayName,SourceArn,SageMaker.ImageUri,SageMaker.InstanceCount,SageMaker.InstanceType,SageMaker.VolumeSizeInGB,_tuning_objective_metric,binary_classifier_model_selection_criteria,l1,learning_rate,loss,mini_batch_size,num_models,predictor_type,wd,ObjectiveMetric - Min,ObjectiveMetric - Max,ObjectiveMetric - Avg,ObjectiveMetric - StdDev,ObjectiveMetric - Last,ObjectiveMetric - Count,validation:objective_loss - Min,validation:objective_loss - Max,validation:objective_loss - Avg,validation:objective_loss - StdDev,validation:objective_loss - Last,validation:objective_loss - Count,train:progress - Min,train:progress - Max,train:progress - Avg,train:progress - StdDev,train:progress - Last,train:progress - Count,train:throughput - Min,train:throughput - Max,train:throughput - Avg,train:throughput - StdDev,train:throughput - Last,train:throughput - Count,validation:recall - Min,validation:recall - Max,validation:recall - Avg,validation:recall - StdDev,validation:recall - Last,validation:recall - Count,validation:binary_classification_accuracy - Min,validation:binary_classification_accuracy - Max,validation:binary_classification_accuracy - Avg,validation:binary_classification_accuracy - StdDev,validation:binary_classification_accuracy - Last,validation:binary_classification_accuracy - Count,validation:roc_auc_score - Min,validation:roc_auc_score - Max,validation:roc_auc_score - Avg,validation:roc_auc_score - StdDev,validation:roc_auc_score - Last,validation:roc_auc_score - Count,train:objective_loss - Min,train:objective_loss - Max,train:objective_loss - Avg,train:objective_loss - StdDev,train:objective_loss - Last,train:objective_loss - Count,validation:objective_loss:final - Min,validation:objective_loss:final - Max,validation:objective_loss:final - Avg,validation:objective_loss:final - StdDev,validation:objective_loss:final - Last,validation:objective_loss:final - Count,validation:binary_f_beta - Min,validation:binary_f_beta - Max,validation:binary_f_beta - Avg,validation:binary_f_beta - StdDev,validation:binary_f_beta - Last,validation:binary_f_beta - Count,validation:precision - Min,validation:precision - Max,validation:precision - Avg,validation:precision - StdDev,validation:precision - Last,validation:precision - Count,train - MediaType,train - Value,validation - MediaType,validation - Value,SageMaker.ModelArtifact - MediaType,SageMaker.ModelArtifact - Value,Trials,Experiments,alpha,colsample_bytree,eta,gamma,lambda,max_depth,min_child_weight,num_round,objective,save_model_on_termination,subsample,train:f1 - Min,train:f1 - Max,train:f1 - Avg,train:f1 - StdDev,train:f1 - Last,train:f1 - Count,validation:error - Min,validation:error - Max,validation:error - Avg,validation:error - StdDev,validation:error - Last,validation:error - Count,validation:f1 - Min,validation:f1 - Max,validation:f1 - Avg,validation:f1 - StdDev,validation:f1 - Last,validation:f1 - Count,train:error - Min,train:error - Max,train:error - Avg,train:error - StdDev,train:error - Last,train:error - Count,SageMaker.ModelName,SageMaker.ModelPrimary.DataUrl,SageMaker.ModelPrimary.Image,SageMaker.TransformInput - MediaType,SageMaker.TransformInput - Value,SageMaker.TransformOutput - MediaType,SageMaker.TransformOutput - Value,processor_module,sagemaker_program,sagemaker_submit_directory,code - MediaType,code - Value,input_channel_mode,job_name,label_col,max_dataset_size,SageMaker.ImageUri - MediaType,SageMaker.ImageUri - Value,ds - MediaType,ds - Value
0,tuning-job-1-bff8cbf333be4ba5b0-200-955c2299-a...,tuning-job-1-bff8cbf333be4ba5b0-200-955c2299-a...,arn:aws:sagemaker:sa-east-1:388295382521:train...,855470959533.dkr.ecr.sa-east-1.amazonaws.com/l...,1.0,ml.m5.4xlarge,50.0,validation:binary_f_beta,loss_function,1.706526e-07,0.706089,logistic,800.0,1.0,binary_classifier,1.241696e-07,0.00000,0.00000,0.000000,0.000000,0.955095,0.0,0.190488,0.956565,0.354207,0.219253,0.190488,14.0,6.0,86.0,46.307692,25.978788,86.0,13.0,11114.6842,25279.369632,19524.729076,4480.871977,24174.435699,13.0,0.0,0.0,0.0,0.0,0.965096,0.0,0.0,0.0,0.0,0.0,0.954625,0.0,0.0,0.0,0.0,0.0,0.986944,0.0,0.153967,1.531418,0.375935,0.381687,0.153967,14.0,0.0,0.0,0.0,0.0,0.179662,0.0,0.0,0.0,0.0,0.0,0.955095,0.0,0.0,0.0,0.0,0.0,0.945299,0.0,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-200-955c2299-...,[automl-dm-30-02-15-18-aws-auto-ml-job],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,tuning-job-1-bff8cbf333be4ba5b0-187-397572c7-a...,tuning-job-1-bff8cbf333be4ba5b0-187-397572c7-a...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,,,,,,,,,0.00000,0.00000,0.000000,0.000000,0.960730,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-187-397572c7-...,[automl-dm-30-02-15-18-aws-auto-ml-job],0.001639,0.507428,0.113060,0.001266,1.520203,8.0,2.814295,530.0,binary:logistic,true,0.987174,0.00000,0.00000,0.000000,0.000000,1.00000,0.0,0.00000,0.00000,0.000000,0.000000,0.03927,0.0,0.00000,0.00000,0.000000,0.000000,0.96073,0.0,0.00000,0.00000,0.000000,0.000000,0.00000,0.0,,,,,,,,,,,,,,,,,,,,
2,tuning-job-1-bff8cbf333be4ba5b0-198-db3fea39-a...,tuning-job-1-bff8cbf333be4ba5b0-198-db3fea39-a...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,,,,,,,,,0.93804,0.95375,0.950262,0.003874,0.953750,14.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-198-db3fea39-...,[automl-dm-30-02-15-18-aws-auto-ml-job],1.187551,0.360922,0.137289,0.003404,1.741124,3.0,8.632918,1006.0,binary:logistic,true,0.907727,0.93392,0.95044,0.946733,0.004119,0.95044,14.0,0.04625,0.06196,0.049738,0.003874,0.04625,14.0,0.93804,0.95375,0.950262,0.003874,0.95375,14.0,0.04955,0.06607,0.053264,0.004118,0.04955,14.0,,,,,,,,,,,,,,,,,,,,
3,tuning-job-1-bff8cbf333be4ba5b0-199-f293a221-a...,tuning-job-1-bff8cbf333be4ba5b0-199-f293a221-a...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,,,,,,,,,0.00000,0.00000,0.000000,0.000000,0.965090,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-199-f293a221-...,[automl-dm-30-02-15-18-aws-auto-ml-job],0.000002,0.452326,0.145116,0.315749,0.552549,4.0,0.437663,706.0,binary:logistic,true,0.852731,0.00000,0.00000,0.000000,0.000000,1.00000,0.0,0.00000,0.00000,0.000000,0.000000,0.03490,0.0,0.00000,0.00000,0.000000,0.000000,0.96509,0.0,0.00000,0.00000,0.000000,0.000000,0.00000,0.0,,,,,,,,,,,,,,,,,,,,
4,tuning-job-1-bff8cbf333be4ba5b0-201-522d845e-a...,tuning-job-1-bff8cbf333be4ba5b0-201-522d845e-a...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,validation:f1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,application/x-recordio-protobuf,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,[tuning-job-1-bff8cbf333be4ba5b0-201-522d845e-...,[automl-dm-30-02-15-18-aws-auto-ml-job],0.000005,0.321018,0.163821,0.002083,0.603872,5.0,1.762287,764.0,binary:logistic,true,0.959084,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,automl-dm--dpp1-1-0a654cebfd0749ac8bcc6cc74e42...,automl-dm--dpp1-1-0a654cebfd0749ac8bcc6cc74e42...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,text/csv,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-154-06c19070-...,"[automl-dm-30-02-15-18-aws-auto-ml-job, automl...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,candidate_data_processors.dpp1,candidate_data_processors.trainer,/opt/ml/input/data/code,application/x-code,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,,,,,
208,automl-dm--dpp4-1-53dc0784dc364daa9aaf089c7305...,automl-dm--dpp4-1-53dc0784dc364daa9aaf089c7305...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,text/csv,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-149-7c5fe21f-...,"[automl-dm-30-02-15-18-aws-auto-ml-job, automl...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,candidate_data_processors.dpp4,candidate_data_processors.trainer,/opt/ml/input/data/code,application/x-code,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,,,,,
209,automl-dm--dpp3-1-93f768369dfe49acbf002e22b3cc...,automl-dm--dpp3-1-93f768369dfe49acbf002e22b3cc...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,text/csv,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-201-522d845e-...,"[automl-dm-30-02-15-18-aws-auto-ml-job, automl...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,candidate_data_processors.dpp3,candidate_data_processors.trainer,/opt/ml/input/data/code,application/x-code,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,,,,,
210,automl-dm--dpp0-1-bd210fc5b4f64f078ccc08d4b3a1...,automl-dm--dpp0-1-bd210fc5b4f64f078ccc08d4b3a1...,arn:aws:sagemaker:sa-east-1:388295382521:train...,737474898029.dkr.ecr.sa-east-1.amazonaws.com/s...,1.0,ml.m5.4xlarge,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,text/csv,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,s3://sagemaker-sa-east-1-388295382521/sagemake...,[tuning-job-1-bff8cbf333be4ba5b0-168-191dd2d5-...,"[automl-dm-30-02-15-18-aws-auto-ml-job, automl...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,candidate_data_processors.dpp0,candidate_data_processors.trainer,/opt/ml/input/data/code,application/x-code,s3://sagemaker-sa-east-1-388295382521/sagemake...,,,,,,,,


In [9]:
candidates = sm.list_candidates_for_auto_ml_job(AutoMLJobName=auto_ml_job_name, 
                                                SortBy='FinalObjectiveMetricValue')['Candidates']
index = 1
for candidate in candidates:
  print (str(index) + "  " 
         + candidate['CandidateName'] + "  " 
         + str(candidate['FinalAutoMLJobObjectiveMetric']['Value']))
  index += 1

1  tuning-job-1-bff8cbf333be4ba5b0-169-79cce1e7  0.9659600257873535
2  tuning-job-1-bff8cbf333be4ba5b0-178-fbda304a  0.9659600257873535
3  tuning-job-1-bff8cbf333be4ba5b0-188-83ca6494  0.9650899767875671
4  tuning-job-1-bff8cbf333be4ba5b0-194-c51b4698  0.9650899767875671
5  tuning-job-1-bff8cbf333be4ba5b0-162-2c0df7e1  0.9650899767875671
6  tuning-job-1-bff8cbf333be4ba5b0-177-796c2ba3  0.9650899767875671
7  tuning-job-1-bff8cbf333be4ba5b0-153-7f4bde04  0.9642199873924255
8  tuning-job-1-bff8cbf333be4ba5b0-131-a200691c  0.9642199873924255
9  tuning-job-1-bff8cbf333be4ba5b0-173-36138a14  0.9642199873924255
10  tuning-job-1-bff8cbf333be4ba5b0-147-11e07f8a  0.9642199873924255


In [10]:
best_candidate = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)['BestCandidate']
best_candidate_name = best_candidate['CandidateName']

print("Candidate name: " + best_candidate_name)

Candidate name: tuning-job-1-bff8cbf333be4ba5b0-178-fbda304a


In [11]:
for container in best_candidate['InferenceContainers']:
    print(container['Image'])
    print(container['ModelDataUrl'])
    print('-')

737474898029.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-sklearn-automl:0.2-1-cpu-py3
s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/data-processor-models/automl-dm--dpp3-1-93f768369dfe49acbf002e22b3cc1c27a49e7d345c664/output/model.tar.gz
-
737474898029.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-xgboost:1.0-1-cpu-py3
s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/tuning/automl-dm--dpp3-xgb/tuning-job-1-bff8cbf333be4ba5b0-178-fbda304a/output/model.tar.gz
-
737474898029.dkr.ecr.sa-east-1.amazonaws.com/sagemaker-sklearn-automl:0.2-1-cpu-py3
s3://sagemaker-sa-east-1-388295382521/sagemaker/automl-dm/output/automl-dm-30-02-15-18/data-processor-models/automl-dm--dpp3-1-93f768369dfe49acbf002e22b3cc1c27a49e7d345c664/output/model.tar.gz
-


In [14]:
# model_name = 'automl-dm-model-' + timestamp_suffix

# model_arn = sm.create_model(Containers=best_candidate['InferenceContainers'],
#                             ModelName=model_name,
#                             ExecutionRoleArn=role)

# print('Model ARN: ', model_arn['ModelArn'])

############## use this to make iferences
from sagemaker import AutoML
model_name = 'newnamemodelfinish'
aml = AutoML.attach(auto_ml_job_name='automl-dm-30-02-15-18')
aml_best_model = aml.create_model(name=model_name,
                                  candidate=best_candidate,
                                  inference_response_keys=["predicted_label", "probability", "labels", "probabilities"])

aml_transformer = aml_best_model.transformer(accept='text/csv',
                                            assemble_with='Line',
                                            instance_type='ml.m5.xlarge',
                                            instance_count=1,)




In [15]:
# Don't forget to update the bucket! It must be in the same region as SageMaker
s3_capture_path = 's3://jsimon-capture-saeast1/' + model_name + '/'

print(s3_capture_path)

s3://jsimon-capture-saeast1/newnamemodelfinish/


In [16]:
data_capture_configuration = {
    "EnableCapture": True, # flag turns data capture on and off
    "DestinationS3Uri": s3_capture_path, # s3 location where captured data is saved
    "InitialSamplingPercentage": 100, # sampling rate to capture data. max is 100%
    "CaptureOptions": [
       {
            "CaptureMode": "Output" # The type of capture this option enables. Values can be: [Output/Input]
        },
        {
            "CaptureMode": "Input" # The type of capture this option enables. Values can be: [Output/Input]
        }
    ],
    "CaptureContentTypeHeader": {
       "CsvContentTypes": ["text/csv"], # headers which should signal to decode the payload into CSV format 
       "JsonContentTypes": ["application/json"] # headers which should signal to decode the payload into JSON format 
    }
}

In [None]:
# ############## use this to make iferences
# from sagemaker import AutoML

# aml = AutoML.attach(auto_ml_job_name='automl-dm-30-02-15-18')
# aml_best_model = aml.create_model(name='automl-dm-model-' + timestamp_suffix,
#                                   candidate=best_candidate,
#                                   inference_response_keys=["predicted_label",'probabilities', 'probability' 'labels'])

# aml_transformer = aml_best_model.transformer(accept='text/csv',
#                                             assemble_with='Line',
#                                             instance_type='ml.m5.xlarge',
#                                             instance_count=1,)

In [17]:
# Endpoint configuration name
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
epc_name = 'automl-dm-epc-' + timestamp_suffix
print('Endpoint configuration name:', epc_name)

ep_config = sm.create_endpoint_config(EndpointConfigName = epc_name,
                                      ProductionVariants=[{'InstanceType':'ml.m4.xlarge',
                                                           'InitialInstanceCount':1,
                                                           'ModelName':model_name,
                                                           'VariantName': 'AllTraffic'}],
                                      DataCaptureConfig = data_capture_configuration)

Endpoint configuration name: automl-dm-epc-30-03-36-26


In [18]:
# Endpoint name
ep_name = 'automl-dm-ep-' + timestamp_suffix
variant_name = 'automl-dm-variant-' + timestamp_suffix
print('Endpoint name:', ep_name)
# variant_name = "automl-dm-variant-29-22-04-24"
# ep_name = "automl-dm-ep-29-22-04-24"

create_endpoint_response = sm.create_endpoint(EndpointName=ep_name,
                                              EndpointConfigName=epc_name)

Endpoint name: automl-dm-ep-30-03-36-26


In [19]:
%%time
sm.get_waiter('endpoint_in_service').wait(EndpointName=ep_name)

resp = sm.describe_endpoint(EndpointName=ep_name)
status = resp['EndpointStatus']

print("Endpoint ARN   : " + resp['EndpointArn'])
print("Endpoint status: " + status)

Endpoint ARN   : arn:aws:sagemaker:sa-east-1:388295382521:endpoint/automl-dm-ep-30-03-36-26
Endpoint status: InService
CPU times: user 151 ms, sys: 31.9 ms, total: 183 ms
Wall time: 8min 36s


In [20]:
tp = tn = fp = fn = count = 0

with open('automl-test.csv') as f:
    lines = f.readlines()
    for l in lines[1:]:   # Skip header
        l = l.split(',')  # Split CSV line into features
        label = l[-1]     # Store 'yes'/'no' label
        l = l[:-1]        # Remove label
        l = ','.join(l)   # Rebuild CSV line without label
                
        response = sm_rt.invoke_endpoint(EndpointName=ep_name, ContentType='text/csv', Accept='text/csv', Body=l)

        response = response['Body'].read().decode("utf-8")
        #print ("label %s response %s" %(label,response))

        if 'TRUE' in label:
            # Sample is positive
            if 'TRUE' in response:
                # True positive
                tp=tp+1
            else:
                # False negative
                fn=fn+1
        else:
            # Sample is negative
            if 'FAKE' in response:
                # True negative
                tn=tn+1
            else:
                # False positive
                fp=fp+1
        count = count+1
        if (count % 100 == 0):   
            sys.stdout.write(str(count)+' ')
            
print ("Done")

100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 Done


In [21]:
print ("%d %d" % (tn, fp))
print ("%d %d" % (fn, tp))

accuracy  = (tp+tn)/(tp+tn+fp+fn)
precision = tp/(tp+fp)
recall    = tn/(tp+fn)
f1        = (2*precision*recall)/(precision+recall)

print ("Accuracy: %.4f, Precision: %.4f, Recall: %.4f, F1: %.4f" % (accuracy, precision, recall, f1))

728 0
0 709
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0268, F1: 1.0132


In [None]:
%%sh -s "$s3_capture_path"

aws s3 ls --recursive $1

In [None]:
############## use this to make iferences
from sagemaker import AutoML

aml = AutoML.attach(auto_ml_job_name='automl-dm-30-02-15-18')
aml_best_model = aml.create_model(name='automl-dm-model-' + timestamp_suffix,
                                  candidate=None,
                                  inference_response_keys=["predicted_label",'probabilities', 'probability' 'labels'])

aml_transformer = aml_best_model.transformer(accept='text/csv',
                                            assemble_with='Line',
                                            instance_type='ml.m5.xlarge',
                                            instance_count=1,)