In [23]:
import pandas as pd

# Load the raw sensor export and discard the '__dt' column in one step.
df = pd.read_csv('dht20testdata.csv').drop(columns=['__dt'])

# Write the cleaned table to a local CSV without a header row or index
# column; this is the file that gets uploaded to S3 further down.
local_clean_path = 'dht20testdata_processed.csv'
df.to_csv(local_clean_path, index=False, header=False)

# Keep the same rows as a NumPy array for in-memory training later on.
local_train_data = df.values

print(df)

        humidity  temperature
0      52.930164    24.315834
1      52.861595    24.345970
2      52.776527    24.326706
3      52.679348    24.330330
4      52.596760    24.330330
...          ...          ...
12032  52.900124    24.073029
12033  52.892303    24.077606
12034  52.867317    24.081230
12035  52.865601    24.051666
12036  52.905273    24.061203

[12037 rows x 2 columns]


In [3]:
import sagemaker
import boto3

# Initialize the boto3 and sagemaker session
boto3_session = boto3.Session()
sagemaker_session = sagemaker.Session(boto_session=boto3_session)

# Get the execution role for the notebook instance.
# This is the role that SageMaker would use to leverage AWS resources (S3, CloudWatch) on your behalf
role = sagemaker.get_execution_role()  # Make sure this is available in your environment

# Create S3 resource. Be careful: No capital letters in bucket_name.
s3 = boto3.resource('s3')
bucket_name = 'dht20rcfdemo'

# S3 only accepts a bare create_bucket call in us-east-1. In every other
# region the target region has to be passed as a LocationConstraint, and
# us-east-1 itself must NOT be passed as one.
bucket_region = boto3_session.region_name
try:
    if bucket_region in (None, 'us-east-1'):
        s3.create_bucket(Bucket=bucket_name)
    else:
        s3.create_bucket(
            Bucket=bucket_name,
            CreateBucketConfiguration={'LocationConstraint': bucket_region},
        )
    print('success')
except Exception as e:
    # Best effort: report the failure (e.g. the name is already taken) and
    # let the notebook continue so an existing bucket can still be used.
    print('error:', e)

success


In [10]:
# Destination for the cleaned CSV: s3://<bucket>/<prefix>/<file name>
bucket = 'dht20rcfdemo'
prefix = 'RCFDemo'

# Push the local file to S3; the value returned by upload_data is printed
# below as the data path.
train_data = sagemaker_session.upload_data(
    path='dht20testdata_processed.csv',
    bucket=bucket,
    key_prefix=prefix,
)
print(f'data_path:{train_data}')

data_path:s3://dht20rcfdemo/RCFDemo/dht20testdata_processed.csv


In [15]:
# Configure the Random Cut Forest estimator: a single ml.m4.xlarge instance,
# 50 trees with 512 samples per tree, and output_path set to
# s3://<bucket>/<prefix>/output.
# NOTE(review): data_location is given the URI of the uploaded CSV object;
# the SDK describes this argument as an S3 prefix for uploaded record sets —
# confirm this is the intended location.
rcf = sagemaker.RandomCutForest(
    role=role,
    instance_type='ml.m4.xlarge',
    instance_count=1,
    num_trees=50,
    num_samples_per_tree=512,
    data_location=train_data,
    output_path=f's3://{bucket}/{prefix}/output',
)

In [17]:
# Train model: convert the in-memory array with record_set first, then
# hand the result to fit, which starts the training job logged below.
train_records = rcf.record_set(local_train_data)
rcf.fit(train_records)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2024-03-18-10-42-59-597


2024-03-18 10:42:59 Starting - Starting the training job...
2024-03-18 10:43:15 Starting - Preparing the instances for training...
2024-03-18 10:43:51 Downloading - Downloading input data...
2024-03-18 10:44:16 Downloading - Downloading the training image............
2024-03-18 10:46:16 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34m[03/18/2024 10:46:36 INFO 140662585653056] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'num_samples_per_tree': 256, 'num_trees': 100, 'force_dense': 'true', 'eval_metrics': ['accuracy', 'precision_recall_fscore'], 'epochs': 1, 'mini_batch_size': 1000, '_log_level': 'info', '_kvstore': 'dist_async', '_num_kv_servers': 'auto', '_num

In [18]:
# Deploy the trained model to an endpoint backed by one ml.m4.xlarge
# instance; the returned predictor is used for the inference calls below.
predictor = rcf.deploy(instance_type='ml.m4.xlarge', initial_instance_count=1)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating model with name: randomcutforest-2024-03-18-10-47-37-165
INFO:sagemaker:Creating endpoint-config with name randomcutforest-2024-03-18-10-47-37-165
INFO:sagemaker:Creating endpoint with name randomcutforest-2024-03-18-10-47-37-165


-------!

In [19]:
# Show the endpoint name; it is needed later to invoke the endpoint
# from outside this predictor object (e.g. in the Lambda function).
predictor.endpoint_name

'randomcutforest-2024-03-18-10-47-37-165'

In [21]:
# Sanity-check the deployed endpoint with a single hand-made reading:
# the humidity equals the first row of the training data printed earlier,
# while the temperature (34.3) is about 10 higher than that row's 24.3.
import numpy as np
arr1 = np.array([[52.93016434, 34.31583405]])
result1 = predictor.predict(arr1)
# The response carries a "score" value for the reading (see the output).
print(result1)

[label {
  key: "score"
  value {
    float32_tensor {
      values: 6.37300253
    }
  }
}
]


In [22]:
#lambda function
import boto3
import json

# Name of the deployed SageMaker endpoint to invoke; this is the same value
# that `predictor.endpoint_name` displayed earlier in the notebook.
ENDPOINT_NAME = 'randomcutforest-2024-03-18-10-47-37-165'
# Runtime client created once at module level; lambda_handler uses it to
# call invoke_endpoint.
runtime = boto3.client('runtime.sagemaker')

# Scores strictly greater than this value are reported as "abnormal".
ANOMALY_SCORE_THRESHOLD = 5


def lambda_handler(event, context):
    """Classify one reading as "normal" or "abnormal" via the endpoint.

    The values in ``event['data']`` are joined into one comma-separated
    line, sent as ``text/csv`` to the endpoint named by ``ENDPOINT_NAME``,
    and the first score in the JSON response is compared against
    ``ANOMALY_SCORE_THRESHOLD``.

    Args:
        event: Mapping with a ``'data'`` key holding an iterable of values,
            e.g. ``{'data': [52.93, 24.32]}``.
        context: Not used by this handler.

    Returns:
        ``"abnormal"`` when the score is greater than the threshold,
        otherwise ``"normal"``.

    Raises:
        KeyError: If ``event`` has no ``'data'`` key or the response JSON
            lacks the ``'scores'`` / ``'score'`` entries.
    """
    # `features` instead of `input`, so the builtin is not shadowed.
    features = event['data']
    csv_row = ','.join(map(str, features))

    response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                       ContentType='text/csv',
                                       Body=csv_row)

    # The response body is a stream; read it fully and parse the JSON text.
    response_json = json.loads(response['Body'].read().decode())
    score = response_json['scores'][0]['score']

    return "abnormal" if score > ANOMALY_SCORE_THRESHOLD else "normal"


# Local smoke test: call the handler directly with a sample event.
# lambda_handler never reads its `context` argument, so None is passed
# explicitly instead of relying on IPython's `_` (the last cell output),
# which depends on kernel history and is undefined outside IPython.
Input_json = {'data': [52.93016434, 24.31583405]}

result = lambda_handler(Input_json, None)
result


'normal'

In [None]:
# test API gateway

import requests

# put your API link here
API_ENDPOINT = "https://rtch7hy6yk.execute-api.us-east-1.amazonaws.com/DHT20API"

# data sent to api
# Named `payload` rather than `json`: binding a dict to `json` would replace
# the `json` module in the notebook namespace and break lambda_handler's
# json.loads call if that cell is run again afterwards.
payload = {'data': [62.93016434, 24.31583405]}

# sending post request and saving response as response object
# A timeout keeps the cell from hanging forever if the API does not answer.
r = requests.post(url=API_ENDPOINT, json=payload, timeout=30)

print(r.json())