In [1]:
import json
import numpy as np
import time
import subprocess
import sys

# Install TensorFlow through the kernel's own interpreter (`python -m pip`) so
# the package lands in the environment this notebook is running in; a bare
# `pip` found on PATH may belong to a different Python installation.
pip_result = subprocess.run([sys.executable, "-m", "pip", "install", "tensorflow"])
if pip_result.returncode != 0:
    # Surface a failed install instead of silently discarding the exit status.
    print(f"WARNING: 'pip install tensorflow' exited with code {pip_result.returncode}")
from sklearn.metrics import accuracy_score
import boto3
import sagemaker
from sagemaker.predictor import Predictor

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
import sagemaker
import boto3
import os

# SageMaker session plus the execution role reported for this notebook.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# All project artefacts live in the session's default bucket, under one
# shared key prefix with separate "data" and "model" sub-prefixes.
bucket = sess.default_bucket()
s3_prefix = "cifar10-image-recognition"
data_s3_prefix = s3_prefix + "/data"
model_s3_prefix = s3_prefix + "/model"

# Full s3:// URIs corresponding to the two key prefixes above.
s3_root = f"s3://{bucket}"
data_s3_uri = s3_root + "/" + data_s3_prefix
model_s3_uri = s3_root + "/" + model_s3_prefix

print(f"Using bucket: {bucket}")


Using bucket: sagemaker-ca-central-1-127214167538


In [3]:

# Local folder with the extracted CIFAR-10 python batches; it is expected to
# sit in the notebook's current working directory.
train_dir = os.path.join(os.getcwd(), "cifar-10-batches-py")

s3_resource_bucket = boto3.Session().resource("s3").Bucket(bucket)

# Sorted so the upload order does not depend on filesystem listing order.
for file_name in sorted(os.listdir(train_dir)):
    local_path = os.path.join(train_dir, file_name)
    if not os.path.isfile(local_path):
        # upload_file() needs a regular file; skip any sub-directories.
        continue
    # S3 keys always use "/" as the separator, so build the key explicitly
    # instead of with os.path.join (which would emit "\\" on Windows).
    s3_key = f"{data_s3_prefix}/{file_name}"
    s3_resource_bucket.Object(s3_key).upload_file(local_path)
print(f"Training data uploaded to: {data_s3_uri}")


Training data uploaded to: s3://sagemaker-ca-central-1-127214167538/cifar10-image-recognition/data


In [4]:
from sagemaker.tensorflow import TensorFlow

# Collect the training-job configuration in one mapping, then build the
# estimator from it.
training_hyperparameters = {"epochs": 3}
estimator_settings = dict(
    entry_point="cnnScript.py",
    source_dir="script",  # folder that holds cnnScript.py
    role=role,
    instance_type="ml.m5.large",
    instance_count=1,
    framework_version="2.4",
    py_version="py37",
    hyperparameters=training_hyperparameters,
)
estimator = TensorFlow(**estimator_settings)

# Start the training job, passing the uploaded S3 data location under the
# "train" input key.
training_inputs = {"train": data_s3_uri}
estimator.fit(training_inputs)


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: tensorflow-training-2024-11-16-07-34-26-858


2024-11-16 07:34:29 Starting - Starting the training job...
2024-11-16 07:34:42 Starting - Preparing the instances for training...
2024-11-16 07:35:04 Downloading - Downloading input data...
2024-11-16 07:35:49 Downloading - Downloading the training image...
2024-11-16 07:36:17 Training - Training image download completed. Training in progress.[34m2024-11-16 07:36:21.467783: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2024-11-16 07:36:21.468035: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.[0m
[34m2024-11-16 07:36:21.498437: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.[0m
[34m2024-11-16 07:36:23,015 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2024-11-16 07:36:23,0

In [5]:
import time
# Deploy the trained estimator to a real-time endpoint and report how long
# the deployment call took.
print("Deploying the trained model...")
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# wall-clock adjustments while the endpoint is being created.
start_time = time.perf_counter()

predictor = estimator.deploy(
    instance_type="ml.m5.large",
    initial_instance_count=1,
)

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken to deploy the model: {elapsed_time:.2f} seconds")

INFO:sagemaker.tensorflow.model:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating model with name: tensorflow-training-2024-11-16-07-39-13-089


Deploying the trained model...


INFO:sagemaker:Creating endpoint-config with name tensorflow-training-2024-11-16-07-39-13-089
INFO:sagemaker:Creating endpoint with name tensorflow-training-2024-11-16-07-39-13-089


----!Time taken to deploy the model: 151.68 seconds


In [6]:
# Run predictions
import pickle
import numpy as np
import time


# Evaluate the deployed endpoint on a random subset of the local test batch.
SEED = 42  # fixed so the sampled subset (and the reported accuracy) is reproducible
num_samples = 300

test_data_path = "cifar-10-batches-py/test_batch"
# NOTE: pickle can execute arbitrary code while loading; only open dataset
# files obtained from a trusted source.
with open(test_data_path, "rb") as f:
    test_data = pickle.load(f, encoding="bytes")

# Divide by 255.0 (presumably scaling 8-bit pixel values into [0, 1]).
# NOTE(review): the CIFAR-10 python batches are described as channel-major
# rows (1024 R, then 1024 G, then 1024 B), which would call for
# reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1). The reshape is left unchanged
# because it has to match whatever preprocessing cnnScript.py applied during
# training -- confirm against that script before changing it.
X_test = test_data[b"data"].reshape(-1, 32, 32, 3) / 255.0
y_test = np.array(test_data[b"labels"])

# Never request more samples than exist: choice(..., replace=False) would raise.
num_samples = min(num_samples, X_test.shape[0])
rng = np.random.default_rng(SEED)
indices = rng.choice(X_test.shape[0], size=num_samples, replace=False)
X_test_sample = X_test[indices]
y_test_sample = y_test[indices]

# Time only the endpoint call; perf_counter() is monotonic.
start_time = time.perf_counter()
predictions = predictor.predict(X_test_sample)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print(f"Time taken for predictions: {elapsed_time:.2f} seconds")

print(f"Type of predictions: {type(predictions)}")

# Unwrap a {"predictions": [...]} response, then normalise to an ndarray so the
# shape checks below also work when the endpoint returns a plain list.
if isinstance(predictions, dict):
    if "predictions" not in predictions:
        raise ValueError(
            f"Endpoint response has no 'predictions' key; got keys: {list(predictions)}"
        )
    predictions = predictions["predictions"]
predictions = np.asarray(predictions)

print(f"Predictions shape: {predictions.shape}")

if predictions.ndim == 1:  # 1D array: already class indices
    predicted_classes = predictions
elif predictions.ndim == 2:  # 2D array: per-class scores, take the best class
    predicted_classes = np.argmax(predictions, axis=1)
else:
    raise ValueError(f"Unexpected predictions shape: {predictions.shape}")

# Guard against a silent broadcast / scalar comparison on a size mismatch.
if predicted_classes.shape[0] != y_test_sample.shape[0]:
    raise ValueError(
        f"Got {predicted_classes.shape[0]} predictions for "
        f"{y_test_sample.shape[0]} samples"
    )
accuracy = (predicted_classes == y_test_sample).mean()

print(f"Test Accuracy from Endpoint ({num_samples} samples): {accuracy:.4f}")


Time taken for predictions: 2.31 seconds
Type of predictions: <class 'dict'>
Predictions shape: (300, 10)
Test Accuracy from Endpoint (300 samples): 0.6267


In [7]:
# Delete the endpoint behind `predictor` (created by estimator.deploy() above)
# now that the evaluation is finished, so it is not left running.
predictor.delete_endpoint()


INFO:sagemaker:Deleting endpoint configuration with name: tensorflow-training-2024-11-16-07-39-13-089
INFO:sagemaker:Deleting endpoint with name: tensorflow-training-2024-11-16-07-39-13-089
