In [None]:
# Set up the variables


In [None]:
import sagemaker
from sagemaker import get_execution_role

# Session object reused later in the notebook (e.g. to read the AWS region
# when building the CloudWatch link).
sagemaker_session = sagemaker.Session()
# Execution role passed to every estimator below through `role=`.
role = get_execution_role()

In [None]:
import subprocess
from sagemaker.tensorflow import TensorFlow

# Start from CPU local mode and switch to the GPU local-mode instance type only
# when `nvidia-smi` is installed and exits successfully.
instance_type = "local"

try:
    if subprocess.call("nvidia-smi") == 0:
        instance_type = "local_gpu"
except OSError:
    # `nvidia-smi` is not on PATH (no NVIDIA driver available): stay on CPU.
    pass

# A single epoch is enough to smoke-test the training script locally.
local_hyperparameters = {"epochs": 1, "batch-size": 64}

# The estimator is created unconditionally so the following `fit` cell works on
# both CPU-only and GPU hosts.
estimator = TensorFlow(
    entry_point="cifar10_keras_main.py",
    source_dir="source_dir",
    role=role,
    framework_version="1.15.2",
    py_version="py3",
    hyperparameters=local_hyperparameters,
    train_instance_count=1,
    train_instance_type=instance_type,
)

In [None]:
import os 

# Local training reads each channel from a sub-directory of ./data.
data_path = os.path.join(os.getcwd(), "data")

train_channel = "file://{}/train".format(data_path)
validation_channel = "file://{}/validation".format(data_path)
eval_channel = "file://{}/eval".format(data_path)

# Channel name -> file:// URI, in the shape `fit` is given below.
local_inputs = {
    "train": train_channel,
    "validation": validation_channel,
    "eval": eval_channel,
}

estimator.fit(local_inputs)

In [None]:
# Regexes applied to the training log output; the single capture group in each
# pattern is the value reported for the named metric.
# Backslashes are doubled throughout so that no string literal relies on an
# invalid escape sequence such as "\d".
metric_definitions = [
    {"Name": "train:loss", "Regex": ".*loss: ([0-9\\.]+) - accuracy: [0-9\\.]+.*"},
    {"Name": "train:accuracy", "Regex": ".*loss: [0-9\\.]+ - accuracy: ([0-9\\.]+).*"},
    {
        "Name": "validation:accuracy",
        "Regex": ".*step - loss: [0-9\\.]+ - accuracy: [0-9\\.]+ - val_loss: [0-9\\.]+ - val_accuracy: ([0-9\\.]+).*",
    },
    {
        "Name": "validation:loss",
        "Regex": ".*step - loss: [0-9\\.]+ - accuracy: [0-9\\.]+ - val_loss: ([0-9\\.]+) - val_accuracy: [0-9\\.]+.*",
    },
    {
        # Captures the per-step duration printed just before "ms/step" or "us/step".
        "Name": "sec/steps",
        "Regex": ".* - \\d+s (\\d+)[mu]s/step - loss: [0-9\\.]+ - accuracy: [0-9\\.]+ - val_loss: [0-9\\.]+ - val_accuracy: [0-9\\.]+",
    },
]

In [None]:
from sagemaker.tensorflow import TensorFlow

# Same key spelling as the local-mode run ("batch-size") so both jobs hand the
# training script identically named hyperparameters.
hyperparameters = {"epochs": 10, "batch-size": 256}

# Each tag is a dict with capitalised "Key" and "Value" entries.
tags = [
    {"Key": "Project", "Value": "cifar10"},
    {"Key": "TensorBoard", "Value": "file"},
]

# Managed training job on a single GPU instance; the framework version matches
# the other estimators in this notebook.
estimator = TensorFlow(
    entry_point="cifar10_keras_main.py",
    source_dir="source_dir",
    metric_definitions=metric_definitions,
    hyperparameters=hyperparameters,
    role=role,
    framework_version="1.15.2",
    py_version="py3",
    train_instance_count=1,
    train_instance_type="ml.p2.xlarge",
    base_job_name="cifar10-tf",
    tags=tags,
)

In [None]:
# Upload the local ./data directory to S3 so that the hosted training jobs can
# read it; `upload_data` returns the S3 URI of the uploaded prefix.
# NOTE(review): the key prefix below is a placeholder choice - adjust it to the
# bucket layout you actually use.
dataset_uri = sagemaker_session.upload_data(path="data", key_prefix="data/DEMO-cifar10-tf")

# Channel name -> S3 URI for each dataset split.
inputs = {
    "train": "{}/train".format(dataset_uri),
    "validation": "{}/validation".format(dataset_uri),
    "eval": "{}/eval".format(dataset_uri),
}

estimator.fit(inputs)

In [None]:
from urllib import parse

from IPython.core.display import Markdown

# Assemble a deep link to the CloudWatch metrics console, filtered on the name
# of the estimator's latest training job.
region = sagemaker_session.boto_region_name
console_host = "{}.console.aws.amazon.com".format(region)
region_query = "region={}".format(region)
metrics_fragment = "metricsV2:namespace=/aws/sagemaker/TrainingJobs;dimensions=TrainingJobName;search={}".format(
    estimator.latest_training_job.name
)

# urlunparse takes (scheme, netloc, path, params, query, fragment).
cw_url = parse.urlunparse(
    ("https", console_host, "/cloudwatch/home", "", region_query, metrics_fragment)
)

link_markdown = (
    "CloudWatch metrics: [link]({}). After you choose a metric, "
    "change the period to 1 Minute (Graphed Metrics -> Period)."
).format(cw_url)

display(Markdown(link_markdown))

In [None]:
# Same training job as the managed File-mode run, but with input_mode="Pipe".
# The instance count is an integer and the instance type is given separately.
pipe_mode_estimator = TensorFlow(
    entry_point="cifar10_keras_main.py",
    source_dir="source_dir",
    metric_definitions=metric_definitions,
    hyperparameters=hyperparameters,
    role=role,
    framework_version="1.15.2",
    py_version="py3",
    train_instance_count=1,
    train_instance_type="ml.p2.xlarge",
    input_mode="Pipe",
    base_job_name="cifar10-tf-pipe",
    tags=tags,
)

In [None]:
# Start the Pipe-mode training job on the S3 inputs; wait=False is passed so
# this call is not asked to wait for the job to finish.
pipe_mode_estimator.fit(inputs ,wait = False)

In [None]:
# MPI settings for the distributed job: enabled, with processes_per_host set to 1.
mpi_settings = {"enabled": True, "processes_per_host": 1}
distribution = {"mpi": mpi_settings}

In [None]:
# Two-instance training job configured with the MPI `distribution` settings.
dist_estimator = TensorFlow(
    # Training script and runtime
    entry_point="cifar10_keras_main.py",
    source_dir="source_dir",
    framework_version="1.15.2",
    py_version="py3",
    # Cluster layout
    train_instance_count=2,
    train_instance_type="ml.p3.2xlarge",
    distributions=distribution,
    # Job configuration and bookkeeping
    role=role,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    base_job_name="cifar10-tf-dist",
    tags=tags,
)


In [None]:
# Start the distributed training job on the S3 inputs; wait=False is passed so
# this call is not asked to wait for the job to finish.
dist_estimator.fit(inputs , wait = False)

In [None]:
# Run the helper script generate_tensorboard_command.py in a shell.
# NOTE(review): the script is not shown here; presumably it prints a TensorBoard
# launch command for the jobs above - confirm against the script.
!python generate_tensorboard_command.py

In [None]:
# Deploy the model trained by `estimator` to an endpoint backed by a single
# ml.m4.xlarge instance; `predictor` is the client used for the requests below.
predictor = estimator.deploy(initial_instance_count=1 , instance_type = "ml.m4.xlarge")

In [None]:
import numpy as np
# Random array of shape (1, 32, 32, 3) used as a smoke-test payload for the endpoint.
data = np.random.randn(1, 32, 32, 3)

# Send the payload through `predictor` and read the "predictions" entry of the
# response; the reported class is the index of the largest value.
response = predictor.predict(data)
print("Predicted class : {}".format(np.argmax(response["predictions"])))

In [None]:
from  keras.datasets import cifar10

# Load CIFAR-10. The second pair is the test split; it must be bound to
# x_test / y_test because the evaluation loop further down reads both names.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [None]:
from keras.preprocessing.image import ImageDataGenerator

def predict(data):
    """Send `data` to the deployed endpoint and return the "predictions" entry of the response."""
    response = predictor.predict(data)
    return response["predictions"]

# Collect predicted and true class ids for the confusion matrix.
predicted = []
actual = []
batches = 0
batch_size = 128

datagen = ImageDataGenerator()
for data in datagen.flow(x_test, y_test, batch_size=batch_size):
    # data[0] holds the images of this batch and data[1] the matching labels.
    for i, prediction in enumerate(predict(data[0])):
        predicted.append(np.argmax(prediction))
        actual.append(data[1][i][0])

    # Count whole batches (not individual samples) and stop explicitly once
    # enough batches to cover the test set have been processed.
    batches += 1
    if batches >= len(x_test) / batch_size:
        break

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from sklearn.metrics import confusion_matrix

# Confusion matrix of the endpoint's predictions against the true labels.
raw_counts = confusion_matrix(y_true=actual, y_pred=predicted)
# Divide every row by its own total so that each row sums to 1.
cm = raw_counts.astype("float") / raw_counts.sum(axis=1, keepdims=True)

sn.set(rc={"figure.figsize": (11.7, 8.27)})
sn.set(font_scale=1.4)  # scale up label text
sn.heatmap(cm, annot=True, annot_kws={"size": 10})  # size of the per-cell annotations

In [None]:
# Delete the endpoint behind `predictor` now that evaluation is finished.
predictor.delete_endpoint()