# Use conda-amazonei-tensorflow-p36

In [1]:
import boto3
import numpy as np
import os
import pathlib
import random
import tensorflow as tf
import time
import sagemaker

from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner import ContinuousParameter, HyperparameterTuner
from sagemaker.tensorflow.serving import Model

from sklearn.model_selection import train_test_split
from keras.preprocessing.image import img_to_array, load_img




Using TensorFlow backend.


# Convert images to tfRecord

In [2]:
data_root = pathlib.Path(os.path.join(os.getcwd(), 'chest_xray_standard'))
all_image_paths = list(data_root.glob('t*/*/*'))
all_image_paths = [str(path) for path in all_image_paths]
random.shuffle(all_image_paths)
image_count = len(all_image_paths)

all_image_paths[:10]
print(len(all_image_paths))

5840


In [3]:
all_image_labels = [pathlib.Path(path).parent.name
                    for path in all_image_paths]
print(all_image_labels[:10])
all_image_labels = list(map((lambda v: 1 if v == 'PNEUMONIA' else 0), 
                            all_image_labels))
print(all_image_labels[:10])

['NORMAL', 'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA', 'NORMAL', 'PNEUMONIA', 'NORMAL']
[0, 1, 1, 1, 1, 1, 1, 0, 1, 0]


# numpy array with assoc labels

In [4]:
channels = 3
image_height = 224
image_width = 224

In [5]:
dataset = np.ndarray(shape=(image_count, image_height, image_width, channels),
                     dtype=np.uint8)

i = 0
for file in all_image_paths:
    img = load_img(file)  # this is a PIL image
    img = img.resize((image_width, image_height))
    x = img_to_array(img, 'channels_last') 
    dataset[i] = x
    i += 1
    if i % 250 == 0:
        print("%d images to array" % i)
print("All images to array!")

250 images to array
500 images to array
750 images to array
1000 images to array
1250 images to array
1500 images to array
1750 images to array
2000 images to array
2250 images to array
2500 images to array
2750 images to array
3000 images to array
3250 images to array
3500 images to array
3750 images to array
4000 images to array
4250 images to array
4500 images to array
4750 images to array
5000 images to array
5250 images to array
5500 images to array
5750 images to array
All images to array!


In [6]:
X_train, X_test, y_train, y_test = train_test_split(dataset, 
                                                    all_image_labels, 
                                                    test_size=0.2, 
                                                    random_state=33)

# Convert images

In [None]:
def convert_to_tfrecord(images, labels, num_examples, name, directory):
    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    if images.shape[0] != num_examples:
        raise ValueError('Images size %d does not match label size %d.' % (images.shape[0], num_examples))
    rows = images.shape[1]
    cols = images.shape[2]
    depth = images.shape[3]

    filename = os.path.join(directory, name + '.tfrecords')
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)
    for index in range(num_examples):
        image_raw = images[index].tobytes()
        example = tf.train.Example(features=tf.train.Features(feature={
            'height': _int64_feature(rows),
            'width': _int64_feature(cols),
            'depth': _int64_feature(depth),
            'label': _int64_feature(labels[index]),
            'image_raw': _bytes_feature(image_raw)}))
        writer.write(example.SerializeToString())
    writer.close()

In [None]:
convert_to_tfrecord(X_train, y_train, len(y_train), 
                    'chest_xray_images_train', os.getcwd())

In [None]:
convert_to_tfrecord(X_test, y_test, len(y_test), 
                    'chest_xray_images_test', os.getcwd())

# Upload the train and test .tfrecords files to s3

In [7]:
import gc

In [8]:
#del all_image_paths
del dataset

In [9]:
gc.collect()

10

In [10]:
!pwd

/home/ec2-user/SageMaker


In [12]:
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()

job_name_prefix = 'pneumonia-detection'

In [None]:
train_path = sagemaker_session.upload_data(path='/home/ec2-user/SageMaker/chest_xray_images_train.tfrecords', 
                                           key_prefix='{0}/input/tfrecord/train'.format(job_name_prefix))
test_path = sagemaker_session.upload_data(path='/home/ec2-user/SageMaker/chest_xray_images_test.tfrecords', 
                                          key_prefix='{0}/input/tfrecord/test'.format(job_name_prefix))

# Create tensorflow model

## hyperparameters

* Step: A training step means using one batch size of training data to train the model.
* Number of training steps per epoch: total_number_of_training_examples / batch_size.

In [34]:
epoch = 10
num_classes = 2
mini_batch_size =  64
max_steps = int(len(X_train) / mini_batch_size) * epoch
learning_rate = 0.01

In [35]:
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name = job_name_prefix + timestamp

In [36]:
input_prefix = '{0}/input/tfrecord'.format(job_name_prefix)
input_train = 's3://{}/{}/train/'.format(bucket, input_prefix)
input_test = 's3://{}/{}/test/'.format(bucket, input_prefix)
output_prefix = '{0}/output'.format(job_name_prefix)
output_path = 's3://{}/{}/'.format(bucket, output_prefix)

print(input_train)
print(input_test)

s3://sagemaker-us-east-2-755441266669/pneumonia-detection/input/tfrecord/train/
s3://sagemaker-us-east-2-755441266669/pneumonia-detection/input/tfrecord/test/


In [37]:
instance_count = 1
instance_type = 'ml.p2.xlarge'
volume_size_gb = 50

In [38]:
role = get_execution_role()
train_timeout = 360000
training_script_path = 'tensorflowScript.py'

# Create a sagemaker.Tensorflow estimator

In [39]:
estimator = TensorFlow(entry_point=training_script_path,
                       source_dir='source_tf_cnn_1',
                       role=role,
                       train_instance_count=instance_count,
                       train_instance_type=instance_type,
                       train_volume_size=volume_size_gb,
                       train_max_run=train_timeout,
                       model_dir=output_path,
                       output_path=output_path,
                       framework_version='1.12.0',
                       py_version = 'py3',
                       hyperparameters = {
                           'num-classes': num_classes,
                           'mini-batch-size': mini_batch_size,
                           'max-steps': max_steps,
                           'learning-rate': learning_rate
                       },
                       metric_definitions = [
                           {
                               'Name': 'loss',
                               'Regex': 'loss = ([0-9\\.]+)'
                           }
                       ])

In [40]:
estimator.fit({
    'train': input_train,
    'test': input_test
}, job_name = job_name)

2020-04-03 21:31:28 Starting - Starting the training job...
2020-04-03 21:31:29 Starting - Launching requested ML instances...
2020-04-03 21:32:25 Starting - Preparing the instances for training.........
2020-04-03 21:33:41 Downloading - Downloading input data......
2020-04-03 21:34:53 Training - Training image download completed. Training in progress..[34m2020-04-03 21:34:56,452 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-04-03 21:34:56,823 sagemaker-containers INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additional_framework_parameters": {},
    "channel_input_dirs": {
        "test": "/opt/ml/input/data/test",
        "train": "/opt/ml/input/data/train"
    },
    "current_host": "algo-1",
    "framework_module": "sagemaker_tensorflow_container.training:main",
    "hosts": [
        "algo-1"
    ],
    "hyperparameters": {
        "num-classes": 2,
        "learning-rate": 0.01,
        "max-steps"

# Deploy

In [41]:
role = get_execution_role()
instance_count = 1
instance_type = 'ml.m4.xlarge'

In [45]:
model_name_prefix = 'pnu-image-classification-tensorflow-cnn-1'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
model_name = model_name_prefix + timestamp

In [46]:
model_artifacts_s3_path = 's3://{0}/{1}/output/{2}/output/model.tar.gz'\
.format(bucket, 
        'pneumonia-detection',
        'pneumonia-detection-2020-04-03-17-35-56'
       )
model = Model(
    name=model_name,
    model_data=model_artifacts_s3_path,
    role=role
)

In [47]:
endpoint_name_prefix = 'pneumonia-detection-ep'
timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_name = endpoint_name_prefix + timestamp

predictor = model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=instance_count,
    instance_type=instance_type
)

-----------!

# Testing deployed model on validation

In [48]:
data_root = pathlib.Path(os.path.join(os.getcwd(), 'chest_xray_standard'))
all_image_paths = list(data_root.glob('val/*/*'))
all_image_paths = [str(path) for path in all_image_paths]
random.shuffle(all_image_paths)
image_count = len(all_image_paths)

all_image_paths[:10]

['/home/ec2-user/SageMaker/chest_xray_standard/val/NORMAL/NORMAL2-IM-1437-0001.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/PNEUMONIA/person1951_bacteria_4882.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/NORMAL/NORMAL2-IM-1431-0001.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/NORMAL/NORMAL2-IM-1440-0001.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/PNEUMONIA/person1946_bacteria_4874.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/PNEUMONIA/person1949_bacteria_4880.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/PNEUMONIA/person1947_bacteria_4876.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/NORMAL/NORMAL2-IM-1442-0001.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/NORMAL/NORMAL2-IM-1438-0001.jpeg',
 '/home/ec2-user/SageMaker/chest_xray_standard/val/NORMAL/NORMAL2-IM-1427-0001.jpeg']

In [49]:
all_image_labels = [pathlib.Path(path).parent.name
                    for path in all_image_paths]
print(all_image_labels[:10])
all_image_labels = list(map((lambda v: 1 if v == 'PNEUMONIA' else 0), 
                            all_image_labels))
print(all_image_labels[:10])

['NORMAL', 'PNEUMONIA', 'NORMAL', 'NORMAL', 'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA', 'NORMAL', 'NORMAL', 'NORMAL']
[0, 1, 0, 0, 1, 1, 1, 0, 0, 0]


In [50]:
channels = 3
image_height = 224
image_width = 224

In [51]:
dataset = np.ndarray(shape=(image_count, image_height, image_width, channels),
                     dtype=np.uint8)

i = 0
for file in all_image_paths:
    img = load_img(file)  # this is a PIL image
    
    img = img.resize((image_width, image_height))
    x = img_to_array(img, 'channels_last')
    print("x: ", x.shape)
    dataset[i] = x
    i += 1
    if i % 250 == 0:
        print("%d images to array" % i)
print("All images to array!")

x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
x:  (224, 224, 3)
All images to array!


In [57]:
def predict_pneumonia(image):
    response = predictor.predict(image)
    print('Received response is: ', response)
    print('Probabilities for all classes: ',
          response['predictions'][0]['probabilities'])
    predicted_class = response['predictions'][0]['classes']
    if predicted_class == 0:
        print('Pneumonia not detected')
        return 0
    else:
        print('Pneumonia detected')
        return 1

In [58]:
image_with_no_pnu = dataset[np.logical_not(all_image_labels)][3]

In [59]:
image_with_pnu = dataset[all_image_labels][3]

In [60]:
predict_pneumonia(image_with_no_pnu)

Received response is:  {'predictions': [{'probabilities': [1.0, 0.0], 'classes': 0}]}
Probabilities for all classes:  [1.0, 0.0]
Pneumonia not detected


0

In [61]:
predict_pneumonia(image_with_pnu)

Received response is:  {'predictions': [{'probabilities': [2.49911e-38, 1.0], 'classes': 1}]}
Probabilities for all classes:  [2.49911e-38, 1.0]
Pneumonia detected


1

## Get prediction labels

In [62]:
import pandas as pd
import numpy as np

In [63]:
def evaluation_metrics(confusion_matrix):
    tn, fn, fp, tp = confusion_matrix[0][0], confusion_matrix[0][1], confusion_matrix[1][0], confusion_matrix[1][1]
    accuracy = sum([tn, tp])/sum([tn, fn, fp, tp])
    precision = sum([tp])/ sum([tp, fp])
    recall = sum([tp])/ sum([tp, fn])
    f1_score = 2 * ((precision * recall)/(precision + recall))
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1_score}")
    return accuracy, precision, recall, f1_score

In [64]:
predictions = list([ predict_pneumonia(data) for data in dataset ])

Received response is:  {'predictions': [{'probabilities': [2.49911e-38, 1.0], 'classes': 1}]}
Probabilities for all classes:  [2.49911e-38, 1.0]
Pneumonia detected
Received response is:  {'predictions': [{'classes': 1, 'probabilities': [0.0, 1.0]}]}
Probabilities for all classes:  [0.0, 1.0]
Pneumonia detected
Received response is:  {'predictions': [{'probabilities': [1.0, 0.0], 'classes': 0}]}
Probabilities for all classes:  [1.0, 0.0]
Pneumonia not detected
Received response is:  {'predictions': [{'probabilities': [1.0, 0.0], 'classes': 0}]}
Probabilities for all classes:  [1.0, 0.0]
Pneumonia not detected
Received response is:  {'predictions': [{'probabilities': [0.0, 1.0], 'classes': 1}]}
Probabilities for all classes:  [0.0, 1.0]
Pneumonia detected
Received response is:  {'predictions': [{'classes': 0, 'probabilities': [1.0, 0.0]}]}
Probabilities for all classes:  [1.0, 0.0]
Pneumonia not detected
Received response is:  {'predictions': [{'probabilities': [0.0, 1.0], 'classes': 1}]

In [65]:
confusion_matrix = pd.crosstab(index=np.array(all_image_labels), columns=np.array(predictions),
                               rownames=['actual'], colnames=['prediction'])
confusion_matrix

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,6,2
1,1,7


In [66]:
evaluation_metrics(confusion_matrix)

Accuracy: 0.8125
Precision: 0.7777777777777778
Recall: 0.875
F1 Score: 0.823529411764706


(0.8125, 0.7777777777777778, 0.875, 0.823529411764706)

# Delete endpoint

In [67]:
sagemaker.Session().delete_endpoint(predictor.endpoint)