# IMAGE PREPROCESSING TECHNIQUES

In [1]:
#%%sh
# pip install -q pip --upgrade
# pip install -q --upgrade opencv-python
#pip install --user --upgrade tensorflow
#pip install -q --upgrade pillow

In [2]:
import cv2
import boto3
import os
import PIL
from io import BytesIO
from PIL import Image
import numpy as np
import time

In [3]:
# Report the library versions this notebook was run against, one per line.
print(
    f"OpenCV Version: {cv2.__version__}",
    f"Pillow Version: {PIL.__version__}",
    sep="\n",
)

OpenCV Version: 4.1.2
Pillow Version: 7.0.0


## Pillow Method

In [11]:
# Download the encoded image from S3 and open it with Pillow.
S3 = boto3.client('s3')

filename = 'public/Sofy Soul.png'
image_object = S3.get_object(Bucket='pornilarity-bucket170933-production', Key=filename)
file_content = image_object["Body"].read()  # encoded image bytes, fully in memory

# Build the uint8 view straight from `file_content` instead of reading it back
# out of the BytesIO wrapper: the bytes are the same, and the stream is not
# consumed before Pillow gets to read it.
pillow_array = np.frombuffer(file_content, np.uint8)

dataBytesIO = BytesIO(file_content)
pillow_image = Image.open(dataBytesIO)

In [13]:
# 'L' is Pillow's single-channel 8-bit grayscale mode.
pillow_gray = pillow_image.convert(mode='L')

In [15]:
# Shrink the grayscale image to 100x100 (note: this rebinds `pillow_image`).
pillow_image = pillow_gray.resize(size=(100, 100))

In [17]:
# Turn the resized image into a normalised 4D float32 tensor.
# NOTE: this cell rebinds `pillow_image`; re-running it on its own would divide
# the already-normalised array by 255 a second time.
pillow_image = (
    np.array(pillow_image)
    .reshape(-1, 100, 100, 1)  # 4D tensor: leading axis inferred, then 100 x 100 x 1
    .astype('float32')         # float32 before scaling
    / 255.0                    # scale 8-bit pixel values into [0, 1]
)

## OpenCV Method

In [12]:
# Download the encoded image from S3 and preprocess it with OpenCV.
S3 = boto3.client('s3')

filename = 'public/Sofy Soul.png'
image_object = S3.get_object(Bucket='pornilarity-bucket170933-production', Key=filename)
file_content = image_object["Body"].read()

# Decode the raw bytes into a BGR image.
np_array = np.frombuffer(file_content, np.uint8)
cv2_image_tensor = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
if cv2_image_tensor is None:
    # imdecode signals undecodable data by returning None rather than raising.
    raise ValueError(f"OpenCV could not decode {filename!r} as an image")

# Convert the image decoded above (previously this referenced `image_tensor`,
# which is not defined at this point in the notebook).
cv2_gray_image = cv2.cvtColor(cv2_image_tensor, cv2.COLOR_BGR2GRAY)

cv2_image = cv2.resize(cv2_gray_image, (100, 100))
cv2_image = cv2_image.reshape(-1, 100, 100, 1)  # reshape into a 4D tensor
cv2_image = cv2_image.astype('float32')         # change data type of image to float32
cv2_image = cv2_image / 255.0                   # scale 8-bit pixel values into [0, 1]

## Sample CloudWatch Logger

### Get Current Timestamp

In [44]:
import time
def get_current_timestamp():
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    return int(time.time() * 1000)

### Create Log Stream within Notebook Log Group

In [27]:
# Log group that the stream is created in.
LOG_GROUP_NAME = '/aws/sagemaker/NotebookInstances'
# Stream name is stamped with the creation date and time.
# The date part uses %m (month); %M is the minute directive and is only correct
# in the time part.
LOG_STREAM_NAME = f"[{time.strftime('%Y/%m/%d - %H/%M/%S')}] <inference_logs>"

In [28]:
# CloudWatch Logs client, reused by the cells that follow.
client = boto3.client('logs')
try:
    response = client.create_log_stream(
        logGroupName=LOG_GROUP_NAME,
        logStreamName=LOG_STREAM_NAME
    )
except client.exceptions.ResourceAlreadyExistsException:
    # Re-running this cell without regenerating LOG_STREAM_NAME hits a stream
    # that already exists; that is fine, so keep going and reuse it.
    response = None

### Create Initial Log event within Log Stream

In [None]:
# First event written to the stream; no sequence token is passed here.
response = client.put_log_events(
    logGroupName=LOG_GROUP_NAME,
    logStreamName=LOG_STREAM_NAME,
    logEvents=[
        dict(
            timestamp=get_current_timestamp(),  # milliseconds since the epoch
            message="Hello World!",
        ),
    ],
)

### Get next sequence token

In [45]:
# Look the stream up by name prefix and read its upload sequence token.
response = client.describe_log_streams(
    logStreamNamePrefix=LOG_STREAM_NAME,
    logGroupName=LOG_GROUP_NAME,
)

# NOTE(review): this takes the first stream returned for the prefix and assumes
# it carries an 'uploadSequenceToken'. AWS documents sequence tokens as ignored
# by PutLogEvents nowadays - confirm whether this lookup is still needed.
matching_streams = response['logStreams']
token = matching_streams[0]['uploadSequenceToken']

### Use sequence token obtained from response to continue adding more events

In [46]:
# token = response['nextSequenceToken']

# Append another event, passing along the sequence token read from the stream.
response = client.put_log_events(
    logGroupName=LOG_GROUP_NAME,
    logStreamName=LOG_STREAM_NAME,
    sequenceToken=token,
    logEvents=[
        dict(
            timestamp=get_current_timestamp(),  # milliseconds since the epoch
            message=f"Hello World with token {token}",
        ),
    ],
)

In [26]:
# Nested Python lists holding the preprocessed image values.
image_tensor = np.asarray(pillow_image).tolist()
# Textual form of that nested list.
string_tensor = repr(image_tensor)

In [60]:
import json, re

# Sample JSON body: an object with a single "predictions" key holding one row of
# float scores (every entry is ~0 except a single 1.0). It is the input passed to
# convert_bytes_to_array() further down.
# NOTE(review): presumably mirrors a model-endpoint response - confirm.
json_string = json.dumps({
    "predictions": [[1.79605185e-29, 2.44883862e-24, 6.03925444e-19, 4.58280264e-14, 1.80641227e-23, 3.06986203e-28, 4.79789881e-19, 2.18121113e-23, 4.88950596e-19, 2.00703e-21, 1.50391759e-15, 5.98734793e-14, 1.2061119e-13, 1.29583038e-22, 1.06595022e-28, 3.08147194e-24, 2.09108358e-15, 6.08894928e-17, 6.57895731e-16, 7.18972519e-29, 3.94493905e-21, 1.21453632e-23, 1.43495471e-23, 7.64568609e-30, 1.19619726e-19, 9.83542212e-25, 6.04389368e-13, 1.39188361e-15, 1.88600819e-11, 2.98319067e-25, 4.84899727e-18, 1.19852154e-20, 2.40176073e-29, 5.57373374e-19, 2.51498126e-20, 4.82108548e-13, 2.83524616e-24, 5.50734227e-21, 2.89885866e-20, 2.1909551e-12, 2.81259559e-29, 1.79046981e-29, 1.73175981e-13, 1.22634062e-19, 1.91456973e-25, 1.12097408e-20, 7.93001e-18, 3.85991789e-17, 1.34281756e-21, 1.07700352e-26, 1.72901518e-19, 9.52595739e-23, 9.16911421e-20, 9.53447607e-13, 7.30675298e-32, 4.44859931e-20, 7.89192508e-18, 1.08040326e-17, 7.20212336e-24, 1.37280684e-28, 5.42158924e-20, 8.74256775e-25, 1.88932007e-19, 6.11269257e-23, 2.74921e-24, 1.01490824e-13, 4.06968338e-21, 7.05227698e-29, 1.61418955e-21, 3.0218847e-15, 3.9907614e-18, 2.16097036e-15, 3.06890276e-20, 9.9172927e-17, 4.30674946e-20, 1.0, 3.63232132e-21, 5.5378725e-27, 1.52852333e-28, 1.62777314e-23, 5.28152614e-15, 9.00544085e-25, 3.73298452e-21, 5.81935091e-22, 3.08763887e-12, 3.09453667e-16, 1.22420528e-18, 2.44553e-27, 5.39139831e-19, 9.4311711e-19, 8.24885445e-20, 1.50251956e-21, 1.21229817e-27, 1.18388703e-17, 1.12860903e-17]
    ]
})

In [61]:
def _flatten_numbers(node):
    """Yield the scalar leaves of a decoded JSON value in document order."""
    if isinstance(node, dict):
        for value in node.values():
            yield from _flatten_numbers(value)
    elif isinstance(node, list):
        for item in node:
            yield from _flatten_numbers(item)
    else:
        yield node


def convert_bytes_to_array(response_body):
    """Parse a JSON response body into a flat float64 NumPy array.

    The body is decoded as JSON and every scalar found in it (at any nesting
    depth, in document order) is collected into a 1-D array. Parsing the JSON
    properly, instead of stripping characters with a regex, means keys are
    never mistaken for data: keys starting with ``e`` or containing digits or
    underscores no longer corrupt the result.

    Args:
        response_body: JSON text as ``str``, or UTF-8 encoded ``bytes`` /
            ``bytearray`` (for example a raw HTTP response body).

    Returns:
        numpy.ndarray: 1-D array of dtype ``float64``; empty when the body
        contains no values.

    Raises:
        ValueError: if the body is not valid JSON or a leaf value cannot be
            converted to a float.
    """
    # json.loads accepts str as well as bytes/bytearray, so no manual decoding.
    payload = json.loads(response_body)
    return np.array(list(_flatten_numbers(payload)), dtype=np.float64)

In [66]:
# Parse the sample body and keep the result as a plain Python list.
array = convert_bytes_to_array(json_string).tolist()