# IMPORTS

In [1]:
#!pip install --upgrade pip
#!pip install --upgrade setuptools
#!pip install --user --upgrade opencv-python
#!pip install --user --upgrade tqdm
#!pip install --user --upgrade tensorflow

In [2]:
import tensorflow as tf
import datetime
import time
import cv2
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
import PIL
import boto3, re
from collections import Counter
from random import shuffle, randint, seed
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.python.framework import graph_util
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
%matplotlib inline

In [3]:
print(f'OpenCV version: {cv2.__version__}')
print(f'Tensorflow version: {tf.__version__}')
print(f'Pillow version: {PIL.__version__}')

OpenCV version: 3.4.2
Tensorflow version: 1.14.0
Pillow version: 5.2.0


In [4]:
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("Please install GPU version of TF")

Please install GPU version of TF


## Useful Parameters

In [5]:
# DATE = datetime.date(2019, 11, 21).strftime('%d-%b-%Y')
IMG_SIZE = 100
LR = 1e-3
PATH = 'uploads/'
MODEL_NAME = 'pornilarity_model'
MODEL_PATH = f'export/Servo/{MODEL_NAME}/'
IMG_SIZE

100

## Read in Labels Text File

In [6]:
with open('labels.txt', 'r') as file:
    LABELS = file.read().splitlines()

# S3 Bucket Configuration

In [7]:
S3 = boto3.resource('s3')
BUCKET = S3.Bucket('pornilarity-bucket170933-production')
BUCKET.get_available_subresources()

['Acl',
 'Cors',
 'Lifecycle',
 'LifecycleConfiguration',
 'Logging',
 'Notification',
 'Object',
 'Policy',
 'RequestPayment',
 'Tagging',
 'Versioning',
 'Website']

## Download all images from S3 Bucket into local directory

In [8]:
"""for key in BUCKET.objects.all():
    name = key.key.split('/')[1]
    # print(key.key)
    BUCKET.download_file(key.key, f'uploads/{name}')"""

"for key in BUCKET.objects.all():\n    name = key.key.split('/')[1]\n    # print(key.key)\n    BUCKET.download_file(key.key, f'uploads/{name}')"

## Process Images

In [9]:
def process_images():
    test_images = []
    for img in tqdm(os.listdir(PATH)):
        img_name = str(img).strip() 
        if img_name == '.ipynb_checkpoints':
            print(f'Image filtered: {img_name}')
            continue
        IMAGE_NAMES.append(img_name)
        full_path = os.path.join(PATH, img)  # full path of the image
        # print(full_path)
        try:
            img = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            img = tf.cast(img, tf.float32)  # change data type of image to float32
            test_images.append(img)
        except Exception as e:
            print(full_path)
            print(str(e))
            
    return np.array(test_images)

In [10]:
IMAGE_NAMES = []
TEST_IMAGES = process_images()
TEST_IMAGES = np.array([i for i in TEST_IMAGES]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

 71%|███████   | 32/45 [00:01<00:00, 23.16it/s]

Image filtered: .ipynb_checkpoints


100%|██████████| 45/45 [00:01<00:00, 23.37it/s]


ValueError: cannot reshape array of size 44 into shape (100,100,1)

## Instantiate Model

In [13]:
MODEL = tf.keras.models.load_model(f'{MODEL_PATH}')
MODEL.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 98, 98, 32)        320       
_________________________________________________________________
activation (Activation)      (None, 98, 98, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 96, 96, 32)        9248      
_________________________________________________________________
activation_1 (Activation)    (None, 96, 96, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 48, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 46, 46, 64)        1

## Predictions

In [14]:
CLASS_INDEX = dict(zip(list(range(len(LABELS))), LABELS))
IMAGE_IDs = iter(range(len(TEST_IMAGES)))

def get_predictions(test_images, image_ids):
    results = {cls: [] for cls in LABELS}
    predicted_labels = []
    INDEX = 0
    print("%-10s %-25s %-20s" %("Image ID", "Image Name", "Prediction"))
    print("***********************************************************")
    for img in test_images:
        img = img / 255.0
        img = img.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
        pred = MODEL.predict(img).flatten()
        index = np.argmax(pred)
        label = LABELS[index]
        results = {LABELS[i]: results.get(LABELS[i]) + [pred[i]] for i in range(len(LABELS))}
        print("%-10d %-25s %-20s" %(next(image_ids), IMAGE_NAMES[INDEX], label))
        predicted_labels.append(label)
        INDEX += 1
        
    return np.array(predicted_labels), results

In [15]:
PREDICTED_LABELS, RESULTS = get_predictions(TEST_IMAGES, IMAGE_IDs)

Image ID   Image Name                Prediction          
***********************************************************
0          Evelina Darling.png       Evelina Darling     
1          Catarina Petrov.png       Catarina Petrov     
2          Samantha Ryan.png         Samantha Ryan       
3          Jessica Bangkok.png       Jessica Bangkok     
4          Marcelin Abadir.png       Marcelin Abadir     
5          Nicole Aniston.png        Nicole Aniston      
6          Zaya Cassidy.png          Audrey Bitoni       
7          Jenna Sativa.png          Amirah Adara        
8          Lisa Ann.png              Marcelin Abadir     
9          Emily Willis.png          Emily Willis        
10         Dana DeArmond.png         Dana DeArmond       
11         Lana Rhoades.png          Lana Rhoades        
12         Prinzzess.png             Samantha Ryan       
13         Riley Reid.png            Xev Bellringer      
14         Sarah Banks.png           Sarah Banks         
15         C

### Get Top 5 results for Test Image

In [None]:
"""
Re-structures the results dictionary so that each class_label points to another dictionary {k, v}
where k = the Image_Id number and v = the confidence value
"""
IMAGE_IDs = list(range(len(TEST_IMAGES)))
def gen_results(results):
    my_dict = {}
    for cls in LABELS:
        probs = iter(results[cls])
        my_dict.update({cls: {}})
        for k in IMAGE_IDs:
            my_dict[cls][int(k)] = next(probs)

    return my_dict

In [None]:
def get_top5(results, ID=1):
    results = gen_results(results)
    probs = np.array([(results[k][ID]) for k in results])
    indices = (-probs).argsort()[:5] # sorts probabilities (largest - smallest) + returns their corresponding array indices
    top_5 = [CLASS_INDEX.get(i) for i in indices]
    return top_5

In [None]:
Image_ID = 6
TOP_5 = get_top5(RESULTS, Image_ID)

### Save array of Top5 results as a textfile 

In [None]:
np.savetxt("Top5.txt", TOP_5, fmt="%s")

## Upload results back to S3 Bucket

In [None]:
FILE = f'top5-test_img-{int(time.time())}.txt'

In [None]:
BUCKET.upload_file('Top5.txt', f'results/{FILE}')