# IMPORTS

In [5]:
!pip install --user --upgrade opencv-python

Collecting opencv-python
[?25l  Downloading https://files.pythonhosted.org/packages/c0/a9/9828dfaf93f40e190ebfb292141df6b7ea1a2d57b46263e757f52be8589f/opencv_python-4.1.2.30-cp36-cp36m-manylinux1_x86_64.whl (28.3MB)
[K     |████████████████████████████████| 28.3MB 19.7MB/s eta 0:00:01
Installing collected packages: opencv-python
Successfully installed opencv-python-4.1.2.30


In [1]:
import tensorflow as tf
import datetime
import time
import cv2
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
import PIL
import boto3
from collections import Counter
from random import shuffle, randint, seed
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.python.framework import graph_util
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
%matplotlib inline

In [7]:
print(f'OpenCV version: {cv2.__version__}')
print(f'Tensorflow version: {tf.__version__}')
print(f'Pillow version: {PIL.__version__}')

OpenCV version: 4.1.2
Tensorflow version: 2.0.0
Pillow version: 5.2.0


In [8]:
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("Please install GPU version of TF")

Please install GPU version of TF


## Useful Parameters

In [9]:
DATE = datetime.date(2019, 11, 21).strftime('%d-%b-%Y')
IMG_SIZE = 100
LR = 1e-3
PATH = 'uploads/'
MODEL_PATH = f'models/{DATE}/'
MODEL_NAME = 'ImageClassifier-keras-5-Conv-Layer-{}.model'.format(int(time.time()))
IMG_SIZE

100

## Read in Labels Text File

In [10]:
with open('labels.txt', 'r') as file:
    LABELS = file.read().splitlines()

# S3 Bucket Configuration

In [11]:
s3 = boto3.resource('s3')
bucket = s3.Bucket('pornilarity-bucket170933-production')
bucket.get_available_subresources()

['Acl',
 'Cors',
 'Lifecycle',
 'LifecycleConfiguration',
 'Logging',
 'Notification',
 'Object',
 'Policy',
 'RequestPayment',
 'Tagging',
 'Versioning',
 'Website']

## Download all images from S3 Bucket into local directory

In [12]:
for key in bucket.objects.all():
    name = key.key.split('/')[1]
    # print(key.key)
    bucket.download_file(key.key, f'uploads/{name}')

## Process Images

In [14]:
def process_images():
    test_images = []
    for img in tqdm(os.listdir(PATH)):
        img_name = str(img) 
        full_path = os.path.join(PATH, img)  # full path of the image
        # print(full_path)
        # feature extraction
        try:
            img = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            img = tf.cast(img, tf.float32)  # change data type of image to float32
            test_images.append(img)
        except Exception as e:
            print(full_path)
            print(str(e))
            continue
            
    return np.array(test_images)

In [17]:
test_images = process_images()

 73%|███████▎  | 33/45 [00:00<00:00, 39.68it/s]

uploads/.ipynb_checkpoints
OpenCV(4.1.2) /io/opencv/modules/imgproc/src/resize.cpp:3720: error: (-215:Assertion failed) !ssize.empty() in function 'resize'



100%|██████████| 45/45 [00:01<00:00, 38.86it/s]


In [18]:
test_images = np.array([i for i in test_images]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
"""for img in test_images:
    print(img.shape)"""

'for img in test_images:\n    print(img.shape)'

## Load Model

In [19]:
MODEL = tf.keras.models.load_model(f'{MODEL_PATH}')
MODEL.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 98, 98, 32)        320       
_________________________________________________________________
activation (Activation)      (None, 98, 98, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 96, 96, 32)        9248      
_________________________________________________________________
activation_1 (Activation)    (None, 96, 96, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 48, 48, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 46, 46, 64)        1

## Predictions

In [20]:
CLASS_INDEX = dict(zip(list(range(len(LABELS))), LABELS))
IMAGE_IDs = iter(range(len(test_images)))

In [21]:
def get_predictions(test_images, IMAGE_IDs):
    results = {cls: [] for cls in LABELS}
    predicted_labels = []
    for img in test_images:
        img = img / 255.0
        img = img.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
        pred = MODEL.predict(img).flatten()
        index = np.argmax(pred)
        label = LABELS[index]
        results = {LABELS[i]: results.get(LABELS[i]) + [pred[i]] for i in range(len(LABELS))}
        print(f"Image ID: {next(IMAGE_IDs)}\t | Prediction: {label}")
        predicted_labels.append(label)
 
    return np.array(predicted_labels), results

In [22]:
PREDICTED_LABELS, RESULTS = get_predictions(test_images, IMAGE_IDs)

Image ID: 0	 | Prediction: Evelina Darling
Image ID: 1	 | Prediction: Catarina Petrov
Image ID: 2	 | Prediction: Samantha Ryan
Image ID: 3	 | Prediction: Jessica Bangkok
Image ID: 4	 | Prediction: Marcelin Abadir
Image ID: 5	 | Prediction: Nicole Aniston
Image ID: 6	 | Prediction: Audrey Bitoni
Image ID: 7	 | Prediction: Amirah Adara
Image ID: 8	 | Prediction: Marcelin Abadir
Image ID: 9	 | Prediction: Emily Willis
Image ID: 10	 | Prediction: Dana DeArmond
Image ID: 11	 | Prediction: Lana Rhoades
Image ID: 12	 | Prediction: Samantha Ryan
Image ID: 13	 | Prediction: Xev Bellringer
Image ID: 14	 | Prediction: Sarah Banks
Image ID: 15	 | Prediction: Cherie De Ville
Image ID: 16	 | Prediction: Bridgette B
Image ID: 17	 | Prediction: Karlie Montana
Image ID: 18	 | Prediction: Sunny Leone
Image ID: 19	 | Prediction: Madison Ivy
Image ID: 20	 | Prediction: Allie Haze
Image ID: 21	 | Prediction: Veronica Rodriguez
Image ID: 22	 | Prediction: Brenda James
Image ID: 23	 | Prediction: Aiden Starr

### Get Top 5 results for Test Image

In [33]:
"""
Re-structures the results dictionary so that each class_label points to another dictionary {k, v}
where k = the Image_Id number and v = the confidence value
"""
IMAGE_IDs = list(range(len(test_images)))
def gen_results(results):
    my_dict = {}
    for cls in LABELS:
        probs = iter(results[cls])
        my_dict.update({cls: {}})
        for k in IMAGE_IDs:
            my_dict[cls][int(k)] = next(probs)

    return my_dict

In [36]:
def get_top5(results, ID=1):
    results = gen_results(results)
    probs = np.array([(results[k][ID]) for k in results])
    # print(f'Reverse: {(-probs).argsort()} - {sorted(probs, reverse=True)}')
    indices = (-probs).argsort()[:5] # sorts probabilities (largest - smallest) + returns their corresponding array indices
    top_5 = [CLASS_INDEX.get(i) for i in indices]
    return top_5

In [37]:
Image_ID = 6
TOP_5 = get_top5(RESULTS, Image_ID)

['Audrey Bitoni',
 'Cameron Canela',
 'Lana Rhoades',
 'Gianna Dior',
 'Jelena Jensen']

In [38]:
np.savetxt("Top5.txt", TOP_5, fmt="%s")