# IMPORTS

In [101]:
import tensorflow as tf
import datetime
import time
import cv2
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
from collections import Counter
from random import shuffle, randint, seed
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.python.framework import graph_util
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
%matplotlib inline

In [102]:
import bz2
import argparse
from tensorflow.keras.utils import get_file
from ffhq_dataset.face_alignment import image_align
from ffhq_dataset.landmarks_detector import LandmarksDetector

In [103]:
print(f'OpenCV version: {cv2.__version__}')
print(f'Tensorflow version: {tf.__version__}')

OpenCV version: 4.1.0
Tensorflow version: 2.0.0


In [104]:
## Gets the repo for aligned images

# !rm -rf sample_data
# !git clone https://github.com/pbaylies/stylegan-encoder

# PREPROCESSING

In [105]:
# Check what folder to use for training and testing images
CHISOM_TRAIN_DIR = 'C:/Users/chiso/MEGA/data/train'
CHISOM_TEST_DIR = 'C:/Users/chiso/MEGA/data/test'
CHISOM_ALIGNED_TRAIN_DIR = 'C:/Users/chiso/MEGA/data/aligned_train'
CHISOM_ALIGNED_TEST_DIR = 'C:/Users/chiso/MEGA/data/aligned_test'

YISI_TRAIN_DIR = 'E:/MegaSync/data/train'
YISI_TEST_DIR = 'E:/MegaSync/data/test'
YISI_ALIGNED_TRAIN_DIR = 'E:/MegaSync/data/aligned_train'
YISI_ALIGNED_TEST_DIR = 'E:/MegaSync/data/aligned_test'


def get_directories():
    if os.path.exists(CHISOM_TRAIN_DIR) and os.path.exists(CHISOM_TEST_DIR) \
            and os.path.exists(CHISOM_ALIGNED_TRAIN_DIR) and os.path.exists(CHISOM_ALIGNED_TEST_DIR):
        return CHISOM_TRAIN_DIR, CHISOM_TEST_DIR, CHISOM_ALIGNED_TRAIN_DIR, CHISOM_ALIGNED_TEST_DIR
    else:
        return YISI_TRAIN_DIR, YISI_TEST_DIR, YISI_ALIGNED_TRAIN_DIR, YISI_ALIGNED_TEST_DIR

### Useful Parameters

In [106]:
DATE = datetime.datetime.now().strftime('%d-%b-%Y')
TRAIN_DIR, TEST_DIR, ALIGNED_TRAIN_DIR, ALIGNED_TEST_DIR = get_directories()
IMG_SIZE = 50
LR = 1e-3
MODEL_PATH = f'models/{DATE}/'
MODEL_NAME = 'ImageClassifier-keras-5-Conv-Layer-{}.model'.format(int(time.time()))
TENSORBOARD = TensorBoard(log_dir=f'logs\\{MODEL_NAME}') 
NUM_CLASSES = len(next(os.walk(ALIGNED_TRAIN_DIR))[1])
NUM_CLASSES

38

In [107]:
TEST_DIR, TRAIN_DIR, ALIGNED_TRAIN_DIR, ALIGNED_TEST_DIR

('C:/Users/chiso/MEGA/data/test',
 'C:/Users/chiso/MEGA/data/train',
 'C:/Users/chiso/MEGA/data/aligned_train',
 'C:/Users/chiso/MEGA/data/aligned_test')

### Walkthrough of Subfolders in Train Directory:

In [108]:
# Only the root
ROOTS = next(os.walk(ALIGNED_TRAIN_DIR))[0]
print(f"Roots = {ROOTS}")

Roots = C:/Users/chiso/MEGA/data/aligned_train


In [109]:
# Only the directories
DIRS = next(os.walk(ALIGNED_TRAIN_DIR))[1]
DIRS

['Abella Danger',
 'Aiden Starr',
 'Aidra Fox',
 'Aletta Ocean',
 'Alina Lopez',
 'Allie Haze',
 'Amirah Adara',
 'Andriana Chechik',
 'Ariella Ferrera',
 'Brenda James',
 'Chastity Lynn',
 'Dana DeArmond',
 'Dana Weyron',
 'Emily Willis',
 'Evelina Darling',
 'Jessa Rhodes',
 'Jessica Bangkok',
 'Julia Ann',
 'Kimmy Granger',
 'Krystal Boyd',
 'Lana Rhoades',
 'Leyla Fiore',
 'Little Caprice',
 'Madison Ivy',
 'Marcelin Abadir',
 'Mellanie Monroe',
 'Mia Khalifa',
 'Nicole Aniston',
 'Peta Jensen',
 'Riley Reid',
 'Riley Steele',
 'Samantha Ryan',
 'Shyla Jennings',
 'Stella Cox',
 'Tanya Tate',
 'Valentina Nappi',
 'Xev Bellringer',
 'Zoey Holloway']

In [110]:
# Only the files
"""for root, dirs, files in os.walk(ALIGNED_TRAIN_DIR):
    for name in files:
        print(name.split('.')[0]) # filters the file name by file extension and the copy_number
        
"""

"for root, dirs, files in os.walk(ALIGNED_TRAIN_DIR):\n    for name in files:\n        print(name.split('.')[0]) # filters the file name by file extension and the copy_number\n        \n"

### Generation of Image classes

In [111]:
# Used for abbreviating the class names NOT USED

"""def get_class_labels():
    labels = []
    for root, dirs, files in os.walk(TRAIN_DIR):
        path = root.split(os.sep)
        for folder in dirs:
            name = folder.split()
            class_label = "".join([letter[0] for letter in name])
            labels.append(class_label)
    return labels"""

'def get_class_labels():\n    labels = []\n    for root, dirs, files in os.walk(TRAIN_DIR):\n        path = root.split(os.sep)\n        for folder in dirs:\n            name = folder.split()\n            class_label = "".join([letter[0] for letter in name])\n            labels.append(class_label)\n    return labels'

### One-Hot Encoding

In [112]:
LABELS = next(os.walk(ALIGNED_TRAIN_DIR))[1] # all the class labels (pornstar names) to be used
LABELS = np.reshape(LABELS, (-1, 1)) # reshapes array from 1D to 2D array
mlb = MultiLabelBinarizer()
encoded_labels = np.array(mlb.fit_transform(LABELS))
# dict(zip(LABELS.flatten(), encoded_labels))

In [113]:
# img.split('.')[0].split('(')[0]  # filters the file name by file extension and the copy_number
"""
Labelled training data
"""
def create_train_data():
    training_data = []
    # iterate over each image-class (subfolder) in training directory
    for folder in tqdm(os.listdir(ALIGNED_TRAIN_DIR)):
        full_path = f'{ALIGNED_TRAIN_DIR}/{folder}'
        # iterate over each image in each subfolder
        for img in os.listdir(full_path):
            ##### !python align_images.py raw_images/ aligned_images/ --output_size=1048
            img_name = str(folder)  # the sub-folder is used as the image name for each image
            img_name = img_name.strip() # removes any leading and trailing whitespaces from the img name
            label = mlb.transform([[img_name]]) # encodes the label of the image using MultiLabelBinarizer
            label = label.flatten()  # converts encoded label from 2D to 1D array
            # print(f'Image: {img} - Encoding:{label}')
            path = os.path.join(full_path, img)  # full path of the image
            # feature extraction
            img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (IMG_SIZE, IMG_SIZE))
            img = tf.cast(img, tf.float32) # change data type of image to float32
            training_data.append([np.array(img), np.array(label)])
    shuffle(training_data)
    np.save('train_data.npy', training_data)
    return training_data

In [114]:
"""
Unlabelled test data
"""
def process_test_data():
    img_ids = list(range(len(os.listdir(ALIGNED_TEST_DIR)))) # generates list of ID numbers
    shuffle(img_ids) # randomly assorted
    img_ids = iter(img_ids) 
    testing_data = [] 
    for img in tqdm(os.listdir(ALIGNED_TEST_DIR)):
        path = os.path.join(ALIGNED_TEST_DIR, img)
        img_num = next(img_ids)
        print(f"ID: {img_num} - Image: {img}")
        # feature extraction
        img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (IMG_SIZE, IMG_SIZE))
        img = tf.cast(img, tf.float32)
        testing_data.append([np.array(img), img_num])
    np.save('test_data.npy', testing_data)
    return testing_data     

### Generate Training and Testing data

In [115]:
train_data = create_train_data()
test_data = process_test_data()
# if train/test data already exists
# train_data = np.load('train_data.npy', allow_pickle=True)
# test_data = np.load('test_data.npy', allow_pickle=True)

  0%|          | 0/38 [00:00<?, ?it/s]  3%|▎         | 1/38 [00:02<01:23,  2.26s/it]  5%|▌         | 2/38 [00:03<01:14,  2.08s/it]  8%|▊         | 3/38 [00:06<01:17,  2.21s/it] 11%|█         | 4/38 [00:08<01:15,  2.23s/it] 13%|█▎        | 5/38 [00:10<01:07,  2.06s/it] 16%|█▌        | 6/38 [00:13<01:13,  2.29s/it] 18%|█▊        | 7/38 [00:16<01:16,  2.48s/it] 21%|██        | 8/38 [00:18<01:15,  2.53s/it] 24%|██▎       | 9/38 [00:21<01:13,  2.55s/it] 26%|██▋       | 10/38 [00:23<01:06,  2.39s/it] 29%|██▉       | 11/38 [00:25<00:59,  2.21s/it] 32%|███▏      | 12/38 [00:27<01:00,  2.34s/it] 34%|███▍      | 13/38 [00:29<00:56,  2.28s/it] 37%|███▋      | 14/38 [00:32<00:54,  2.26s/it] 39%|███▉      | 15/38 [00:33<00:49,  2.13s/it] 42%|████▏     | 16/38 [00:36<00:48,  2.22s/it] 45%|████▍     | 17/38 [00:38<00:45,  2.15s/it] 47%|████▋     | 18/38 [00:40<00:44,  2.24s/it] 50%|█████     | 19/38 [00:43<00:44,  2.33s/it] 53%|█████▎    | 20/38 [00:45<00:39,  2.17s/it] 55%|████

ID: 15 - Image: Aiden Starr.png
ID: 21 - Image: Allie Haze.png
ID: 11 - Image: Andriana Chechik.png
ID: 12 - Image: Anna.jpg
ID: 9 - Image: Ariella Ferrera.png
ID: 5 - Image: Emily Willis.png
ID: 19 - Image: Evelina Darling.png
ID: 1 - Image: Jessa Rhodes.png
ID: 14 - Image: Jessica Bangkok.png
ID: 6 - Image: Julia Ann.png
ID: 18 - Image: Krystal Boyd.png
ID: 22 - Image: Lana Rhoades.png
ID: 20 - Image: Madison Ivy.png
ID: 13 - Image: Marcelin Abadir.png
ID: 16 - Image: Mellanie Monroe.png
ID: 0 - Image: Mia Khalifa.png
ID: 10 - Image: Nicole Aniston.png
ID: 23 - Image: Peta Jensen.png
ID: 4 - Image: Riley Reid.png
ID: 8 - Image: Samantha Ryan.png
ID: 2 - Image: Stella Cox.png
ID: 3 - Image: Tanya Tate.png
ID: 17 - Image: Valentina Nappi.png
ID: 7 - Image: Zoey Holloway.png


In [116]:
print(len(train_data))
print(len(test_data))

2401
24


# BUILDING THE MODEL

### Implementation of Convoluted Neural Network

In [117]:
def create_cnn_model():
    # tf.reset_default_graph()
    model = Sequential()
    input_shape = (IMG_SIZE, IMG_SIZE, 1)
    
    # INPUT LAYER
    model.add(Conv2D(32, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    # model.add(MaxPooling2D(pool_size=(2,2)))

    # HIDDEN LAYER 1
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    # model.add(Dropout(0.25))

    # HIDDEN LAYER 2
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    
    # HIDDEN LAYER 3
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))
    
    # HIDDEN LAYER 4
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Dropout(0.25))

    # Fully Connected
    model.add(Flatten()) # converts the 3D feature maps to 1D feature vectors
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5)) # reduces overfitting

    # OUTPUT LAYER
    model.add(Dense(NUM_CLASSES))
    model.add(Activation('softmax'))
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

# TRAINING THE NETWORK

### Cross Validation Train/Test Split

In [118]:
ratio = int(round(len(train_data), -1) * 0.2)
train = train_data[:-ratio] # sample train data
test = train_data[-ratio:]

In [119]:
train_X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1) # train features (images)
train_Y = np.array([i[1] for i in train]) # train labels

test_X = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1) # test features (images)
test_Y = np.array([i[1] for i in test]) # test labels

### Feature Scaling (Normalization)

In [120]:
# Have to divide by 255 
train_X = train_X/255.0
test_X = test_X/255.0

In [121]:
print(f"train data: {train_X.shape}")
print(f"train labels: {train_Y.shape}")
print(f"test data: {test_X.shape}")
print(f"test labels: {test_Y.shape}")

train data: (2041, 50, 50, 1)
train labels: (2041, 38)
test data: (360, 50, 50, 1)
test labels: (360, 38)


### Frequency distribution of classes being used in "test data"

In [122]:
enc = []
for img in test:
    enc.append(img[1])
    
enc = np.array(enc)
test_labels = mlb.inverse_transform(enc)
c = Counter(test_labels)
c

Counter({('Krystal Boyd',): 7,
         ('Mellanie Monroe',): 10,
         ('Aidra Fox',): 18,
         ('Riley Steele',): 6,
         ('Andriana Chechik',): 15,
         ('Emily Willis',): 12,
         ('Samantha Ryan',): 11,
         ('Jessa Rhodes',): 13,
         ('Tanya Tate',): 9,
         ('Chastity Lynn',): 9,
         ('Allie Haze',): 13,
         ('Ariella Ferrera',): 16,
         ('Zoey Holloway',): 9,
         ('Little Caprice',): 12,
         ('Mia Khalifa',): 10,
         ('Aiden Starr',): 4,
         ('Julia Ann',): 9,
         ('Nicole Aniston',): 5,
         ('Dana Weyron',): 13,
         ('Amirah Adara',): 12,
         ('Madison Ivy',): 8,
         ('Leyla Fiore',): 4,
         ('Kimmy Granger',): 11,
         ('Aletta Ocean',): 8,
         ('Lana Rhoades',): 9,
         ('Abella Danger',): 7,
         ('Peta Jensen',): 8,
         ('Riley Reid',): 13,
         ('Dana DeArmond',): 18,
         ('Brenda James',): 4,
         ('Marcelin Abadir',): 6,
         ('Valentin

In [None]:
MODEL = create_cnn_model()
MODEL.summary()
history = MODEL.fit(train_X, train_Y, batch_size=32, epochs=150, validation_data=(test_X, test_Y), verbose=2, callbacks=[TENSORBOARD])

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 48, 48, 32)        320       
_________________________________________________________________
activation_19 (Activation)   (None, 48, 48, 32)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 46, 46, 32)        9248      
_________________________________________________________________
activation_20 (Activation)   (None, 46, 46, 32)        0         
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 23, 23, 32)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 21, 21, 64)        18496     
_________________________________________________________________
activation_21 (Activation)   (None, 21, 21, 64)       

## Saving Model

In [None]:
MODEL.save(f'{MODEL_PATH}')

## Load Model

In [None]:
# MODEL = tf.keras.models.load_model(f'{MODEL_PATH}')

### Convert model to TensorFlow Lite format

In [None]:
"""converter = tf.lite.TFLiteConverter.from_keras_model(MODEL)
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)"""

# RESULTS

In [None]:
CLASS_INDEX = dict(zip([np.argmax(x) for x in encoded_labels], LABELS.flatten()))
# CLASS_INDEX = dict(sorted(CLASS_INDEX.items()))
LABELS = LABELS.flatten()
IMAGE_IDs = []

### Graph Plot of Predicted Classes

In [None]:
fig = plt.figure(figsize=(20,10))
results = {cls: [] for cls in LABELS}

# iterate over each image in test_sample
# get the model's class prediction of the image
for num, data in enumerate(test_data):
    data[0] = data[0] / 255.0
    img_data = data[0]
    img_num = data[1]
    y = fig.add_subplot(6, 6, num + 1)
    orig = img_data
    data = img_data.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    model_out = MODEL.predict([data]).flatten()
    index = np.argmax(model_out)
    # generate output dictionary
    results = {LABELS[i]: results.get(LABELS[i]) + [model_out[i]] for i in range(NUM_CLASSES)}
    IMAGE_IDs.append(img_num)
    
    # cross-reference the predicted class-index to its class-label (for each test image)
    class_label = CLASS_INDEX.get(index, 'Invalid class!')
    print(f"Image ID: {img_num}\t | Prediction: {class_label}")

    y.imshow(orig, cmap='gray')
    plt.title(f'{img_num}: {class_label}')
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()
# plt.savefig('Class Results')

In [None]:
imgs = [img.split('.')[0] for img in next(os.walk(ALIGNED_TEST_DIR))[2]]

### Tabulated Prediction Probabilities

In [None]:
# Creates a HeatMap using the seaborn library
cm = sns.light_palette("red", as_cmap=True)
df = pd.DataFrame.from_dict(results, orient='index', columns=imgs)
df.style.\
    format("{:.2%}").\
    set_caption('Confidence Values')\
    .background_gradient(cmap=cm)

In [None]:
"""
Re-structures the results dictionary so that each class_label points to another dictionary {k, v}
where k = the Image_Id number and v = the confidence value
"""

def gen_results(results):
    my_dict = {}
    for cls in LABELS:
        probs = iter(results[cls])
        my_dict.update({cls: {}})
        for k in IMAGE_IDs:
            my_dict[cls][int(k)] = next(probs)

    return my_dict

In [None]:
def get_top5(results, ID=1):
    results = gen_results(results)
    probs = np.array([(results[k][ID]) for k in results])
    # print(f'Reverse: {(-probs).argsort()} - {sorted(probs, reverse=True)}')
    indices = (-probs).argsort()[:5] # sorts probabilities (largest - smallest) + returns their corresponding array indices
    top_5 = [CLASS_INDEX.get(i) for i in indices]
    return top_5

In [None]:
Image_ID = 7
TOP_5 = get_top5(results, Image_ID)
TOP_5

# Get Overall Accuracy

In [None]:
def get_overall_accuracy(results):
    i = 0
    num_correct = 0
    total = len(test_data) # total number of images
    keys = results.keys()
    class_labels = []
    
    for ID in IMAGE_IDs: # loop through each image ID
        predictions = []
        for key in list(keys): # for each model in the results dictionary
            prob = results[key].get(ID)
            predictions.append(prob)
        max_index = np.argmax(predictions) # max index
        label = CLASS_INDEX.get(max_index, 'Invalid class!')
        class_labels.append(label)
    
    for img in os.listdir(ALIGNED_TEST_DIR):
        img = img.split('.')[0].strip() # gets the class name of the image file
        if img == class_labels[i]:
            num_correct += 1
            # print(f"Image name: {img} - predicted label: {class_labels[i]}")
        print(f"Image name: {img} - predicted label: {class_labels[i]}")
        i += 1 
        
        
    accuracy = round((num_correct / total) * 100, 2)
    return f'{accuracy}%'

# Overall Accuracy

In [None]:
get_overall_accuracy(gen_results(results))