# Makeup Binary Classification

This dataset is made up of images of people wearing makeup and not wearing makeup. The goal of the project is to build a model that is able to distinguish between the two and make highly accurate predictions on new unseen images.

## Imports

In [1]:
# ------------------------------------------------
# Import libraries
# ------------------------------------------------
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3
from shutil import copyfile
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
import gradio as gr
from torchvision import transforms


# ------------------------------------------------
# Extract Zip File
# ------------------------------------------------
local_zip = 'makeup.zip'
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

## Create file-structure to separate data

In [2]:
# ---------------------------------------------------------
# Create file structure for tensorflow to label/split data 
# ---------------------------------------------------------
# makedirs(..., exist_ok=True) creates missing parents and is a no-op for
# directories that already exist, so re-running this cell is safe and a
# pre-existing '/tmp/makeup' no longer prevents the sub-directories from
# being created.
for split_name in ('training', 'testing'):
    for class_name in ('yes_makeup', 'no_makeup'):
        os.makedirs(os.path.join('/tmp/makeup', split_name, class_name), exist_ok=True)

In [3]:
# ------------------------------------------------
# Function to split data and write to directories
# ------------------------------------------------
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly split the files in SOURCE between TRAINING and TESTING.

    Args:
        SOURCE: Directory containing the files to split.
        TRAINING: Existing directory that receives the training share.
        TESTING: Existing directory that receives the remaining files.
        SPLIT_SIZE: Fraction (0..1) of the usable files copied to TRAINING.

    Zero-length files are reported and skipped; entries that are not regular
    files (e.g. sub-directories) are skipped as well. Paths are joined with
    os.path.join, so the directories work with or without a trailing slash.
    """
    all_files = []

    # Sort the listing so that, with a fixed random seed, the shuffle below is
    # reproducible regardless of the order the filesystem returns entries in.
    for file_name in sorted(os.listdir(SOURCE)):
        file_path = os.path.join(SOURCE, file_name)

        # copyfile cannot copy directories, so only regular files are eligible.
        if not os.path.isfile(file_path):
            continue

        if os.path.getsize(file_path):
            all_files.append(file_name)
        else:
            print('{} is zero length, so ignoring'.format(file_name))

    n_files = len(all_files)
    split_point = int(n_files * SPLIT_SIZE)

    # Sampling the whole population without replacement is a shuffle that
    # leaves all_files untouched.
    shuffled = random.sample(all_files, n_files)

    for file_name in shuffled[:split_point]:
        copyfile(os.path.join(SOURCE, file_name), os.path.join(TRAINING, file_name))

    for file_name in shuffled[split_point:]:
        copyfile(os.path.join(SOURCE, file_name), os.path.join(TESTING, file_name))

In [4]:
# ------------------------------------------------
# Directory paths used by the split step
# (every path ends with '/')
# ------------------------------------------------
# Source images, one folder per class
YES_SOURCE_DIR = "/tmp/data/makeup/"
NO_SOURCE_DIR = "/tmp/data/no_makeup/"

# Destination folders for the training share
TRAINING_YES_MAKEUP_DIR = "/tmp/makeup/training/yes_makeup/"
TRAINING_NO_MAKEUP_DIR = "/tmp/makeup/training/no_makeup/"

# Destination folders for the testing share
TESTING_YES_MAKEUP_DIR = "/tmp/makeup/testing/yes_makeup/"
TESTING_NO_MAKEUP_DIR = "/tmp/makeup/testing/no_makeup/"

In [5]:
# ------------------------------------------------
# Split each class 90/10 into training and testing
# ------------------------------------------------
# Seed Python's RNG so the random.sample shuffle inside split_data starts from
# the same state on every run of this cell.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_size = .9
split_data(YES_SOURCE_DIR, TRAINING_YES_MAKEUP_DIR, TESTING_YES_MAKEUP_DIR, split_size)
split_data(NO_SOURCE_DIR, TRAINING_NO_MAKEUP_DIR, TESTING_NO_MAKEUP_DIR, split_size)

## Loading in Inception V3 model for transfer learning!

I am using transfer learning for this problem because there is not much data to train on: reusing weights that were trained on another task and adapting them with my own data is a good fit.

In [None]:
# --------------------------------------------------
# Loading weights and applying to inception model
# --------------------------------------------------
local_weights_file = 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# weights=None builds the architecture only; the weights are loaded from the
# local file on the next statement.
pre_trained_model = InceptionV3(input_shape=(150, 150, 3),
                                include_top=False,
                                weights=None)

pre_trained_model.load_weights(local_weights_file)

# --------------------------------------------------
# Make all layers in pre-trained model not trainable
# --------------------------------------------------
# Setting the flag on the model applies it to every layer it contains, so a
# single assignment replaces the previous per-layer loop (which never used its
# loop variable and simply repeated this same assignment).
pre_trained_model.trainable = False

# --------------------------------------------------
# Getting last layer output to connect to fine-tuned
# DNN attached for transfer learning
# --------------------------------------------------
last_layer = pre_trained_model.get_layer('mixed7')
last_output = last_layer.output
# Read the shape from the output tensor: `layer.output_shape` is not available
# in newer Keras releases, while `layer.output.shape` works in old and new.
print('last layer output shape: ', last_output.shape)

In [None]:
# ---------------------
# Defining DNN to train
# ---------------------
# Classification head stacked on the 'mixed7' output: flatten, then two
# Dense(relu) + Dropout(0.3) stages (512 and 32 units), then one sigmoid unit.
layers = tf.keras.layers

head = layers.Flatten()(last_output)
for units in (512, 32):
    head = layers.Dense(units, activation='relu')(head)
    head = layers.Dropout(0.3)(head)
x = layers.Dense(1, activation='sigmoid')(head)

model = tf.keras.Model(pre_trained_model.input, x)

## Specify optimizer, create checkpoint model files, Augment training data

In [None]:
# ----------------
# Define Optimizer
# ----------------
# Adam optimizer with a learning rate of 5e-4; every hyper-parameter is
# spelled out explicitly.
adam = tf.keras.optimizers.Adam(
    learning_rate=0.0005,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name='Adam',
)

In [None]:
# ------------------------------------
# Saving model using Checkpoint method
# ------------------------------------
# save_best_only=True with monitor='val_loss': the file is only overwritten
# when the validation loss improves.
filepath = "best_model_file.h5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=0,
    save_best_only=True,
)
callbacks_list = [checkpoint]

In [None]:
# ---------------
# Compiling model
# ---------------
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    optimizer=adam,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# ---------------------------------------
# Using Data Augmentation to have more
# data to train on and for regularization
# ---------------------------------------
TRAINING_DIR = '/tmp/makeup/training'
VALIDATION_SPLIT = 0.2

# Augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale=1/255,
    rotation_range=10,
    width_shift_range=.1,
    height_shift_range=.1,
    shear_range=.1,
    zoom_range=.1,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT
)

# Validation images are only rescaled: augmenting them would make val_loss
# (which drives the best-model checkpoint) noisy and not comparable between
# epochs.
# NOTE(review): this relies on both generators using the same
# validation_split so that the 'training' and 'validation' subsets come from
# the same partition of the files -- confirm against the Keras docs.
validation_datagen = ImageDataGenerator(
    rescale=1/255,
    validation_split=VALIDATION_SPLIT
)

train_generator = train_datagen.flow_from_directory(TRAINING_DIR,
                                                    batch_size=32,
                                                    target_size=(150, 150),
                                                    class_mode='binary',
                                                    subset='training')

validation_generator = validation_datagen.flow_from_directory(TRAINING_DIR,
                                                              batch_size=32,
                                                              target_size=(150, 150),
                                                              class_mode='binary',
                                                              subset='validation')

In [None]:
# ------------------------------
# Creating Test Generator
# ------------------------------
# Held-out images are only rescaled to [0, 1]; no augmentation is applied.
TEST_DIR = '/tmp/makeup/testing'
test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    batch_size=32,
    target_size=(150, 150),
    class_mode='binary',
)

## Training!

In [None]:
# ------------------------------
# Training Model
# ------------------------------
# `batch_size` is intentionally not passed: the generators already yield
# batches (batch_size=32 is set in flow_from_directory), and Keras documents
# that it must not be specified for generator inputs.
history = model.fit(train_generator,
                    epochs=50,
                    verbose=1,
                    validation_data=validation_generator,
                    callbacks=callbacks_list)

## Training evaluation

In [None]:
import matplotlib
matplotlib.rcParams.update({'font.size': 22})
# -----------------------------------------------------------
# Retrieve the per-epoch metric lists recorded on the training
# and validation subsets
# -----------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))  # Get number of epochs

# ------------------------------------------------
# Plot training and validation accuracy per epoch
# ------------------------------------------------
fig_acc, ax_acc = plt.subplots(figsize=(15, 10))
ax_acc.plot(epochs, acc, '-g', label="Training Accuracy")
ax_acc.plot(epochs, val_acc, '-b', label="Validation Accuracy")
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# ------------------------------------------------
# Plot training and validation loss per epoch
# ------------------------------------------------
fig_loss, ax_loss = plt.subplots(figsize=(15, 10))
ax_loss.plot(epochs, loss, '-g', label="Training Loss")
ax_loss.plot(epochs, val_loss, '-b', label="Validation Loss")
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.show()

# Testing Evaluation

In [None]:
# -------------------------------
# Load best model iteration
# -------------------------------
# Only `tf` is imported in this notebook, so the loader has to be reached
# through `tf.keras`; a bare `keras` name is undefined and raised NameError.
model = tf.keras.models.load_model('best_model_file.h5')

# -------------------------------
# Evaluation on the Test data set
# -------------------------------
test_loss, test_acc = model.evaluate(test_generator, verbose=2)
print('Test loss: {:.4f} - test accuracy: {:.4f}'.format(test_loss, test_acc))

# Demo

In [None]:
# -------------------------------------------
# Predict and display one image from the
# test generator
# -------------------------------------------
# Pull one batch from the test generator; the images are already resized to
# 150x150x3 and rescaled to [0, 1] by the generator.
demo_images, demo_labels = next(test_generator)
img = demo_images[0]

# The model ends in a single sigmoid unit, so threshold the probability at 0.5
# to obtain the class index (`predict_classes` is no longer part of Keras).
# img[None, ...] adds the batch dimension that predict() expects.
img_class = (model.predict(img[None, ...]) > 0.5).astype("int32")
prediction = int(img_class[0][0])

# class_indices maps folder name -> index; invert it to recover the name.
index_to_class = {index: name for name, index in test_generator.class_indices.items()}
classname = index_to_class[prediction]
print("Class: ",classname)

plt.imshow(img)
plt.title(classname)
plt.show()