# Makeup Binary Classification

This dataset is made up of images of people wearing makeup and not wearing makeup. The goal of the project is to build a model that is able to distinguish between the two and make highly accurate predictions on new unseen images.

## Imports

In [1]:
# -----------------
# Import libararies
# -----------------
import os
import zipfile
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.resnet50 import ResNet50
from shutil import copyfile
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
import gradio as gr
from torchvision import transforms
import splitfolders


# -----------------
# Extract Zip File
# -----------------
local_zip = 'makeup.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()

## Create file-structure to separate data

In [2]:
# -----------------------
# Confirming image counts
# -----------------------
base_path = '/tmp/data'
print(f'Total Makeup images {len(os.listdir(os.path.join(base_path, "makeup")))}')
print(f'Total No Makeup images {len(os.listdir(os.path.join(base_path, "no_makeup")))}')

Total Makeup images 1062
Total No Makeup images 444


In [3]:
# -------------------------------
# Extract Zip File & create files
# -------------------------------
os.mkdir('images') 
output_folder = 'images'
splitfolders.ratio(base_path, output=output_folder, ratio=(.9, .1))

Copying files: 1506 files [00:00, 2050.68 files/s]


In [4]:
# ----------------------------
# Defining directory variables
# ----------------------------
TRAINING_DIR = os.path.join('images', 'train')
TESTING_DIR = os.path.join('images', 'val')

train_makeup_dir = os.path.join(TRAINING_DIR, 'makeup')
train_nomakeup_dir = os.path.join(TRAINING_DIR, 'no_makeup')

test_makeup_dir = os.path.join(TESTING_DIR, 'makeup')
test_nomakeup_dir = os.path.join(TESTING_DIR, 'no_makeup')

In [5]:
# ---------------------------------------
# Checking image counts in each directory
# ---------------------------------------
print('total training makeup images :', len(os.listdir(train_makeup_dir) ))
print('total training no makeup images :', len(os.listdir(train_nomakeup_dir) ))

print('total test makeup images :', len(os.listdir(test_makeup_dir) ))
print('total test no makeup images :', len(os.listdir(test_nomakeup_dir) ))

total training makeup images : 955
total training no makeup images : 399
total test makeup images : 107
total test no makeup images : 45


## Loading in Inception V3 model for transfer learning!

The reason I am using transfer learning in this problem is that there is not a lot of data to train on, therefore using weights trained for another task and fine tuning it using my data is a good fit.

In [6]:
# --------------------------------------------------
# Loading weights and applying to inception model
# --------------------------------------------------
local_weights_file = 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

pre_trained_model = InceptionV3(input_shape=(150, 150, 3),
                                include_top=False,
                                weights=None)

pre_trained_model.load_weights(local_weights_file)

# --------------------------------------------------
# Make all layers in pre-trained model not trainable
# --------------------------------------------------
for layer in pre_trained_model.layers:
    pre_trained_model.trainable = False
    
# --------------------------------------------------
# Getting last layer outuput to connect to fine-tuned
# DNN attached for transfer learning
# --------------------------------------------------
last_layer = pre_trained_model.get_layer('mixed7')
print('last layer output shape: ', last_layer.output_shape)
last_output = last_layer.output

last layer output shape:  (None, 7, 7, 768)


In [7]:
# ---------------------
# Defining DNN to train
# ---------------------
x = tf.keras.layers.Flatten()(last_output)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(64, activation='relu')(x)
x = tf.keras.layers.Dense(32, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(pre_trained_model.input, x)

In [8]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

## Specify optimizer, create checkpoint model files, Augment training data

In [9]:
# ----------------
# Define Optimizer
# ----------------
adam = tf.keras.optimizers.Adam(
    learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
    name='Adam')

In [10]:
# ------------------------------------
# Saving model using Checkpoint method
# ------------------------------------
filepath="best_model_file.h5"
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True)

In [11]:
# ---------------
# Compiling model
# ---------------
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
# ---------------------------------------
# Using Data Augmentation to have more
# data to train on and for regularization
# ---------------------------------------
TRAINING_DIR = 'images/train/'
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    rotation_range=1,
    width_shift_range=.1,
    height_shift_range=0.1,
    shear_range=.1,
    zoom_range=.1,
    horizontal_flip=True,
    brightness_range=[0.2,1.0],
    fill_mode='nearest',
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(TRAINING_DIR,
                                                    batch_size=16,
                                                    target_size=(150, 150),
                                                    class_mode='binary',
                                                    classes=['makeup','no_makeup'],
                                                    subset='training')

validation_generator = train_datagen.flow_from_directory(TRAINING_DIR,
                                                    batch_size=16,
                                                    target_size=(150, 150),
                                                    class_mode='binary',
                                                    classes=['makeup','no_makeup'],
                                                    subset='validation')

Found 1084 images belonging to 2 classes.
Found 270 images belonging to 2 classes.


In [13]:
# -----------------------
# Creating Test Generator
# -----------------------
TEST_DIR = 'images/val'
test_datagen = ImageDataGenerator(rescale=1. / 255.)
test_generator = test_datagen.flow_from_directory(TEST_DIR,
                                                  batch_size=16,
                                                  target_size=(150, 150))

Found 152 images belonging to 3 classes.


## Training!

In [14]:
# --------------
# Training Model
# --------------
history = model.fit(train_generator,
                    batch_size=32,
                    epochs=5,
                    verbose=1,
                    validation_data=validation_generator,
                    callbacks=[checkpoint])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

KeyboardInterrupt: 

## Training evalutaion

In [None]:
import matplotlib
matplotlib.rcParams.update({'font.size': 22})
# ---------------------------------------------------------
# Retrieve a list of list results on training and test data
# sets for each training epoch
# ---------------------------------------------------------
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))  # Get number of epochs

# -----------------------------------------------
# Plot training and validation accuracy per epoch
# -----------------------------------------------
plt.figure(figsize=(15,10))
plt.plot(epochs, acc, '-g', label="Training Accuracy")
plt.plot(epochs, val_acc, '-b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.legend()

# -------------------------------------------
# Plot training and validation loss per epoch
# -------------------------------------------
plt.figure(figsize=(15,10))
plt.plot(epochs, loss, '-g', label="Training Loss")
plt.plot(epochs, val_loss, '-b', label="Validation Loss")
plt.legend()
plt.title('Training and validation loss')

# Testing Evaluation

In [None]:
# -------------------------
# Load best model iteration
# -------------------------
model = tf.keras.models.load_model('best_model_file.h5')

# -------------------------------
# Evaluation on the Test data set
# -------------------------------
test_loss, test_acc = model.evaluate(test_generator, verbose=2)

# Demo