In [1]:
import pandas as pd
import numpy as np

import glob
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

import math
from collections import Counter
import re
import json
from bs4 import BeautifulSoup
import datetime
from pprint import pprint
import requests

In [31]:
from keras.preprocessing import image as image_utils
from keras.applications.imagenet_utils import decode_predictions
from keras.applications.imagenet_utils import preprocess_input
from keras.applications import VGG16

import keras as keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Activation

In [10]:
celeb_align = glob.glob('MsCelebV1-Faces-Aligned.Samples/MsCelebV1-Faces-Aligned.Samples/**/*.jpg')
background = glob.glob('cars_brad_bg/*.jpg')

In [11]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [12]:
#image augumentation generator
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

### Test augmentation

img = load_img('MsCelebV1-Faces-Aligned.Samples/MsCelebV1-Faces-Aligned.Samples/m.0qfnmpt/0-FaceId-0.jpg')
x = img_to_array(img)
x = x.reshape((1,) + x.shape)

In [14]:
x.shape

(1, 224, 192, 3)

In [18]:
i = 0
for batch in datagen.flow(x, batch_size=1, save_to_dir='preview', save_prefix='sample', save_format='jpeg'):
    i += 1
    if i > 20:
        break

# Prepare resized images

### features

In [20]:
celeb_align_re = []
for i in range(0, len(celeb_align)):
    temp_img = cv2.imread(celeb_align[i])
    temp_img_resize = cv2.resize(temp_img, (150, 150))
    celeb_align_re.append(temp_img_resize)

In [22]:
background_re = []
for i in range(0, len(background)):
    temp_img = cv2.imread(background[i])
    temp_img_resize = cv2.resize(temp_img, (150, 150))
    background_re.append(temp_img_resize)

In [27]:
feature = np.concatenate((celeb_align_re, background_re), axis=0)

### labels

In [26]:
label_full_yes = list('1') * len(celeb_align_re)
label_full_yes = [int(i) for i in label_full_yes]
label_full_yes = np.asarray(label_full_yes)

label_full_no = list('0') * len(background_re)
label_full_no = [int(i) for i in label_full_no]
label_full_no = np.asarray(label_full_no)

label_full = np.concatenate((label_full_no, label_full_yes), axis=0)

### train/test split

In [29]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(feature, label_full, test_size = 0.33, stratify=label_full, random_state=1992)

# Normalize data set to 0-to-1 range
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255


In [116]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(150, 150, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [34]:
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_test, y_test),
    shuffle=True
)

Train on 1839 samples, validate on 906 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x14c2f69e8>

# Test model with cropped face dataset

In [35]:
celeb_crop = glob.glob('MsCelebV1-Faces-Cropped.Samples/MsCelebV1-Faces-Cropped.Samples/**/*.jpg')

In [37]:
celeb_crop_re = []
for i in range(0, len(celeb_crop)):
    temp_img = cv2.imread(celeb_crop[i])
    temp_img_resize = cv2.resize(temp_img, (150, 150))
    celeb_crop_re.append(temp_img_resize)

In [55]:
target = np.array(celeb_crop_re)
target = target.astype('float32')
target /= 255

In [83]:
result = model.predict(target)

In [80]:
observe = [[1.0]] * len(celeb_crop_re)
observe = np.asarray(observe)
observe

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]])

In [82]:
from sklearn.metrics import accuracy_score
accuracy_score(np.rint(result), observe)

0.06181818181818182

# Resized samples and save to 'data' folder

### facial images in different folders

In [100]:
all_files = glob.glob('MsCelebV1-Faces-Aligned.Samples/MsCelebV1-Faces-Aligned.Samples/**')

In [109]:
out_dir = 'data/'

for file in all_files:
    # all paths in one folder
    all_images = glob.glob(file+'/*.jpg')
    for image in all_images:
        # read image in an array
        temp_img = cv2.imread(image,1)
        # resize the array
        temp_img_resize = cv2.resize(temp_img, (150, 150))
        # split path and filename.jpg
        path, name = os.path.split(image)
        # get the folder name
        folder_name = os.path.basename(file)
        # write resized-array as an image into respective dir instead of working dir
        cv2.imwrite(os.path.join(out_dir, "resized_" + folder_name + name), temp_img_resize)

### background images in one folder

In [110]:
all_images = glob.glob('cars_brad_bg/*.jpg')

In [111]:
out_dir = 'data/'

for image in all_images:
    # read image in an array
    temp_img = cv2.imread(image,1)
    # resize the array
    temp_img_resize = cv2.resize(temp_img, (150, 150))
    # split path and filename w/ extension
    path, name = os.path.split(image)
    # write resized-array as an image into respective dir instead of working dir
    cv2.imwrite(os.path.join(out_dir, "resized_" + name), temp_img_resize)

# model1_aug (from 150 x 150 images)

In [117]:
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_directory(
        'data/train',  # this is the target directory
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        'data/test',
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

Found 1935 images belonging to 2 classes.
Found 810 images belonging to 2 classes.


In [118]:
model.fit_generator(
        train_generator,
        steps_per_epoch=1935 // batch_size,
        epochs=20,
        validation_data=validation_generator,
        validation_steps=810 // batch_size)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x135b42898>

In [150]:
# save model structure
from pathlib import Path
model_structure = model.to_json()

f = Path('model1_aug_structure.json')
f.write_text(model_structure)

# save model weights
model.save_weights('model1_aug_weights.h5')

# model2_aug (from 64 x 64 images)

In [167]:
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_directory(
        'data/train',  # this is the target directory
        target_size=(64, 64),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        'data/test',
        target_size=(64, 64),
        batch_size=batch_size,
        class_mode='binary')

Found 1935 images belonging to 2 classes.
Found 810 images belonging to 2 classes.


In [168]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(64, 64, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [169]:
model.fit_generator(
        train_generator,
        steps_per_epoch=1935 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=810 // batch_size)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13d421f98>

In [210]:
# save model structure
from pathlib import Path
model_structure = model.to_json()

f = Path('model2_aug_structure.json')
f.write_text(model_structure)

# save model weights
model.save_weights('model2_aug_weights.h5')

# model3_aug (from 32 x 32 images)

In [228]:
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_directory(
        'data/train',  # this is the target directory
        target_size=(32, 32),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
validation_generator = test_datagen.flow_from_directory(
        'data/test',
        target_size=(32, 32),
        batch_size=batch_size,
        class_mode='binary')

Found 1935 images belonging to 2 classes.
Found 810 images belonging to 2 classes.


In [229]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(32, 32, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [230]:
model.fit_generator(
        train_generator,
        steps_per_epoch=1935 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=810 // batch_size)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x14e8eccf8>

In [241]:
# save model structure
from pathlib import Path
model_structure = model.to_json()

f = Path('model3_aug_structure.json')
f.write_text(model_structure)

# save model weights
model.save_weights('model3_aug_weights.h5')

# model4_aug (from 32 x 32 images)
* use model8's structure

In [270]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3), activation="relu"))
model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', activation="relu"))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))

model.add(Dense(2, activation="softmax"))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

### prepare data

In [247]:
landscape = glob.glob('val_256/*.jpg')

In [249]:
import pickle
#with open("index_2750.txt", "wb") as fp:   #Pickling
#    pickle.dump(index_2750, fp)

with open("index_2750.txt", "rb") as fp:   # Unpickling
    index_2750 = pickle.load(fp)

# use index to select files
landscape_2750 = []
for i in index_2750:
    temp_img = cv2.imread(landscape[i])
    temp_img_resize = cv2.resize(temp_img, (32, 32))
    landscape_2750.append(temp_img_resize)

In [250]:
celeb_align_re32 = []
for i in range(0, len(celeb_align)):
    temp_img = cv2.imread(celeb_align[i])
    temp_img_resize = cv2.resize(temp_img, (32,32))
    celeb_align_re32.append(temp_img_resize)

In [251]:
celeb_crop_re32 = []
for i in range(0, len(celeb_crop)):
    temp_img = cv2.imread(celeb_crop[i])
    temp_img_resize = cv2.resize(temp_img, (32,32))
    celeb_crop_re32.append(temp_img_resize)

In [254]:
all_imgs = np.concatenate((landscape_2750, celeb_align_re32, celeb_crop_re32), axis=0)

In [255]:
len(all_imgs)

5500

In [256]:
label_full_no = list('0') * 2750
label_full_no = [int(i) for i in label_full_no]
label_full_no = np.asarray(label_full_no)

label_full_yes = list('1') * 2750
label_full_yes = [int(i) for i in label_full_yes]
label_full_yes = np.asarray(label_full_yes)

label_full = np.concatenate((label_full_no, label_full_yes), axis=0)
len(label_full)

5500

In [264]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(all_imgs, label_full, test_size = 0.33, stratify=label_full, random_state=1992)

### create augmentations

In [265]:
# Normalize data set to 0-to-1 range
X_train = (X_train.astype('float32'))
X_test = (X_test.astype('float32'))
X_train /= 255
X_test /= 255

# categorize target variable
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)

In [266]:
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator()

# generate augmented train data
train_generator = train_datagen.flow(
        X_train,
        y_train,
        batch_size=batch_size)  

# this is a similar generator, for validation data
validation_generator = test_datagen.flow(
        X_test,
        y_test,
        batch_size=batch_size)

In [271]:
model.fit_generator(
        train_generator,
        steps_per_epoch=3685 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=1815 // batch_size)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13e53fb70>

In [272]:
# save model structure
from pathlib import Path
model_structure = model.to_json()

f = Path('model4_aug_structure.json')
f.write_text(model_structure)

# save model weights
model.save_weights('model4_aug_weights.h5')

# model5_aug 
* refined model4_aug with more conditoins in augmentation

In [276]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3), activation="relu"))
model.add(Conv2D(32, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same', activation="relu"))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.5))

model.add(Dense(2, activation="softmax"))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [277]:
batch_size = 16

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator()

# generate augmented train data
train_generator = train_datagen.flow(
        X_train,
        y_train,
        batch_size=batch_size)  

# this is a similar generator, for validation data
validation_generator = test_datagen.flow(
        X_test,
        y_test,
        batch_size=batch_size)

In [278]:
train_datagen.fit(X_train)

In [279]:
model.fit_generator(
        train_generator,
        steps_per_epoch=3685 // batch_size,
        epochs=50,
        validation_data=validation_generator,
        validation_steps=1815 // batch_size)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x14fed5198>

In [280]:
# save model structure
from pathlib import Path
model_structure = model.to_json()

f = Path('model5_aug_structure.json')
f.write_text(model_structure)

# save model weights
model.save_weights('model5_aug_weights.h5')