# 14 Celebrities dataset

## Importing packages

In [None]:
import cv2
import requests
import zipfile
import os
import keras
import numpy as np
from PIL import Image
from io import BytesIO
import tensorflow as tf
import matplotlib.pyplot as plt
from keras import backend as K
from keras.preprocessing import image
from keras.models import Model, Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, GlobalAveragePooling2D

In [None]:
# Locate the dataset archive under the input directory and unpack all of its
# members into the current working directory.
base_dir = "../input/14-celebrity-faces-dataset"
celeb14 = os.path.join(base_dir, "14-celebrity-faces-dataset.zip")

with zipfile.ZipFile(celeb14, mode="r") as archive:
    archive.extractall(path='.')

In [None]:
# Root directory of the extracted dataset; the 'train' and 'val' split
# directories are resolved relative to it.
data_dir = '/kaggle/working/14-celebrity-faces-dataset/data'

## Model parameters

In [None]:
# Size every image is resized to before being fed to the network.
img_width = 224
img_height = 224

# Locations of the training and validation splits.
train_data_dir = os.path.join(data_dir, 'train')
validation_data_dir = os.path.join(data_dir, 'val')

# Sample counts used to derive the number of steps per epoch.
nb_train_samples = 220
nb_validation_samples = 70

# Training schedule and size of the output layer.
epochs = 40
batch_size = 16
numclasses = 14

## Preprocessing

In [None]:
# Training images are scaled by 1/255 and lightly augmented: small rotations,
# zooms and shifts plus horizontal (never vertical) flips.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
)

# Validation images are only scaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Options shared by both directory iterators.
# NOTE(review): Keras documents target_size as (height, width) -- confirm the
# order before switching to a non-square input size.
_flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(
    train_data_dir, **_flow_options)

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir, **_flow_options)

In [None]:
# Place the channel axis where the active Keras backend expects it.
input_shape = (
    (3, img_width, img_height)
    if K.image_data_format() == 'channels_first'
    else (img_width, img_height, 3)
)

### Choose one of the models below and run only its cells

## DenseNet model

In [None]:
"""densenet = tf.keras.applications.DenseNet121(
    weights='/kaggle/input/densenet-keras/DenseNet-BC-121-32-no-top.h5',
    include_top=False,
    input_shape=input_shape)"""
# DenseNet201 backbone initialised with ImageNet weights and without its
# classification head. All backbone layers stay trainable, so they are
# fine-tuned together with the new head.
densenet = tf.keras.applications.DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape)
densenet.trainable = True


def build_model():
    """Build and compile the DenseNet-based classifier.

    Stacks global average pooling, dropout, a 512-unit ReLU layer, a second
    dropout and a ``numclasses``-way softmax layer on top of the module-level
    ``densenet`` backbone, then compiles the result with categorical
    cross-entropy and the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(densenet)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(numclasses, activation='softmax'))

    model.compile(
        loss='categorical_crossentropy',
        # Use the public optimizer entry point and the `learning_rate`
        # argument; the private `adam_v2` module path and the `lr` alias are
        # not available in every Keras release.
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        metrics=['categorical_accuracy'])

    return model


model = build_model()
model.summary()

In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# len(generator) is the number of batches in one full pass over the data,
# including the final partial batch, so no training or validation sample is
# dropped by floor division of a hard-coded sample count.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

In [None]:
# Per-epoch loss and accuracy recorded during training. The accuracy keys
# match the 'categorical_accuracy' metric the model was compiled with.
training_loss = history.history['loss']
validation_loss = history.history['val_loss']
training_acc = history.history['categorical_accuracy']
val_acc = history.history['val_categorical_accuracy']

# Epoch numbers for the x axis, starting at 1.
epoch_count = range(1, len(training_loss) + 1)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: loss curves.
loss_ax.plot(epoch_count, training_loss)
loss_ax.plot(epoch_count, validation_loss)
loss_ax.legend(['Training Loss', 'Validation Loss'])
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

# Right panel: accuracy curves.
acc_ax.plot(epoch_count, training_acc)
acc_ax.plot(epoch_count, val_acc)
acc_ax.legend(['Training Accuracy', 'Validation Accuracy'])
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
plt.show()

In [None]:
# Write the trained model's weights to an HDF5 file (path is relative to the
# current working directory).
saveweight = 'celebriytag_weight.h5'
model.save_weights(filepath=saveweight)

In [None]:
# Class names, indexed by the position of the corresponding network output.
# NOTE(review): presumably this mirrors the class ordering chosen by
# flow_from_directory -- confirm against train_generator.class_indices.
labels = [
    'anne_hathaway',
    'arnold_schwarzenegger',
    'ben_afflek',
    'dwayne_johnson',
    'elton_john',
    'jerry_seinfeld',
    'kate_beckinsale',
    'keanu_reeves',
    'lauren_cohan',
    'madonna',
    'mindy_kaling',
    'simon_pegg',
    'sofia_vergara',
    'will_smith',
]

# Collect every validation image as a '<celebrity>/<file name>' path relative
# to validation_data_dir. The directory is taken from validation_data_dir
# (not a second hard-coded copy of the path) so that listing and loading
# always look at the same location.
test_imgs = []
for celebrity in labels:
    celebrity_dir = os.path.join(validation_data_dir, celebrity)
    test_imgs.extend(
        f'{celebrity}/' + file_name for file_name in os.listdir(celebrity_dir))

In [None]:
# Classify every validation image one at a time and count how often the
# predicted label differs from the directory name the image was found in.
err_cnt = 0
count = 0
for test in test_imgs:
    test_img = os.path.join(validation_data_dir, test)
    img = image.load_img(test_img, target_size=(img_width, img_height))
    # Add a batch axis and apply the same 1/255 scaling the generators use.
    x = np.expand_dims(image.img_to_array(img), axis=0)
    x /= 255.
    result_indices = np.argmax(np.squeeze(model.predict(x)))
    count += 1
    # The first path component of `test` is the true class name.
    if labels[result_indices] != test.split("/")[0]:
        err_cnt += 1
print(err_cnt, count)
if count:
    print(f'Accuracy: {(count-err_cnt)/count}')
else:
    # Avoid a ZeroDivisionError when no validation images were collected.
    print('Accuracy: n/a (no validation images found)')

## ResNet model

In [None]:
def resnet50tl(input_shape, outclass, sigma='sigmoid', weights='imagenet'):
    """Build a ResNet50 transfer-learning classifier (uncompiled).

    A headless ResNet50 is followed by global average pooling, two
    Dense+Dropout stages (2048 and 1024 units, dropout 0.5) and a final
    Dense layer.

    Args:
        input_shape: Shape of a single input image, forwarded to ResNet50.
        outclass: Number of units in the final Dense layer.
        sigma: Activation of the final Dense layer.
        weights: Forwarded to ResNet50: ``None`` for random initialisation,
            ``'imagenet'`` for the pre-trained weights, or a path to a
            weights file. Replaces the previous reliance on an undefined
            global ``resnet50weight``.

    Returns:
        A ``Model`` mapping the ResNet50 input to the classifier output.
    """
    base_model = keras.applications.resnet50.ResNet50(
        weights=weights, include_top=False, input_shape=input_shape)

    top_model = Sequential()
    top_model.add(GlobalAveragePooling2D(input_shape=base_model.output_shape[1:]))
    top_model.add(Dense(2048, activation='relu'))
    top_model.add(Dropout(0.5))

    top_model.add(Dense(1024, activation='relu'))
    top_model.add(Dropout(0.5))

    top_model.add(Dense(outclass, activation=sigma))

    return Model(inputs=base_model.input, outputs=top_model(base_model.output))


model = resnet50tl(input_shape, numclasses, 'softmax')
# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): the previous `decay=1e-7` argument was dropped because newer
# Keras optimizers reject it; pass a learning-rate schedule as
# `learning_rate` if decay is required.
opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

model.summary()

In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated.
# len(generator) is the number of batches in one full pass over the data,
# including the final partial batch, so no training or validation sample is
# dropped by floor division of a hard-coded sample count.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

In [None]:
# Per-epoch loss recorded during training.
training_loss = history.history['loss']
validation_loss = history.history['val_loss']

# The model is compiled with metrics=['accuracy']; depending on the Keras
# release the history key is 'accuracy' or the older 'acc', so use whichever
# is present instead of failing with a KeyError.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
training_acc = history.history[acc_key]
val_acc = history.history['val_' + acc_key]

# Epoch numbers for the x axis, starting at 1.
epoch_count = range(1, len(training_loss) + 1)

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: loss curves.
loss_ax.plot(epoch_count, training_loss)
loss_ax.plot(epoch_count, validation_loss)
loss_ax.legend(['Training Loss', 'Validation Loss'])
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')

# Right panel: accuracy curves.
acc_ax.plot(epoch_count, training_acc)
acc_ax.plot(epoch_count, val_acc)
acc_ax.legend(['Training Accuracy', 'Validation Accuracy'])
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
plt.show()

In [None]:
# Write the trained model's weights to an HDF5 file (path is relative to the
# current working directory).
saveweight = 'celebriytag_weight.h5'
model.save_weights(filepath=saveweight)

In [None]:
# Class names, indexed by the position of the corresponding network output.
# NOTE(review): presumably this mirrors the class ordering chosen by
# flow_from_directory -- confirm against train_generator.class_indices.
labels = [
    'anne_hathaway',
    'arnold_schwarzenegger',
    'ben_afflek',
    'dwayne_johnson',
    'elton_john',
    'jerry_seinfeld',
    'kate_beckinsale',
    'keanu_reeves',
    'lauren_cohan',
    'madonna',
    'mindy_kaling',
    'simon_pegg',
    'sofia_vergara',
    'will_smith',
]

# Collect every validation image as a '<celebrity>/<file name>' path relative
# to validation_data_dir. The directory is taken from validation_data_dir
# (not a second hard-coded copy of the path) so that listing and loading
# always look at the same location.
test_imgs = []
for celebrity in labels:
    celebrity_dir = os.path.join(validation_data_dir, celebrity)
    test_imgs.extend(
        f'{celebrity}/' + file_name for file_name in os.listdir(celebrity_dir))

In [None]:
# Classify every validation image one at a time and count how often the
# predicted label differs from the directory name the image was found in.
err_cnt = 0
count = 0
for test in test_imgs:
    test_img = os.path.join(validation_data_dir, test)
    img = image.load_img(test_img, target_size=(img_width, img_height))
    # Add a batch axis and apply the same 1/255 scaling the generators use.
    x = np.expand_dims(image.img_to_array(img), axis=0)
    x /= 255.
    result_indices = np.argmax(np.squeeze(model.predict(x)))
    count += 1
    # The first path component of `test` is the true class name.
    if labels[result_indices] != test.split("/")[0]:
        err_cnt += 1
if count:
    print(f'Accuracy: {(count-err_cnt)/count}')
else:
    # Avoid a ZeroDivisionError when no validation images were collected.
    print('Accuracy: n/a (no validation images found)')