In [None]:
import os
import cv2
import shutil
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Directory paths: raw training images are read from indir, face crops are written to outdir
indir = "D:/train_small/train_small"
outdir = "D:/crop"

# exist_ok=True so that re-running this cell does not raise FileExistsError
os.makedirs(outdir, exist_ok=True)

# Haar cascade used for frontal-face detection, loaded from the working directory
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the XML file cannot be loaded; it returns an
# empty classifier that only fails later, once per image. Stop here with a clear message.
if face_cascade.empty():
    raise FileNotFoundError(
        "Could not load 'haarcascade_frontalface_default.xml'; "
        "place it in the working directory before running this cell."
    )

# Function that keeps only the first face the cascade reports. (I tried to find a way to
# get the face with the greatest confidence, but the dlib library would not work on my
# computer.) Taking a single face is just a way of avoiding multiple faces per image.
def first_face(imgloc, outdir):
    """Write a face crop of the image at ``imgloc`` into ``outdir``.

    The output keeps the original file name. If the cascade reports at least one
    face, the image is cropped to the first reported bounding box; if it reports
    none, the original file is copied unchanged. Any error while reading,
    detecting or writing is printed and the original file is copied instead.

    Parameters
    ----------
    imgloc : str
        Path of the source image.
    outdir : str
        Directory that receives the cropped (or copied) image.

    Returns
    -------
    None
    """
    # Get the filename of the original image
    filename = os.path.basename(imgloc)
    # Output path for the cropped or copied image
    out_loc = os.path.join(outdir, filename)
    try:
        img = cv2.imread(imgloc)
        # cv2.imread returns None instead of raising when the file cannot be decoded
        if img is None:
            raise ValueError("image could not be read")
        # The cascade works on a greyscale image
        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Detect faces (scale factor 1.1, at least 4 neighbours per detection)
        faces = face_cascade.detectMultiScale(grey, 1.1, 4)
        if len(faces) > 0:
            # Crop the image to the first detected face
            x, y, w, h = faces[0]
            crop = img[y:y+h, x:x+w]
            # cv2.imwrite signals failure through its return value, not an exception
            if not cv2.imwrite(out_loc, crop):
                raise IOError("cropped image could not be written")
        else:
            # If no faces are detected, copy the entire image
            shutil.copyfile(imgloc, out_loc)
    except Exception as e:
        print(f"Error with {imgloc}: {str(e)}")
        # Fall back to copying the whole image so every input has an output file
        shutil.copyfile(imgloc, out_loc)
        print(f"Moved unaltered {out_loc}")

# Run the face cropper over every .jpg / .png file found directly inside indir
for filename in os.listdir(indir):
    # Skip anything that is not one of the two accepted image extensions
    if not filename.endswith((".jpg", ".png")):
        continue
    imgloc = os.path.join(indir, filename)
    # Keep only the first detected face of this image
    first_face(imgloc, outdir)

In [None]:
## Submission 4 ##
####################### Unfreezing 15 layers #######################################
batch_size = 32

# Dataset directory (cropped images) and CSV that maps each file name to a celebrity
data_dir = "D:/cropped_faces_copy"
names_csv = "D:/train_small.csv"

# Create training and validation directories
tset = os.path.join(data_dir, "train")
vset = os.path.join(data_dir, "val")
os.makedirs(tset, exist_ok=True)
os.makedirs(vset, exist_ok=True)

# Load the CSV file
names = pd.read_csv(names_csv)

# Get unique categories (celebrities)
cat = names["Category"].unique()

# Fixed seed so that re-running this cell reproduces the same split. The train/val
# folders are never cleared, so a different shuffle on a re-run would copy some
# files into the other split and leak images between training and validation.
split_rng = np.random.default_rng(42)

# 80/20 training/validation split, done separately for each celebrity
for cel in cat:
    # Make training and validation folders for modeling
    os.makedirs(os.path.join(tset, cel), exist_ok=True)
    os.makedirs(os.path.join(vset, cel), exist_ok=True)

    # Get filenames for the current category
    filenames = names[names["Category"] == cel]["File Name"].tolist()

    # Shuffle once, then cut at 80%
    split_rng.shuffle(filenames)
    train_size = int(len(filenames) * 0.8)
    train_filenames = filenames[:train_size]
    val_filenames = filenames[train_size:]

    # Copy images to train directory
    for filename in train_filenames:
        src = os.path.join(data_dir, filename)
        dst = os.path.join(tset, cel, filename)
        shutil.copyfile(src, dst)

    # Copy images to validation directory
    for filename in val_filenames:
        src = os.path.join(data_dir, filename)
        dst = os.path.join(vset, cel, filename)
        shutil.copyfile(src, dst)

# Image dimensions
hei = 160
wid = 160

# Load MobileNetV2 with ImageNet weights and without its classification head
# (tmod is the pre-trained base model)
tmod = MobileNetV2(input_shape=(hei, wid, 3),
                            include_top=False,
                            weights='imagenet')

# Keras application models are created with every layer trainable, so the base has to
# be frozen explicitly before selected layers are unfrozen. Only the last 15 entries of
# tmod.layers are fine-tuned; BatchNormalization layers among them stay frozen so their
# ImageNet statistics are kept.
n_unfrozen = 15
for layer in tmod.layers[:-n_unfrozen]:
    layer.trainable = False
for layer in tmod.layers[-n_unfrozen:]:
    layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

# Final layer of the base model
fl = tmod.get_layer('out_relu')
fl_out = fl.output

# Classification head: global pooling -> dropout -> one softmax unit per celebrity
x = tf.keras.layers.GlobalAveragePooling2D()(fl_out)
x = tf.keras.layers.Dropout(0.7)(x)
x = tf.keras.layers.Dense(len(cat), activation='softmax')(x)
model = tf.keras.Model(tmod.input, x)

# Compile (low learning rate because pre-trained layers are being fine-tuned)
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])
model.summary()

# Data augmentation -- I could not get keras layers to work on my Jupyter notebook so I used ImageDataGenerator
# (I believe ImageDataGenerator is deprecated, but it is the only data augmentation tool that works on my Jupyter notebook)
taug = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rotation_range=40,
    width_shift_range=0.1,
    height_shift_range=0.1
)

# Validation images are only rescaled, never augmented
vaug = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Augmented training data generator (one sub-directory per class)
t = taug.flow_from_directory(
    tset,
    target_size=(hei, wid),
    batch_size=batch_size,
    class_mode='categorical'
)

# Validation data generator
v = vaug.flow_from_directory(
    vset,
    target_size=(hei, wid),
    batch_size=batch_size,
    class_mode='categorical'
)

# Training model. The validation generator must be passed by keyword: the second
# positional argument of Model.fit is `y`, which is not accepted with generator input.
result = model.fit(t, validation_data=v, epochs=60, verbose=1)

In [None]:
# I cropped the test images in the same manner as the training images, but I can no longer find that block of code.
# It was the same Haar cascade cropping code that is used for the training data above.
from PIL import Image

# Model input size in pixels (square) and the folder holding the cropped test images
hei = wid = 160
image_dir = "D:/test/testcrop"
# Preprocessing
def preparation(image):
    """Turn an image loaded with OpenCV into a single-image batch for the model.

    Parameters
    ----------
    image : numpy.ndarray
        Image as returned by ``cv2.imread`` (BGR channel order).

    Returns
    -------
    numpy.ndarray
        Array with a leading batch axis of length 1, resized to the module-level
        ``wid`` x ``hei`` and scaled by 1/255.
    """
    # OpenCV loads BGR; convert to RGB
    im1 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Resize images. Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
    # same filter under the name that is available in old and new Pillow versions.
    im2 = Image.fromarray(im1)
    im3 = im2.resize((wid, hei), Image.LANCZOS)
    im4 = np.array(im3)
    # Put images on 0 to 1 scale
    resize_im = im4 / 255.0
    # Add batch dimension
    proc_im = np.expand_dims(resize_im, axis=0)
    return proc_im

# Ids and predicted labels are collected in the same loop so that the two columns
# always have the same length, even if image_dir contains entries that are not .jpg files.
pred_ids = []
pred_labels = []

# Iterate over images in the input directory
for filename in sorted(os.listdir(image_dir)):
    # Only JPEG files are predicted; everything else is skipped entirely
    if not filename.lower().endswith(".jpg"):
        continue
    # Get the full path of the image
    imgloc = os.path.join(image_dir, filename)
    # Id is the file name without its extension
    pred_ids.append(os.path.splitext(filename)[0])
    # Attempt to read the image
    try:
        image = cv2.imread(imgloc)
        if image is None:
            # If unable to read the image, try searching in the original directory
            imgloc = os.path.join("D:/test/test", filename)
            image = cv2.imread(imgloc)
            if image is None:
                raise Exception("Bad image")
        # Predict (verbose=0: no progress bar per single-image call)
        prep = preparation(image)
        predictions = model.predict(prep, verbose=0)
        # Index of the most probable class
        pred_class = int(np.argmax(predictions[0]))
        pred_labels.append(pred_class)
    except Exception as e:
        print(f"Error with {imgloc}: {str(e)}")
        # Keep a placeholder so this row still lines up with its Id
        pred_labels.append(None)

# Store Pred
pred = pd.DataFrame({
    'Id': pred_ids,
    'Category': pred_labels
})

# Save to submission csv
pred.to_csv('submission.csv', index=False)

In [3]:
# Convert Number to Celebrity Name
sub = pd.read_csv("submission.csv")

# Name Directory
train_path = "D:/cropped_faces_copy/train"
# flow_from_directory numbers the classes by sorted sub-directory name, so only
# directories are listed here; a stray file would shift every index after it.
celebrity = sorted(
    entry for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)

# Sort by jpg number
sub = sub.sort_values(by='Id')

# Replace the numeric category values with celebrity names. A missing prediction is
# read back as NaN, which turns the whole column into floats, so each value is
# checked for NaN and cast to int before it is used as a list index.
sub['Category'] = [
    celebrity[int(i)] if pd.notna(i) and i >= 0 else None
    for i in sub['Category']
]

# Save to final submission csv
sub.to_csv("submission4.csv", index=False)