# Face Classification Project - Group 8

In [1]:
# Mount Google Drive at /content/drive so the notebook can reach files stored there.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Need to install deepface if not already installed in ur notebook
pip install deepface

## Imports

In [4]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from copy import deepcopy

from skimage import exposure

from deepface import DeepFace

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

# modules for augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

# modules for cleaning
import shutil

24-03-18 01:17:50 - Directory /root/.deepface created
24-03-18 01:17:50 - Directory /root/.deepface/weights created


## Image Augmentation

In [7]:
# Base folder of the project data on the mounted drive
root_dir = "/content/drive/MyDrive/Colab Notebooks/COEN240_TA/data"

# Directory with original images -- Need to change this according to your folder path
data_dir = f"{root_dir}/train"
# File with label mappings
label_file = f"{data_dir}/file_mapping.txt"
# Directory to save augmented images -- Keep as is
output_dir = f"{root_dir}/Augmented_images"

# Load label mappings from a whitespace-separated text file: one "<filename> <label>" pair per line.
label_mapping = {}
with open(label_file, 'r') as file:
    for line_number, line in enumerate(file, start=1):
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at the end of the file) carry no mapping.
            continue
        if len(fields) != 2:
            raise ValueError(
                f"{label_file}, line {line_number}: expected '<filename> <label>', got {line!r}"
            )
        filename, label = fields
        label_mapping[filename] = label

# Configure the generator that produces randomly transformed copies of each image
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=5,
    zoom_range=0.2,
    brightness_range=(0.5, 1.0),
    horizontal_flip=True,
    fill_mode='nearest',
)

# Function to save augmented images in label-specific folders
def save_augmented_images(directory, output_directory, label_mapping, num_augmented_images=5):
    """Write augmented copies of every labelled image into per-label folders.

    Each file in ``directory`` whose name is a key of ``label_mapping`` is loaded and
    passed through the module-level ``datagen``; the generated images are saved by the
    generator itself under ``output_directory/<label>/``.

    Args:
        directory: Folder containing the original images.
        output_directory: Root folder under which one sub-folder per label is created.
        label_mapping: Dict mapping a file name to its label.
        num_augmented_images: Number of augmented images to generate per original.
            Values <= 0 generate nothing.

    NOTE(review): running this twice on the same output folder presumably adds more
    files rather than replacing the earlier ones -- confirm before re-running.
    """
    for filename in os.listdir(directory):
        if filename not in label_mapping:
            continue  # Files without a mapping (e.g. the mapping file itself) are ignored

        label_dir = os.path.join(output_directory, label_mapping[filename])
        os.makedirs(label_dir, exist_ok=True)

        image = load_img(os.path.join(directory, filename))
        image_array = img_to_array(image)
        # The generator expects a batch, so add a leading batch axis of size 1
        image_array = image_array.reshape((1,) + image_array.shape)

        # save_prefix is the file name without extension, to keep track of the original image
        augmented_batches = datagen.flow(
            image_array,
            batch_size=1,
            save_to_dir=label_dir,
            save_prefix=os.path.splitext(filename)[0],
            save_format='jpeg',
        )
        # The flow never ends on its own; zip() with range() pulls exactly
        # num_augmented_images batches (none at all when the count is <= 0).
        for _ in zip(range(num_augmented_images), augmented_batches):
            pass



In [8]:
# Run the augmentation: read originals from data_dir, write per-label folders under output_dir
save_augmented_images(
    directory=data_dir,
    output_directory=output_dir,
    label_mapping=label_mapping,
)

# Data Cleaning

In [11]:
# clean the data
def _has_detectable_face(image_path):
    """Return True when the image at ``image_path`` can be read and DeepFace finds a face in it."""
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        return False
    try:
        DeepFace.extract_faces(image)
    except ValueError:
        # Same exception detect_faces() treats as "no face found". Other errors
        # propagate so that an unrelated failure cannot wipe the dataset.
        return False
    return True


def clean_data(path, excluded_labels=("wufangyuan",)):
    """Remove unusable images from a folder-per-label image tree, in place.

    Args:
        path: Root folder that contains one sub-folder per label.
        excluded_labels: Label folders under ``path`` to delete entirely. A folder
            that is already gone is skipped, so the function can be re-run safely.

    Every remaining image that cannot be read, or in which no face is detected, is
    printed and deleted from disk. Entries of ``path`` that are not directories are
    left untouched.
    """
    for label in excluded_labels:
        excluded_dir = os.path.join(path, label)
        if os.path.isdir(excluded_dir):
            shutil.rmtree(excluded_dir)

    for name in os.listdir(path):
        folder_path = os.path.join(path, name)
        if not os.path.isdir(folder_path):
            continue
        for img in os.listdir(folder_path):
            i_p = os.path.join(folder_path, img)
            if not _has_detectable_face(i_p):
                print(i_p)
                os.remove(i_p)

In [None]:
# Clean the augmented image tree in place
print("Filtering Data")
path = output_dir
clean_data(path)

In [13]:
# Face extractor

def detect_faces(img: np.ndarray) -> np.ndarray:
    """Return the first face DeepFace extracts from ``img``.

    Args:
        img: Image array handed to ``DeepFace.extract_faces``.

    Returns:
        The ``'face'`` entry of the first extraction result, or an empty array
        (``size == 0``) when extraction raises ``ValueError`` or yields no results.
    """
    try:
        return DeepFace.extract_faces(img)[0]['face']
    except (ValueError, IndexError):
        # IndexError covers an empty result list, which would otherwise escape uncaught
        return np.empty(shape=(0,))

# Load images from folders

In [82]:
def load_images_from_folder(folder: str, file_map: dict) -> tuple:
    """Read every image file directly inside ``folder`` together with its label.

    Args:
        folder: Directory to scan (sub-directories are not entered).
        file_map: Dict mapping a file name to its label.

    Returns:
        ``(images, labels)``: two lists of equal length, in ``os.listdir`` order.
        Entries that are not files, or that ``cv2.imread`` cannot decode, are skipped.

    Raises:
        KeyError: if a readable image has no entry in ``file_map``.
    """
    print("loading images from folder")

    images = []
    labels = []
    # NOTE: the listing order is kept as-is on purpose; the CSV-building cell
    # re-lists the folder and pairs its rows with the predictions by position.
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        if filename not in file_map:
            raise KeyError(f"No label found for image {filename!r} in the file mapping")
        images.append(img)
        labels.append(file_map[filename])

    return images, labels

def preprocess_images(images: list, face_detection: bool) -> list:
    """Optionally crop each image to its detected face, then resize it to 128x128.

    Args:
        images: Image arrays to process.
        face_detection: When True, each image is first passed through ``detect_faces``;
            if that returns an empty array the untouched copy of the image is used.

    Returns:
        A list with one resized image per input image, in input order.

    NOTE(review): the cropped face and the uncropped fallback come from different
    sources and may differ in dtype / value range -- confirm they are comparable.
    """
    print("pre - processing images")
    target_size = (128, 128)
    failed_crops = 0  # number of images for which no face crop was obtained
    resized = []

    for original in images:
        fallback = deepcopy(original)
        current = original

        if face_detection == True:
            face = detect_faces(current)
            if face.size == 0:
                # Face cropping produced nothing: fall back to the full image
                failed_crops += 1
                current = fallback
            else:
                current = face

        resized.append(cv2.resize(current, target_size))

    return resized

def load_labels_from_file(mapping_file):
    """Parse a "<filename> <label>" mapping file into a dict.

    Args:
        mapping_file: Path of a text file with one whitespace-separated
            ``filename label`` pair per line. Blank lines are ignored.

    Returns:
        Dict mapping each file name to its label.

    Raises:
        ValueError: if a non-blank line does not contain exactly two fields.
    """
    label_map = {}
    with open(mapping_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # e.g. a trailing empty line at the end of the file
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{mapping_file}, line {line_number}: expected '<filename> <label>', got {line!r}"
                )
            filename, label = fields
            label_map[filename] = label
    return label_map

In [83]:
def load_keras_images(path):
    """Load images from a folder-per-label tree, using each folder name as the label.

    Args:
        path: Root directory whose sub-directories each hold the images of one label.

    Returns:
        ``(images, labels)``: two lists of equal length. Entries of ``path`` that are
        not directories and files that ``cv2.imread`` cannot decode are skipped, so a
        label is only recorded for an image that was actually loaded.
    """
    labels = []
    images = []
    for name in os.listdir(path):
        folder_path = os.path.join(path, name)
        if not os.path.isdir(folder_path):
            continue
        for img in os.listdir(folder_path):
            image = cv2.imread(os.path.join(folder_path, img))
            if image is None:
                # Same guard load_images_from_folder applies to unreadable files
                continue
            images.append(image)
            labels.append(name)
    return images, labels

In [81]:
def load_images(path:str, Face_detect = False, keras = False) -> tuple:
    """Load, preprocess and flatten the images found under ``path``.

    Args:
        path: Folder to read from.
        Face_detect: Forwarded to ``preprocess_images`` to enable face cropping.
        keras: When True, ``path`` is read as a folder-per-label tree via
            ``load_keras_images``; otherwise images sit directly in ``path`` and
            labels come from ``path/file_mapping.txt``.

    Returns:
        ``(data, labels)``: ``data`` is a 2-D array with one flattened image per row,
        ``labels`` the matching list of labels.

    Raises:
        ValueError: if no image could be loaded from ``path``.
    """
    if keras:
        images, labels = load_keras_images(path)
    else:
        # Load labels from the mapping file
        mapping_file_path = os.path.join(path, "file_mapping.txt")
        label_map = load_labels_from_file(mapping_file_path)
        images, labels = load_images_from_folder(path, label_map)

    # Preprocess the images
    preprocessed_images = preprocess_images(images, Face_detect)
    if len(preprocessed_images) == 0:
        # reshape(0, -1) would fail with a much less helpful message
        raise ValueError(f"No images could be loaded from {path!r}")

    # Convert preprocessed images to a numpy array and flatten each image into one row
    data = np.array(preprocessed_images)
    data = data.reshape(len(data), -1)

    return data, labels

# Load Data

In [84]:
# Crop images to the detected face during preprocessing
Face_detect = True

# Adjust this according to your path
root_path = "/content/drive/MyDrive/Colab Notebooks/COEN240_TA/data"

# Keep as is, because this is where the augmented images will be
train_path = f"{root_path}/Augmented_images"
# change as needed
grade_path = f"{root_path}/grade"

# Training data comes from the folder-per-label tree; grading data uses file_mapping.txt
data, labels = load_images(train_path, Face_detect, keras=True)
grade_data, grade_labels = load_images(grade_path, Face_detect)

pre - processing images
loading images from folder
pre - processing images


# ML model

In [85]:
# Seed for the PCA step: for large inputs scikit-learn may select a randomized SVD
# solver, so an unseeded PCA can give slightly different scores from run to run.
RANDOM_STATE = 42

# Named `face_pipeline` rather than `pip`: a variable called `pip` shadows the
# IPython `pip` magic and breaks a bare `pip install ...` cell on re-run.
face_pipeline = Pipeline([
      ('scaler', StandardScaler()),
      ('pca', PCA(n_components = 390, random_state = RANDOM_STATE)),
      ('lda', LinearDiscriminantAnalysis(n_components=31)),
      ('svm', SVC(C = 5))
     ])

# Fit on the augmented training set, then predict and score on the grading set
face_pipeline.fit(data, labels)
y_pred = face_pipeline.predict(grade_data)
score = face_pipeline.score(grade_data, grade_labels)
print(score)

0.8838709677419355


# Creating output csv

In [119]:
f_path = grade_path + "/file_mapping.txt"
f_map = load_labels_from_file(f_path)
folder = grade_path

# Collect one record per readable image and build the frame once.
# (DataFrame.append was removed in pandas 2.0, and growing a frame row by row is quadratic.)
# NOTE: rows are paired with y_pred by position, so this relies on os.listdir returning
# the files in the same order as when the grading images were loaded.
records = []
for filename in os.listdir(folder):
    img_path = os.path.join(folder, filename)
    if os.path.isfile(img_path) and cv2.imread(img_path) is not None:
        records.append({'filename': filename, 'ground_truth': f_map.get(filename, None)})
df = pd.DataFrame(records, columns=['filename', 'ground_truth'])

# A length mismatch would silently pad the merged table with NaN rows; fail loudly instead
if len(df) != len(y_pred):
    raise ValueError(
        f"Found {len(df)} readable images in {folder!r} but have {len(y_pred)} predictions"
    )

out = pd.DataFrame(y_pred, columns = ['predicted'])
merged = pd.concat([df, out], axis = 1)

In [122]:
# Write the filename / ground_truth / predicted table into the grading folder
merged.to_csv(f"{grade_path}/solution.csv")

In [133]:

# solution.csv is expected to have the columns 'filename', 'ground_truth' and 'predicted'.
# Slight change from the original grading helper:
# this notebook does not build an in-memory mapping from file name to prediction.
# Instead, the predictions are read back from solution.csv, where they were already
# written next to the ground truth, so pass None for `predictions`.

def calculate_accuracy(ground_truths, predictions):
    """Return the fraction of samples whose prediction equals the ground truth.

    Args:
        ground_truths: Dict mapping a sample id to its true label.
        predictions: Dict mapping a sample id to its predicted label.

    Returns:
        Number of correct predictions divided by the number of ground truths.

    Raises:
        ValueError: if the two dicts differ in size, or if there are no ground truths.
    """
    if len(ground_truths) != len(predictions):
        raise ValueError("The number of predictions does not match the number of ground truths.")
    if not ground_truths:
        # Avoids an opaque ZeroDivisionError below
        raise ValueError("Cannot compute accuracy without any ground truths.")

    # A sample only counts as correct when a prediction exists for it; otherwise a
    # missing prediction (None) would match a ground truth that is itself None.
    correct_predictions = sum(
        1
        for sample_id, ground_truth in ground_truths.items()
        if sample_id in predictions and predictions[sample_id] == ground_truth
    )
    return correct_predictions / len(ground_truths)

def grade_predictions(predictions = None, grade_path=grade_path):
    """Compute prediction accuracy against the ground truth stored in solution.csv.

    Args:
        predictions: Optional dict mapping a file name to its predicted label. When
            None (the way this notebook calls it), the predictions are taken from the
            'predicted' column of solution.csv.
        grade_path: Folder that contains solution.csv.

    Returns:
        The accuracy returned by ``calculate_accuracy``.
    """
    df = pd.read_csv(grade_path + "/solution.csv")
    if predictions is None:
        # Previously the argument was always overwritten, so a caller-supplied
        # mapping was silently ignored.
        predictions = dict(zip(df['filename'], df['predicted']))
    ground_truths = dict(zip(df['filename'], df['ground_truth']))
    return calculate_accuracy(ground_truths, predictions)

# Example usage
# predictions = {'id1': 'A', 'id2': 'B', 'id3': 'C'}  # Your predictions
# accuracy = grade_predictions(predictions)
# print(f"Prediction Accuracy: {accuracy * 100}%")

In [134]:
# None is passed for predictions: grade_predictions reads them from solution.csv itself
accuracy = grade_predictions(None, grade_path)
print(accuracy)

0.8838709677419355
