In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from openpyxl import Workbook
from mtcnn.mtcnn import MTCNN
from openpyxl.styles import Font
from openpyxl import load_workbook
from keras.utils import to_categorical
from sklearn.impute import SimpleImputer
from openpyxl.utils.dataframe import dataframe_to_rows

Renaming Each image to 4,5,6 so that we can make resized faces to 1,2,3

In [2]:
# Path to the main folder containing the subfolders
main_folder_path = 'E:\\University\\FYP Stuff\\Dataset\\Makeup Images new'

# Loop through each subfolder in the main folder
for subfolder_name in os.listdir(main_folder_path):
    subfolder_path = os.path.join(main_folder_path, subfolder_name)
    
    # Check if the subfolder is actually a directory
    if os.path.isdir(subfolder_path):
        # Get a list of image files in the subfolder
        image_files = [file for file in os.listdir(subfolder_path) if file.endswith(('.jpg', '.jpeg', '.png', '.gif'))]
        
        # Determine the number of images in the subfolder
        num_images = len(image_files)
        
        # Rename each image file in the subfolder
        for i, image_file in enumerate(image_files, start=4):
            # If there are only 2 images, rename each image to 4, 5 respectively
            if num_images == 2 and i > 5:
                break
            # Otherwise, rename each image to 4, 5, 6 respectively
            new_name = str(i) + os.path.splitext(image_file)[1]
            old_path = os.path.join(subfolder_path, image_file)
            new_path = os.path.join(subfolder_path, new_name)
            os.rename(old_path, new_path)

Save face into folder and rename to 1,2,3

In [3]:
def detect_faces_in_folder(main_directory):
    # Loop through each subfolder in the main directory
    for subfolder_name in os.listdir(main_directory):
        subfolder_path = os.path.join(main_directory, subfolder_name)
        
        # Check if the subfolder is actually a directory
        if os.path.isdir(subfolder_path):
            # All files in the subfolder
            files = os.listdir(subfolder_path)
            
            # Initialize counter for face image naming
            face_counter = 1
            
            # Create MTCNN detector
            detector = MTCNN()
            
            # Loop through each image file in the subfolder
            for image_file in files:
                image_path = os.path.join(subfolder_path, image_file)

                # Reading the image
                image = cv2.imread(image_path)
                if image is None:
                    print(f"Unable to read {image_path}. Skipping...")
                    continue
                
                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Detect faces in the image
                faces = detector.detect_faces(image_rgb)

                # Loop through each detected face
                for idx, face in enumerate(faces):
                    bounding_box = face['box']
                    x, y, w, h = bounding_box

                    min_face_size = 45
                    if w > min_face_size and h > min_face_size:
                        extracted_face = image[y:y + h, x:x + w]

                        # Construct the path to store the extracted face image
                        extracted_face_filename = f'{str(face_counter)}.jpg'
                        extracted_face_path = os.path.join(subfolder_path, extracted_face_filename)

                        # Write the extracted face image to the same subfolder
                        cv2.imwrite(extracted_face_path, extracted_face)
                        print(f"Face Extraction Completed for {image_file}!")
                        
                        # Increment face counter
                        face_counter += 1
                    else:
                        print(f"Face in {image_file} is too small and will be skipped.")

# Main folder containing subfolders
main_directory = 'E:\\University\\FYP Stuff\\Dataset\\Makeup Images new'
detect_faces_in_folder(main_directory)

Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Unable to read E:\University\FYP Stuff\Dataset\Makeup Images new\sabrinajetli​\4.jpg. Skipping...
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!
Face Extraction Completed for 4.jpg!


Find out Missing Folders from Xlsx File

In [4]:
def find_missing_folders(main_folder, df):
    # Get unique folder names from the DataFrame
    df_folders = set(df['channelName'].tolist())

    # List of missing folders
    missing_folders = []

    # Iterate through each subfolder in the main folder
    for subfolder_name in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder_name)

        # Check if the subfolder is a directory
        if os.path.isdir(subfolder_path):
            # Check if the subfolder name is not in the DataFrame
            if subfolder_name not in df_folders:
                missing_folders.append(subfolder_name)

    return missing_folders

# Example usage:
main_folder = 'E:\\University\\FYP Stuff\\Dataset\\Makeup Images'
# Assuming 'df' is your DataFrame loaded from Excel
# Load Excel file into DataFrame
df = pd.read_excel('E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_dec.xlsx')

# Find missing folders
missing_folders = find_missing_folders(main_folder, df)
print("Missing folders:", missing_folders)


Missing folders: []


Now we apply Model to FIRST face and store it into Xlsx File (NO LIGHTING CONDITIONS)

In [4]:
# All needed Functions

# Function to load the model
def load_model():
    model = tf.keras.models.load_model('E:\\University\\FYP Stuff\\Models For SkinTone Recognition\\Skintone_Recognition_lit.h5')
    return model

# Function to preprocess the image
def preprocess_image(image):
    # Resize, normalize, and reshape the image
    image = image.resize((224, 224))  
    image = np.array(image) / 255.0  
    image = np.expand_dims(image, axis=0)  
    return image

# Function to predict skintone using the model
def predict_skintone(model, image):

    #Predictions
    prediction = model.predict(image)
    return prediction

# Function to process images in subfolders and update Excel file
def process_images_and_update_excel(model, main_folder, excel_file):
    # Load Excel file into DataFrame
    df = pd.read_excel(excel_file)

    # Iterate through each subfolder in the main folder
    for subfolder_name in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder_name)
        
        # Check if the subfolder is actually a directory
        if os.path.isdir(subfolder_path):
            # Look for image files named "1" with different extensions
            for ext in ['png', 'jpeg', 'jpg']:
                image_path = os.path.join(subfolder_path, f"1.{ext}")
                # Check if the image exists
                if os.path.exists(image_path):
                    # Load and preprocess the image
                    image = Image.open(image_path)
                    image = preprocess_image(image)

                    # Predict skintone using the model
                    prediction = predict_skintone(model, image)

                    skintone = np.argmax(prediction) + 1
                    
                    # Find the corresponding row in the DataFrame using the folder name
                    row_index = df.index[df['channelName'] == subfolder_name].tolist()[0]
                    
                    # Store the skintone prediction in the "Skintone" column of the corresponding row
                    df.loc[row_index, 'skinTone'] = skintone
                    # Break the loop after finding the first image
                    break

    # Code to preserve Web link
    # Create a new Workbook object and select active worksheet
    wb = Workbook()
    ws = wb.active

    # Convert DataFrame to rows and write to Excel worksheet
    for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True)):
        for c_idx, value in enumerate(row, 1):
            # Preserve hyperlink format for "productLink" column
            if df.columns[c_idx - 1] == "productLink":
                cell = ws.cell(row=r_idx + 1, column=c_idx, value=value)
                cell.style = "Hyperlink"
                cell.font = Font(underline="single", color="0563C1")
            else:
                ws.cell(row=r_idx + 1, column=c_idx, value=value)

    # Location and name of new file to be saved
    new_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'
    wb.save(new_file_path)

Now we apply Model to FIRST face and store it into Xlsx File (LIGHTING CONDITIONS)

In [3]:
# All needed Functions

# Function to load the model
def load_model():
    model = tf.keras.models.load_model('E:\\University\\FYP Stuff\\Models For SkinTone Recognition\\Skintone_Recognition_Multimodal1.h5')
    return model

# Function to preprocess the image
def preprocess_image(image):
    # Resize, normalize, and reshape the image
    image = image.resize((224, 224))  
    image = np.array(image) / 255.0  
    image = np.expand_dims(image, axis=0)  
    return image

# Function to predict skintone using the model
def predict_skintone(model, image, lighting_text, classes):

    # Convert lighting text to one-hot encoding
    lighting_encoded = classes.index(lighting_text)
    lighting_onehot = to_categorical(lighting_encoded, num_classes=3)

    #Predictions
    prediction = model.predict({'image_input': image, 
                                 'text_input': np.expand_dims(lighting_onehot, axis=0)})
    return prediction

# Function to process images in subfolders and update Excel file
def process_images_and_update_excel(model, main_folder, excel_file):
    # Load Excel file into DataFrame
    df = pd.read_excel(excel_file)

    # Iterate through each subfolder in the main folder
    for subfolder_name in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder_name)
        
        # Check if the subfolder is actually a directory
        if os.path.isdir(subfolder_path):
            # Look for image files named "1" with different extensions
            for ext in ['png', 'jpeg', 'jpg']:
                image_path = os.path.join(subfolder_path, f"1.{ext}")
                # Check if the image exists
                if os.path.exists(image_path):
                    # Load and preprocess the image
                    image = Image.open(image_path)
                    image = preprocess_image(image)

                    # Classes typed manually 
                    classes = ['poorly', 'video', 'well']
                    
                    # LIGHTING TEXT
                    lighting_text = 'well'

                    # Predict skintone using the model
                    prediction = predict_skintone(model, image, lighting_text, classes)

                    skintone = np.argmax(prediction) + 1
                    
                    # Find the corresponding row in the DataFrame using the folder name
                    row_index = df.index[df['channelName'] == subfolder_name].tolist()[0]
                    
                    # Store the skintone prediction in the "Skintone" column of the corresponding row
                    df.loc[row_index, 'skinTone'] = skintone
                    # Break the loop after finding the first image
                    break

    # Code to preserve Web link
    # Create a new Workbook object and select active worksheet
    wb = Workbook()
    ws = wb.active

    # Convert DataFrame to rows and write to Excel worksheet
    for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True)):
        for c_idx, value in enumerate(row, 1):
            # Preserve hyperlink format for "productLink" column
            if df.columns[c_idx - 1] == "productLink":
                cell = ws.cell(row=r_idx + 1, column=c_idx, value=value)
                cell.style = "Hyperlink"
                cell.font = Font(underline="single", color="0563C1")
            else:
                ws.cell(row=r_idx + 1, column=c_idx, value=value)

    # Location and name of new file to be saved
    new_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'
    wb.save(new_file_path)

Executing using functions above

In [4]:
# Path to the main folder containing subfolders
main_folder = 'E:\\University\\FYP Stuff\\Dataset\\Makeup Images'
# Path to the Excel file
excel_file = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_dec.xlsx'

# Load the model
model = load_model()

# Process images in subfolders and update Excel file
process_images_and_update_excel(model, main_folder, excel_file)



Repeat for SECOND FACE (LIGHTING CONDITIONS)

In [6]:
# Function to process images in subfolders and update Excel file
def process_images_and_update_excel(model, main_folder, excel_file):
    # Load Excel file into DataFrame
    df = pd.read_excel(excel_file)

    # Iterate through each subfolder in the main folder
    for subfolder_name in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder_name)
        
        # Check if the subfolder is actually a directory
        if os.path.isdir(subfolder_path):
            # Look for image files named "1" with different extensions
            for ext in ['png', 'jpeg', 'jpg']:
                image_path = os.path.join(subfolder_path, f"2.{ext}")
                # Check if the image exists
                if os.path.exists(image_path):
                    # Load and preprocess the image
                    image = Image.open(image_path)
                    image = preprocess_image(image)

                    # Classes typed manually 
                    classes = ['poorly', 'video', 'well']
                    
                    # LIGHTING TEXT
                    lighting_text = 'well'

                    # Predict skintone using the model
                    prediction = predict_skintone(model, image, lighting_text, classes)

                    skintone2 = np.argmax(prediction) + 1
                    
                    # Find the corresponding row in the DataFrame using the folder name
                    row_index = df.index[df['channelName'] == subfolder_name].tolist()[0]
                    
                    # Store the skintone prediction in the "Skintone" column of the corresponding row
                    df.loc[row_index, 'skinTone2'] = skintone2
                    # Break the loop after finding the first image
                    break

    # Code to preserve Web link
    # Create a new Workbook object and select active worksheet
    wb = Workbook()
    ws = wb.active

    # Convert DataFrame to rows and write to Excel worksheet
    for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True)):
        for c_idx, value in enumerate(row, 1):
            # Preserve hyperlink format for "productLink" column
            if df.columns[c_idx - 1] == "productLink":
                cell = ws.cell(row=r_idx + 1, column=c_idx, value=value)
                cell.style = "Hyperlink"
                cell.font = Font(underline="single", color="0563C1")
            else:
                ws.cell(row=r_idx + 1, column=c_idx, value=value)

    # Location and name of new file to be saved
    new_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'
    wb.save(new_file_path)

# Path to the main folder containing subfolders
main_folder = 'E:\\University\\FYP Stuff\\Dataset\\Makeup Images'
# Path to the Excel file
excel_file = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'

# Load the model
model = load_model()

# Process images in subfolders and update Excel file
process_images_and_update_excel(model, main_folder, excel_file)



Repeat for SECOND FACE (NO LIGHTING CONDITIONS)

In [5]:
# Function to process images in subfolders and update Excel file
def process_images_and_update_excel(model, main_folder, excel_file):
    # Load Excel file into DataFrame
    df = pd.read_excel(excel_file)

    # Iterate through each subfolder in the main folder
    for subfolder_name in os.listdir(main_folder):
        subfolder_path = os.path.join(main_folder, subfolder_name)
        
        # Check if the subfolder is actually a directory
        if os.path.isdir(subfolder_path):
            # Look for image files named "1" with different extensions
            for ext in ['png', 'jpeg', 'jpg']:
                image_path = os.path.join(subfolder_path, f"2.{ext}")
                # Check if the image exists
                if os.path.exists(image_path):
                    # Load and preprocess the image
                    image = Image.open(image_path)
                    image = preprocess_image(image)

                    # Predict skintone using the model
                    prediction = predict_skintone(model, image)

                    skintone2 = np.argmax(prediction) + 1
                    
                    # Find the corresponding row in the DataFrame using the folder name
                    row_index = df.index[df['channelName'] == subfolder_name].tolist()[0]
                    
                    # Store the skintone prediction in the "Skintone" column of the corresponding row
                    df.loc[row_index, 'skinTone2'] = skintone2
                    # Break the loop after finding the first image
                    break

    # Code to preserve Web link
    # Create a new Workbook object and select active worksheet
    wb = Workbook()
    ws = wb.active

    # Convert DataFrame to rows and write to Excel worksheet
    for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True)):
        for c_idx, value in enumerate(row, 1):
            # Preserve hyperlink format for "productLink" column
            if df.columns[c_idx - 1] == "productLink":
                cell = ws.cell(row=r_idx + 1, column=c_idx, value=value)
                cell.style = "Hyperlink"
                cell.font = Font(underline="single", color="0563C1")
            else:
                ws.cell(row=r_idx + 1, column=c_idx, value=value)

    # Location and name of new file to be saved
    new_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'
    wb.save(new_file_path)

# Path to the main folder containing subfolders
main_folder = 'E:\\University\\FYP Stuff\\Dataset\\Makeup Images'
# Path to the Excel file
excel_file = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'

# Load the model
model = load_model()

# Process images in subfolders and update Excel file
process_images_and_update_excel(model, main_folder, excel_file)

TypeError: predict_skintone() missing 2 required positional arguments: 'lighting_text' and 'classes'

Finding Average of 2 Skintones

In [8]:
def calculate_average_and_add_column(input_file_path, output_file_path):
    # Load the existing Excel file
    wb = load_workbook(input_file_path)
    
    # Iterate through each sheet
    for sheet_name in wb.sheetnames:
        # Read the sheet into DataFrame
        df = pd.read_excel(input_file_path, sheet_name=sheet_name)
        
        # Calculate the average of the two columns and store it in a new column named "SkinTone"
        if 'skinTone' in df.columns and 'skinTone2' in df.columns:
            df['SkinTone'] = df[['skinTone', 'skinTone2']].astype(float).mean(axis=1)
        
        # Write the updated DataFrame back to the sheet
        with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
            writer.book = wb
            df.to_excel(writer, sheet_name=sheet_name, index=False)

# Example usage:
input_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_MST.xlsx'  
output_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_Final.xlsx' 
calculate_average_and_add_column(input_file_path, output_file_path)


  writer.book = wb


Filling Missing Information

In [8]:
def calculate_average_and_add_column(input_file_path, output_file_path):
    # Load the existing Excel file
    wb = load_workbook(input_file_path)
    
    # Iterate through each sheet
    for sheet_name in wb.sheetnames:
        # Read the sheet into DataFrame
        df = pd.read_excel(input_file_path, sheet_name=sheet_name)
        
        # Initialize the SimpleImputer with mean strategy
        imputer = SimpleImputer(strategy='mean')

        # Handle missing values for the 'SkinTone' column
        df['SkinTone'] = imputer.fit_transform(df[['SkinTone']])
        
        # Write the updated DataFrame back to the sheet
        with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:
            writer.book = wb
            df.to_excel(writer, sheet_name=sheet_name, index=False)

# Example usage:
input_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_Final.xlsx'  
output_file_path = 'E:\\University\\FYP Stuff\\Dataset\\Youtube_Dataset_Final.xlsx' 
calculate_average_and_add_column(input_file_path, output_file_path)

  writer.book = wb
