In [None]:
# Prepare the directory for a single transformation
import os
import random
import shutil

# Flatten the FFHQ download: every file sitting directly inside ffhq/<subdir>/
# is copied into ffhq/images.
# Images come from https://github.com/NVlabs/ffhq-dataset
source_dir = "ffhq"
dest_dir = os.path.join(source_dir, "images")  # flat target folder for the copies
os.makedirs(dest_dir, exist_ok=True)  # no-op when the folder is already present

for sub_dir in sorted(os.listdir(source_dir)):
    sub_dir_path = os.path.join(source_dir, sub_dir)
    # Skip the target folder itself and anything that is not a directory.
    if sub_dir == "images" or not os.path.isdir(sub_dir_path):
        continue
    for file_name in os.listdir(sub_dir_path):
        file_path = os.path.join(sub_dir_path, file_name)
        if not os.path.isfile(file_path):
            continue  # nested folders are not descended into
        shutil.copy(file_path, dest_dir)

# Build the reduced FFHQ layout under ffhq/reduced/images/{train,test,val}.
base_dir = 'ffhq'
images_dir = os.path.join(base_dir, 'images')
output_dir = os.path.join(base_dir, 'reduced/images')

train_dir = os.path.join(output_dir, 'train')
test_dir = os.path.join(output_dir, 'test')
val_dir = os.path.join(output_dir, 'val')

# All three folders are created; only the test folder is populated below.
for split_dir in (train_dir, test_dir, val_dir):
    os.makedirs(split_dir, exist_ok=True)

# os.listdir returns names in arbitrary order, so sort them: together with the
# fixed seed below this makes the selection identical on every run.
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.png'))

# Why 2000: per the original author's note, the downstream dataset loader's
# preprocess method puts the first 2000 entries into self.test_dataset and the
# rest into self.train_dataset, after a shuffle seeded with random.seed(1234).
# NOTE(review): that loader is not part of this notebook - confirm the numbers.
split_seed = 1234
num_test_images = 2000
if len(image_files) < num_test_images:
    # random.sample raises ValueError when asked for more items than exist.
    print(f"Warning: only {len(image_files)} .png images found in {images_dir}; "
          f"using all of them instead of {num_test_images}.")
    num_test_images = len(image_files)

# A private Random instance keeps the draw reproducible without touching the
# global random state other cells may rely on.
test_images = random.Random(split_seed).sample(image_files, num_test_images)

# Copy (not move) the selected images into the test folder. The loop variable is
# deliberately not called `image`, which would shadow the Keras `image` module
# in the notebook namespace if this cell is re-run after the prediction cell.
for image_name in test_images:
    shutil.copy(os.path.join(images_dir, image_name), os.path.join(test_dir, image_name))

In [None]:
#Prepare list_attr_celeba.txt with labels (all 1) for single transformation
import os
import pandas as pd

# Path to the original CelebA attribute file; it only serves as a template for
# the table that is rewritten below.
file_path = "celeba_orig/list_attr_celeba.txt"  # Replace with the actual file path

# Column layout: the image file name followed by the 40 attribute columns.
column_names = ['image_id'] + [
    "5_o_Clock_Shadow", "Arched_Eyebrows", "Attractive", "Bags_Under_Eyes", "Bald", "Bangs",
    "Big_Lips", "Big_Nose", "Black_Hair", "Blond_Hair", "Blurry", "Brown_Hair", "Bushy_Eyebrows",
    "Chubby", "Double_Chin", "Eyeglasses", "Goatee", "Gray_Hair", "Heavy_Makeup", "High_Cheekbones",
    "Male", "Mouth_Slightly_Open", "Mustache", "Narrow_Eyes", "No_Beard", "Oval_Face", "Pale_Skin",
    "Pointy_Nose", "Receding_Hairline", "Rosy_Cheeks", "Sideburns", "Smiling", "Straight_Hair",
    "Wavy_Hair", "Wearing_Earrings", "Wearing_Hat", "Wearing_Lipstick", "Wearing_Necklace",
    "Wearing_Necktie", "Young"
]

# skiprows=2 drops the first TWO lines of the file; column names are supplied
# explicitly through `names`, so no header row is parsed.
df = pd.read_csv(file_path, sep=r'\s+', header=None, skiprows=2, names=column_names)

# Input folder with the selected images and the attribute file written later.
reduced_images_dir = "ffhq/reduced/images/test"
reduced_attr_file = "ffhq/reduced/images/list_attr_celeba.txt"

# Sorted so that the row order of the output is stable between runs.
reduced_images = sorted(os.listdir(reduced_images_dir))

# Take as many template rows as there are images. The explicit .copy() makes
# df_filtered an independent frame: assigning into the bare result of head()
# triggers SettingWithCopyWarning and may write through to `df`.
df_filtered = df.head(len(reduced_images)).copy()

# The lengths differ only when the template has fewer rows than there are images.
if len(df_filtered) == len(reduced_images):
    # Replace the template ids with the actual file names ...
    df_filtered['image_id'] = reduced_images
    # ... and set every attribute column (position 1 onward) to 1.
    df_filtered.iloc[:, 1:] = 1
else:
    print(f"Warning: Number of image files in reduced_images ({len(reduced_images)}) does not match the number of rows in df_filtered ({len(df_filtered)}).")

In [None]:
# Step 3: run mobilenet for generation of hair, gender and young labels
import tensorflow as tf
import numpy as np
import pandas as pd
import os
from tensorflow.keras.preprocessing import image

# Restore the previously saved model from its .h5 file.
model = tf.keras.models.load_model(
    './tf-gpu/my_model_finetuned.h5',
)

# Turn an image file into a model-ready batch of one.
def preprocess_image(img_path, target_size=(128, 128)):
    """Load an image and return it as a scaled, single-item batch.

    Args:
        img_path: Path of the image file to load.
        target_size: (height, width) the image is resized to while loading.
            Defaults to (128, 128), the size this notebook has always used.

    Returns:
        A numpy array with a leading batch axis of length 1 whose values are
        the loaded pixel values divided by 255.0.
    """
    img = image.load_img(img_path, target_size=target_size)  # resize on load
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # add the batch dimension
    return img_array / 255.0  # 8-bit pixel values end up in [0, 1]

# Label columns in the order the model is assumed to emit its scores
# (three hair colours, then Male, then Young) - TODO confirm against the
# training code of the saved model.
categories = ['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young']
hair_categories = categories[:3]
male_index = categories.index('Male')
young_index = categories.index('Young')

# Directory containing images
image_directory = './ffhq/reduced/images/test'  # Replace with the path to your image directory

# Predict labels for every image and write them into the matching row of
# df_filtered (created by the attribute-table cell, which must run first).
for img_filename in os.listdir(image_directory):
    if not img_filename.endswith(('.png', '.jpg', '.jpeg')):
        continue
    img_path = os.path.join(image_directory, img_filename)

    # Locate the target row once; skip inference when there is nothing to update.
    row_mask = df_filtered['image_id'] == img_filename
    if not row_mask.any():
        print(f"Warning: Image filename {img_filename} not found in df['image_id']!")
        continue

    img = preprocess_image(img_path)

    # verbose=0 suppresses the per-call progress bar, which would otherwise
    # print once per image and bury the notebook output.
    predictions = model.predict(img, verbose=0)
    scores = predictions[0]  # scores of the single image in the batch

    # Binary attributes: 1 above the 0.5 threshold, -1 otherwise.
    male_value = 1 if scores[male_index] > 0.5 else -1
    young_value = 1 if scores[young_index] > 0.5 else -1

    # Hair colour is exclusive: the highest-scoring colour gets 1, the rest -1.
    max_hair_index = int(np.argmax(scores[:len(hair_categories)]))
    hair_values = [1 if i == max_hair_index else -1 for i in range(len(hair_categories))]

    labels = dict(zip(hair_categories, hair_values))
    labels['Male'] = male_value
    labels['Young'] = young_value

    # Write all five labels in one assignment, in `categories` order.
    df_filtered.loc[row_mask, categories] = [labels[name] for name in categories]

In [None]:
# Step 4: write df_filtered to reduced_attr_file as space-separated text:
#   line 1  -> number of rows
#   line 2  -> attribute column names (everything except 'image_id')
#   line 3+ -> one row per image: image_id followed by its attribute values
header_line = " ".join(df_filtered.columns[1:])

with open(reduced_attr_file, 'w') as f:
    f.write(f"{len(df_filtered)}\n")
    f.write(header_line + "\n")

    # Plain tuples (name=None) hold the row values in column order.
    for record in df_filtered.itertuples(index=False, name=None):
        f.write(" ".join(map(str, record)) + "\n")