In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# folder can be accessed through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

# IPython magic: switch the kernel's working directory to the project folder on Drive.
%cd /content/drive/MyDrive/COGS181-FinalProject/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/COGS181-FinalProject


In [None]:
import os
import random
import shutil
import kagglehub
import zipfile
from concurrent.futures import ThreadPoolExecutor
from PIL import Image

## Loading / Preparing Human Faces (FFHQ) & AI Faces (ThisPersonDoesNotExist) Datasets

In [None]:
# Real Human Faces Dataset (FFHQ)

### Randomly select 10,000 images for the sake of training, from the total 70,000 images in the FFHQ dataset.

ffhq_path = "/content/drive/My Drive/COGS181-FinalProject/FFHQ_Images"
real_faces_path = "/content/drive/My Drive/COGS181-FinalProject/Real_Faces"

NUM_REAL_FACES = 10000  # size of the real-face training subset
SAMPLE_SEED = 181       # fixed seed so re-running this cell selects the same subset

# Collect every .png below ffhq_path, including files in nested sub-folders.
image_paths = []
for root, _, files in os.walk(ffhq_path):
    for file in files:
        if file.lower().endswith('.png'):
            image_paths.append(os.path.join(root, file))

# Directory listings come back in arbitrary order; sorting makes the seeded
# sample below depend only on which files exist, not on listing order.
image_paths.sort()

# Fail with an actionable message instead of random.sample's generic
# "Sample larger than population" error when the dataset is incomplete.
if len(image_paths) < NUM_REAL_FACES:
    raise ValueError(
        f"Expected at least {NUM_REAL_FACES} .png images under {ffhq_path!r}, "
        f"found {len(image_paths)}."
    )

# A private, seeded generator keeps the selection reproducible without
# touching the global `random` state: re-running the cell yields the same
# subset rather than a fresh random 10,000.
selected_images = random.Random(SAMPLE_SEED).sample(image_paths, NUM_REAL_FACES)

### Save the randomly selected images to a new folder as our training data (Real_Faces).

def copy_image(img_path):
    """Copy one image into ``real_faces_path`` under its original file name.

    Uses ``shutil.copy2``, so file metadata is carried over along with the
    contents. The source file is left in place.
    """
    _, filename = os.path.split(img_path)
    shutil.copy2(img_path, os.path.join(real_faces_path, filename))

# The destination folder must exist before copying: each copy targets a file
# path inside it and fails if the parent directory is missing.
os.makedirs(real_faces_path, exist_ok=True)

with ThreadPoolExecutor(max_workers=10) as executor:
    # executor.map() only re-raises a worker's exception when its result is
    # retrieved. Draining the iterator makes a failed copy stop the cell
    # instead of being silently discarded before the success message prints.
    list(executor.map(copy_image, selected_images))

print("Selected training images have been saved as Real_Faces.")

In [None]:
# AI-Generated Faces Dataset (ThisPersonDoesNotExist) - Obtained from: https://www.kaggle.com/datasets/almightyj/person-face-dataset-thispersondoesnotexist

# Load Dataset

kaggle_path = kagglehub.dataset_download("almightyj/person-face-dataset-thispersondoesnotexist")
ai_faces_path = "/content/drive/My Drive/COGS181-FinalProject/AI_Faces"

# Create the destination up front. shutil.move() onto a path that does not
# exist renames the source *to* that path, so without this the first moved
# file would become "AI_Faces" itself instead of landing inside a folder.
os.makedirs(ai_faces_path, exist_ok=True)


def move_image(file_path):
    """Move one downloaded entry into ``ai_faces_path``, keeping its base name.

    Raises ``shutil.Error`` if an entry with the same name already exists in
    the destination folder.
    """
    shutil.move(file_path, ai_faces_path)


# Work out which directory holds the downloaded entries.
if kaggle_path.endswith(".zip"):
    extract_path = "/content/AI_Faces_Extracted"
    os.makedirs(extract_path, exist_ok=True)

    with zipfile.ZipFile(kaggle_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    source_dir = extract_path
else:
    source_dir = kaggle_path

# Move the *contents* of the download rather than the directory itself, so an
# already-existing AI_Faces folder is filled directly instead of receiving a
# nested copy of the download directory. A single non-zip file is moved as-is.
if os.path.isdir(source_dir):
    extracted_files = [os.path.join(source_dir, file) for file in os.listdir(source_dir)]
else:
    extracted_files = [source_dir]

with ThreadPoolExecutor(max_workers=10) as executor:
    # Drain the iterator: executor.map() only re-raises worker exceptions when
    # results are retrieved, so an unconsumed map would hide failed moves.
    list(executor.map(move_image, extracted_files))

print("Selected training images have been saved as AI_Faces.")

Downloading from https://www.kaggle.com/api/v1/datasets/download/almightyj/person-face-dataset-thispersondoesnotexist?dataset_version_number=1...


100%|██████████| 4.45G/4.45G [00:31<00:00, 154MB/s]

Extracting files...





Selected training images have been saved as AI_Faces.


In [None]:
# Converting AI_Faces Images to .png Format

ai_faces_path = "/content/drive/My Drive/COGS181-FinalProject/AI_Faces"

# Gather the full path of every JPEG sitting directly inside AI_Faces.
# Extensions are matched case-insensitively; sub-folders are not searched.
jpeg_names = filter(
    lambda name: name.lower().endswith(('.jpg', '.jpeg')),
    os.listdir(ai_faces_path),
)
image_paths = [os.path.join(ai_faces_path, name) for name in jpeg_names]

def convert_and_delete(img_path):
    """Re-encode one image as an RGB PNG in ``ai_faces_path`` and delete the source.

    The PNG keeps the source's base name with the extension replaced by
    ``.png``. The source file is removed only after the PNG has been saved.
    """
    stem, _ = os.path.splitext(os.path.basename(img_path))
    png_path = os.path.join(ai_faces_path, stem + ".png")

    with Image.open(img_path) as source:
        source.convert("RGB").save(png_path, "PNG")

    # Reached only when opening, converting and saving all succeeded.
    os.remove(img_path)

with ThreadPoolExecutor(max_workers=10) as executor:
    # executor.map() only re-raises a worker's exception when its result is
    # retrieved. Draining the iterator makes a failed conversion stop the cell
    # instead of being silently discarded before the success message prints.
    list(executor.map(convert_and_delete, image_paths))

print("All .jpg images have been converted to .png")

All .jpg images have been converted to .png.
