# In [2]:
import os
import random
import shutil
import  cv2
from mtcnn.mtcnn import MTCNN




# In [5]:
import os
import random
import shutil

# Function to preprocess and crop images
def preprocess_and_crop(image_paths, output_dir):
    """Detect faces in each image and return the cropped face regions.

    Args:
        image_paths: Iterable of paths to image files.
        output_dir: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A list of ``(face_img, source_basename, confidence)`` tuples, one
        per detected face. ``face_img`` is the RGB crop taken from the
        source image, ``source_basename`` is ``os.path.basename`` of the
        source path, and ``confidence`` is the detector's ``'confidence'``
        value for that face. Images that cannot be read, and detections
        whose box lies entirely outside the image, are skipped.
    """
    image_paths = list(image_paths)
    if not image_paths:
        # Nothing to process: avoid constructing the detector at all.
        return []

    detector = MTCNN()
    cropped_images = []
    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file with None
            # instead of raising; cvtColor would crash on it.
            continue
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_h, img_w = img_rgb.shape[:2]
        for result in detector.detect_faces(img_rgb):
            x, y, w, h = result['box']
            # Clamp the box to the image bounds: a negative start index
            # would otherwise wrap around in the slice and yield a wrong
            # or empty crop.
            x0, y0 = max(x, 0), max(y, 0)
            x1, y1 = min(x + w, img_w), min(y + h, img_h)
            if x1 <= x0 or y1 <= y0:
                continue
            face_img = img_rgb[y0:y1, x0:x1]
            cropped_images.append(
                (face_img, os.path.basename(img_path), result['confidence'])
            )
    return cropped_images

# Function to split dataset into train, val, and test sets
def split_dataset(dataset_dir, output_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """Crop faces from a class-per-folder dataset into train/val/test folders.

    Every sub-directory of ``dataset_dir`` is treated as one class. The
    files of each class are shuffled and partitioned by image; all faces
    cropped from one source image are written to the same split, under
    ``<output_dir>/<split>/<class_name>/``.

    Args:
        dataset_dir: Directory containing one sub-directory per class.
        output_dir: Directory under which ``train``, ``val`` and ``test``
            are created.
        train_ratio: Fraction of each class's images assigned to ``train``.
        val_ratio: Fraction of each class's images assigned to ``val``.
        test_ratio: Accepted for interface compatibility but not read;
            ``test`` receives whatever remains after ``train`` and ``val``.

    Returns:
        None. Results are written to disk.
    """
    # Create output directories
    train_dir = os.path.join(output_dir, 'train')
    val_dir = os.path.join(output_dir, 'val')
    test_dir = os.path.join(output_dir, 'test')
    for split_dir in (train_dir, val_dir, test_dir):
        os.makedirs(split_dir, exist_ok=True)

    # Iterate over class directories
    for class_name in os.listdir(dataset_dir):
        class_dir = os.path.join(dataset_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Collect regular files only; nested directories are not images.
        # Sorting first makes the shuffle reproducible under a fixed seed,
        # since os.listdir order is arbitrary.
        image_paths = [
            path
            for path in (
                os.path.join(class_dir, img_name)
                for img_name in sorted(os.listdir(class_dir))
            )
            if os.path.isfile(path)
        ]
        if not image_paths:
            continue

        random.shuffle(image_paths)

        # Split sizes are counted in source images.
        total_samples = len(image_paths)
        num_train = int(train_ratio * total_samples)
        num_val = int(val_ratio * total_samples)

        # Decide the split per source image *before* cropping. The number
        # of crops differs from the number of images (zero or several faces
        # per image), so slicing the crop list by image counts would skew
        # the ratios and could scatter one photo's faces across splits.
        split_for_image = {}
        for position, path in enumerate(image_paths):
            if position < num_train:
                target_dir = train_dir
            elif position < num_train + num_val:
                target_dir = val_dir
            else:
                target_dir = test_dir
            split_for_image[os.path.basename(path)] = target_dir

        # Preprocess and crop images (one detector pass per class)
        cropped_images = preprocess_and_crop(image_paths, output_dir)

        # Save cropped images to appropriate directories
        for img, img_name, confidence in cropped_images:
            if img.size == 0:
                # An empty crop cannot be colour-converted or encoded.
                continue
            label_dir = os.path.join(split_for_image[img_name], class_name)
            os.makedirs(label_dir, exist_ok=True)
            # Crops are RGB; cv2.imwrite expects BGR channel order.
            cv2.imwrite(
                os.path.join(label_dir, f"{img_name}_{confidence}.jpg"),
                cv2.cvtColor(img, cv2.COLOR_RGB2BGR),
            )

# Example run: read the class folders under 'test_dataset' and write the
# cropped train/val/test splits under 'cropped_dataset'.
dataset_dir, output_dir = 'test_dataset', 'cropped_dataset'
split_dataset(dataset_dir=dataset_dir, output_dir=output_dir)


