In [1]:
import os
import cv2
import shutil
from tqdm import tqdm

In [2]:
def detect_faces(image_path, face_cascade):
    """Tell whether at least one face is found in the image at ``image_path``.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        face_cascade: Detector object exposing ``detectMultiScale`` (the
            notebook passes a ``cv2.CascadeClassifier``).

    Returns:
        bool: True when the detector reports one or more faces. False when
        it reports none, or when ``cv2.imread`` returns None for the path.
    """
    bgr_pixels = cv2.imread(image_path)

    if bgr_pixels is not None:
        # The cascade is run on a single-channel (grayscale) version of the image.
        grayscale = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2GRAY)
        detections = face_cascade.detectMultiScale(
            grayscale,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
        )
        # len() is used instead of truthiness so array-like results are handled.
        return len(detections) > 0

    return False

In [3]:
def clean_dataset(input_dir, output_dir, cascade_path):
    """Copy every image in which a face is detected into a mirrored output tree.

    ``input_dir`` must contain ``train`` and ``test`` folders, each holding
    one sub-folder per emotion label. The same
    ``<subset>/<emotion>/<image>`` layout is recreated under ``output_dir``,
    but only images for which ``detect_faces`` returns True are copied.

    Args:
        input_dir: Root directory of the raw dataset.
        output_dir: Root directory of the cleaned dataset; created if missing.
        cascade_path: Path of the cascade XML file handed to
            ``cv2.CascadeClassifier``.

    Raises:
        ValueError: If the cascade classifier is empty after loading
            ``cascade_path`` (e.g. wrong path or invalid file).
        FileNotFoundError: If ``input_dir/train`` or ``input_dir/test`` does
            not exist (propagated from ``os.listdir``).
    """
    face_cascade = cv2.CascadeClassifier(cascade_path)
    # A bad cascade path does not fail at construction; it produces an empty
    # classifier that only errors later, on the first detection call. Fail
    # early with a clear message before touching the file system.
    if face_cascade.empty():
        raise ValueError(f"Could not load cascade classifier from {cascade_path!r}")

    os.makedirs(output_dir, exist_ok=True)

    for subset in ('train', 'test'):
        subset_input_path = os.path.join(input_dir, subset)
        subset_output_path = os.path.join(output_dir, subset)
        os.makedirs(subset_output_path, exist_ok=True)

        # Sorted so the processing order does not depend on the file system.
        for emotion in sorted(os.listdir(subset_input_path)):
            emotion_input_path = os.path.join(subset_input_path, emotion)

            # Stray files next to the emotion folders (.DS_Store, Thumbs.db,
            # notes, ...) are not classes; listing them would raise.
            if not os.path.isdir(emotion_input_path):
                continue

            emotion_output_path = os.path.join(subset_output_path, emotion)
            os.makedirs(emotion_output_path, exist_ok=True)

            image_names = sorted(os.listdir(emotion_input_path))
            for img_name in tqdm(image_names, desc=f"Processing {subset}/{emotion}"):
                img_path = os.path.join(emotion_input_path, img_name)

                # Keep only the images in which a face was detected.
                if detect_faces(img_path, face_cascade):
                    shutil.copy(img_path, os.path.join(emotion_output_path, img_name))


In [4]:
# Dataset paths
input_directory = "fer-2013"  # Replace with the path to your raw dataset
output_directory = "dataset"  # Replace with the path for the cleaned dataset

In [5]:
# Path to the Haar cascade file used for face detection
cascade_file_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"


In [7]:
# Clean the dataset: copy only the images with a detected face
# from input_directory into output_directory
clean_dataset(input_directory, output_directory, cascade_file_path)

Processing train/angry: 100%|██████████| 3995/3995 [00:28<00:00, 140.86it/s]
Processing train/disgust: 100%|██████████| 436/436 [00:03<00:00, 142.70it/s]
Processing train/fear: 100%|██████████| 4097/4097 [00:33<00:00, 123.30it/s]
Processing train/happy: 100%|██████████| 7215/7215 [01:08<00:00, 104.77it/s]
Processing train/neutral: 100%|██████████| 4965/4965 [00:46<00:00, 107.39it/s]
Processing train/sad: 100%|██████████| 4830/4830 [00:36<00:00, 132.50it/s]
Processing train/surprise: 100%|██████████| 3171/3171 [00:23<00:00, 136.55it/s]
Processing test/angry: 100%|██████████| 958/958 [00:07<00:00, 131.57it/s]
Processing test/disgust: 100%|██████████| 111/111 [00:00<00:00, 133.57it/s]
Processing test/fear: 100%|██████████| 1024/1024 [00:07<00:00, 128.80it/s]
Processing test/happy: 100%|██████████| 1774/1774 [00:15<00:00, 113.54it/s]
Processing test/neutral: 100%|██████████| 1233/1233 [00:09<00:00, 125.94it/s]
Processing test/sad: 100%|██████████| 1247/1247 [00:08<00:00, 153.81it/s]
Proces

In [8]:
# Dataset paths for the second dataset (rebinds the path variables set earlier)
input_directory = "fer2013-2"  # Replace with the path to your raw dataset
output_directory = "dataset2"  # Replace with the path for the cleaned dataset

In [9]:
# Clean the second dataset using the current input_directory / output_directory values
clean_dataset(input_directory, output_directory, cascade_file_path)

Processing train/angry: 100%|██████████| 3994/3994 [00:30<00:00, 131.95it/s]
Processing train/disgusted: 100%|██████████| 436/436 [00:03<00:00, 143.01it/s]
Processing train/fearful: 100%|██████████| 4097/4097 [00:30<00:00, 134.66it/s]
Processing train/happy: 100%|██████████| 7215/7215 [00:55<00:00, 129.85it/s]
Processing train/neutral: 100%|██████████| 4965/4965 [00:40<00:00, 123.51it/s]
Processing train/sad: 100%|██████████| 4830/4830 [00:36<00:00, 134.10it/s]
Processing train/surprised: 100%|██████████| 3171/3171 [00:24<00:00, 129.70it/s]
Processing test/angry: 100%|██████████| 958/958 [00:08<00:00, 109.78it/s]
Processing test/disgusted: 100%|██████████| 111/111 [00:01<00:00, 106.65it/s]
Processing test/fearful: 100%|██████████| 1024/1024 [00:09<00:00, 106.08it/s]
Processing test/happy: 100%|██████████| 1774/1774 [00:14<00:00, 125.22it/s]
Processing test/neutral: 100%|██████████| 1233/1233 [00:09<00:00, 130.32it/s]
Processing test/sad: 100%|██████████| 1247/1247 [00:09<00:00, 134.05i