In [1]:
import os

In [2]:
# Move the working directory up one level -- presumably to the project root so
# that the `src.` imports and relative config paths used below resolve (confirm).
# NOTE(review): this is relative to the *current* directory, so re-running this
# cell without restarting the kernel climbs one more level each time.
os.chdir("../")

In [3]:
%pwd

'c:\\Users\\lapto\\OneDrive\\Desktop\\Face-reg'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataprocessConfig:
    """Immutable settings bundle for the data-preprocessing stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Working directory of the preprocessing stage.
    root_dir: Path
    # Value of the `preprocessed_dir` entry of the stage configuration.
    preprocessed_dir: Path
    # Directory that holds the input images to preprocess.
    data: Path



In [5]:
from src.Facerecognition.constants import *
from src.Facerecognition.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Loads the project's YAML configuration and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_data_processing_config(self) -> DataprocessConfig:
        """Build the configuration for the data-preprocessing stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``DataprocessConfig`` whose three fields are all ``Path`` objects:
            the stage root, the configured ``preprocessed_dir``, and the
            ``Facerecognition`` folder inside the data-ingestion ``unzip_dir``.
        """
        stage = self.config.data_preprocessing

        # Input images live in the "Facerecognition" folder of the unzip directory.
        data_dir = Path(self.config.data_ingestion.unzip_dir) / "Facerecognition"

        root_dir = Path(stage.root_dir)
        create_directories([root_dir])

        return DataprocessConfig(
            root_dir=root_dir,
            # Wrapped in Path so the value matches the dataclass annotation,
            # like the other two fields.
            preprocessed_dir=Path(stage.preprocessed_dir),
            data=data_dir,
        )

In [7]:
import urllib.request as request
import zipfile
from src.Facerecognition import logger
from src.Facerecognition.utils.common import  get_size
import cv2
import os
import numpy as np
from PIL import Image



In [8]:
class DataPreprocess:
    """Crops the single detected face out of each input image and saves it per label."""

    def __init__(self, config: DataprocessConfig):
        """Store the stage configuration; it is the default for ``preprocess_data``."""
        self.config = config

    def preprocess_data(self, data_preprocess_config=None):
        """Detect, crop, resize and save one face per input image.

        Walks ``data_preprocess_config.data`` recursively. For every file whose
        name ends in png/jpg/jpeg (case-insensitive), a Haar cascade is run on
        the image. Images that cannot be read, or in which the detector does not
        find exactly one face, are skipped with a printed notice. The face crop
        is resized to 224x224 and written to
        ``<root_dir>/preprocessed_images/<label>/<n>.jpg``, where ``label`` is
        the image's parent folder name (spaces replaced by dots, lower-cased)
        and ``n`` is a counter shared across all labels.

        Args:
            data_preprocess_config: Configuration providing ``data`` and
                ``root_dir``. Defaults to the config given to the constructor.

        Raises:
            FileNotFoundError: If ``haarcascade_frontalface_default.xml`` is not
                present in the current working directory.
            ValueError: If the cascade file exists but OpenCV cannot load it.
        """
        if data_preprocess_config is None:
            data_preprocess_config = self.config

        image_width = 224
        image_height = 224
        size = (image_width, image_height)

        # The cascade is resolved relative to the current working directory.
        cascade_file_path = os.path.abspath('haarcascade_frontalface_default.xml')
        if not os.path.isfile(cascade_file_path):
            raise FileNotFoundError(f"Haar cascade file not found: {cascade_file_path}")
        facecascade = cv2.CascadeClassifier(cascade_file_path)
        # Fail early with a clear message instead of an opaque OpenCV assertion
        # raised later from detectMultiScale.
        if facecascade.empty():
            raise ValueError(f"Could not load Haar cascade from: {cascade_file_path}")

        images_dir = data_preprocess_config.data

        # Output always goes below root_dir; config.preprocessed_dir is not consulted here.
        preprocessed_images_dir = data_preprocess_config.root_dir / "preprocessed_images"
        preprocessed_images_dir.mkdir(parents=True, exist_ok=True)

        current_id = 0

        for root, _, files in os.walk(images_dir):
            # The label (person name) depends only on the directory, not on the file.
            label = os.path.basename(root).replace(" ", ".").lower()
            class_dir = preprocessed_images_dir / label

            for file in files:
                # Case-insensitive so that e.g. "IMG_01.JPG" is not silently ignored.
                if not file.lower().endswith(("png", "jpg", "jpeg")):
                    continue

                path = os.path.join(root, file)

                # One subdirectory per label.
                class_dir.mkdir(parents=True, exist_ok=True)

                imgtest = cv2.imread(path, cv2.IMREAD_COLOR)
                # cv2.imread signals an unreadable/corrupt file by returning None.
                if imgtest is None:
                    print('---Photo skipped---\n')
                    continue

                faces = facecascade.detectMultiScale(imgtest, scaleFactor=1.1, minNeighbors=5)

                # Only images with exactly one detected face are kept.
                if len(faces) != 1:
                    print('---Photo skipped---\n')
                    continue

                x_, y_, w, h = faces[0]

                # Crop the detected face region and resize it to the target size.
                roi = imgtest[y_: y_ + h, x_: x_ + w]
                resized_image = cv2.resize(roi, size)

                # NOTE(review): cv2.imread yields BGR channel order while PIL treats
                # the array as RGB, so the saved crop likely has red/blue swapped.
                # Left unchanged because downstream code may rely on it -- confirm.
                preprocessed_image_path = class_dir / f"{current_id}.jpg"
                Image.fromarray(resized_image).save(preprocessed_image_path)
                current_id += 1


In [9]:
# class DataPreprocess:
#     def __init__(self, config: DataprocessConfig):
#         self.config = config

#     def preprocess_data(self, data_preprocess_config):
#         image_width = 224
#         image_height = 224
#         cascade_file_path = os.path.abspath('haarcascade_frontalface_default.xml')

#         facecascade = cv2.CascadeClassifier(cascade_file_path)
#         images_dir = data_preprocess_config.data

#         # Create a new directory for preprocessed images within the root directory
#         preprocessed_images_dir = data_preprocess_config.root_dir / "preprocessed_images"
#         preprocessed_images_dir.mkdir(parents=True, exist_ok=True)

#         current_id = 0
#         label_ids = {}

#         for root, _, files in os.walk(images_dir):
#             for file in files:
#                 if file.endswith(("png", "jpg", "jpeg")):
#                     # Path of the image
#                     path = os.path.join(root, file)

#                     # Get the label name (name of the person)
#                     label = os.path.basename(root).replace(" ", ".").lower()

#                     # Add the label (key) and its number (value)
#                     if label not in label_ids:
#                         label_ids[label] = current_id
#                         current_id += 1

#                     # Load the image
#                     imgtest = cv2.imread(path, cv2.IMREAD_COLOR)
#                     image_array = np.array(imgtest, "uint8")

#                     # Get the faces detected in the image
#                     faces = facecascade.detectMultiScale(imgtest, scaleFactor=1.1, minNeighbors=5)

#                     # If not exactly 1 face is detected, skip this photo
#                     if len(faces) != 1:
#                         print(f'---Photo skipped---\n')
#                         continue

#                     # Save the detected face(s) and associate them with the label
#                     for (x_, y_, w, h) in faces:
#                         # Resize the detected face to 224x224
#                         size = (image_width, image_height)

#                         # Detected face region
#                         roi = image_array[y_: y_ + h, x_: x_ + w]

#                         # Resize the detected head to the target size
#                         resized_image = cv2.resize(roi, size)

#                         preprocessed_image_path = preprocessed_images_dir / f"{current_id}.jpg"
#                         im = Image.fromarray(resized_image)
#                         im.save(preprocessed_image_path)
#                         current_id += 1


In [10]:
# Build the preprocessing configuration and run the face-cropping stage.
# No try/except wrapper: the previous `except Exception as e: raise e` only
# re-raised the same exception, so errors now propagate directly with their
# original traceback.
config = ConfigurationManager()
data_preprocess_config = config.get_data_processing_config()
data_preprocess = DataPreprocess(config=data_preprocess_config)
data_preprocess.preprocess_data(data_preprocess_config)

[2023-10-13 11:14:01,420: INFO: common yaml file: config\config.yaml loaded successfully]
[2023-10-13 11:14:01,423: INFO: common yaml file: params.yaml loaded successfully]
[2023-10-13 11:14:01,426: INFO: common created directory at: artifacts]
[2023-10-13 11:14:01,426: INFO: common created directory at: artifacts\data_preprocessing]
---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

---Photo skipped---

