In [None]:
import cv2
import os
import zipfile
import xml.etree.ElementTree as ET
import shutil
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Step 1: Preprocess the images
def preprocess_image(image):
    """Return *image* resized to 224x224, or ``None`` when there is no image.

    A ``None`` input is handed straight back so the caller can skip the file.
    """
    if image is not None:
        # Every image is brought to the same fixed size before detection.
        return cv2.resize(image, (224, 224))

    return None

# Step 2: Detect faces
def detect_faces(image):
    # Check if the image is empty
    if image is None:
        return []

    # Load pre-trained face detection model from OpenCV
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Perform face detection
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    return faces

# Example usage
#
# Extracts the dataset, re-annotates every image with the faces found by
# detect_faces(), and stores each image together with its updated XML in a
# new zip file on Drive.
zipfile_path = '/content/drive/MyDrive/Datasets/dataset_new.zip'
output_zipfile = '/content/drive/MyDrive/Datasets/processed_dataset.zip'

# Label names found in the source annotations -> names written to the output.
label_names = {'Fokus': 'focused', 'TidakFokus': 'not focused'}

# Extract the zipfile
with zipfile.ZipFile(zipfile_path, 'r') as zip_ref:
    zip_ref.extractall('/content')

# Create a new zip file
with zipfile.ZipFile(output_zipfile, 'w') as output_zip:
    # Process each image in the folder
    for folder_name in ['train', 'test', 'val']:
        folder_path = os.path.join('/content', folder_name)
        image_files = sorted(os.listdir(folder_path))  # Sort the files based on the file name

        for image_file in image_files:
            # Annotation files sit next to the images and are handled together
            # with their image, so do not try to decode them as pictures.
            if image_file.lower().endswith('.xml'):
                continue

            image_path = os.path.join(folder_path, image_file)

            # Load image using OpenCV
            image = cv2.imread(image_path)

            # Preprocess the image; None means the file could not be used
            preprocessed_image = preprocess_image(image)
            if preprocessed_image is None:
                continue

            # Detect faces in the preprocessed image
            detected_faces = detect_faces(preprocessed_image)

            # Detection runs on the resized copy, but the file on disk (and in
            # the archive) is the original image, so boxes must be mapped back
            # to the original pixel grid before they are written.
            orig_h, orig_w = image.shape[:2]
            proc_h, proc_w = preprocessed_image.shape[:2]
            scale_x = orig_w / proc_w
            scale_y = orig_h / proc_h

            # Get the XML label path for the current image
            xml_file = os.path.splitext(image_file)[0] + '.xml'
            xml_path = os.path.join(folder_path, xml_file)

            # Parse the XML file
            try:
                tree = ET.parse(xml_path)
                root = tree.getroot()
            except ET.ParseError as e:
                print(f"Error parsing XML: {xml_file}")
                print(f"Error message: {str(e)}")
                continue
            except OSError as e:
                # A missing/unreadable annotation should not abort the whole run.
                print(f"Could not read XML: {xml_file}")
                print(f"Error message: {str(e)}")
                continue

            # Find the label element in the XML file and assign the label as "focused" or "not focused"
            label_element = root.find('object/name')
            if label_element is None:
                print(f"Label element not found in XML: {xml_file}")
                continue

            # Unknown labels are passed through unchanged.
            label = label_names.get(label_element.text, label_element.text)

            # Remove the existing "Fokus" and "TidakFokus" objects. Only direct
            # children are matched because root.remove() can only remove those.
            for old_name in label_names:
                for obj in root.findall(f"object[name='{old_name}']"):
                    root.remove(obj)

            # Update the XML with the new labels and bounding boxes
            for (x, y, w, h) in detected_faces:
                face_element = ET.SubElement(root, 'object')
                ET.SubElement(face_element, 'name').text = label

                bndbox_element = ET.SubElement(face_element, 'bndbox')
                corners = (
                    ('xmin', x * scale_x),
                    ('ymin', y * scale_y),
                    ('xmax', (x + w) * scale_x),
                    ('ymax', (y + h) * scale_y),
                )
                for tag, value in corners:
                    ET.SubElement(bndbox_element, tag).text = str(int(round(value)))

            # Save the modified XML file
            tree.write(xml_path)

            # Add the processed image and XML file to the output zip
            output_zip.write(image_path)
            output_zip.write(xml_path)

            print(f"Processed: {image_file}")

# Remove the extracted folder
for folder_name in ['train', 'test', 'val']:
    folder_path = os.path.join('/content', folder_name)
    shutil.rmtree(folder_path)

print("Processing complete. Output stored in processed_dataset.zip")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processed: 0003_2m_-15P_-10V_-10H.jpg
Processed: 0003_2m_-15P_-10V_-15H.jpg
Processed: 0003_2m_-15P_-10V_-5H.jpg
Processed: 0003_2m_-15P_-10V_0H.jpg
Processed: 0003_2m_-15P_-10V_10H.jpg
Processed: 0003_2m_-15P_-10V_15H.jpg
Processed: 0003_2m_-15P_-10V_5H.jpg
Processed: 0003_2m_-15P_0V_-10H.jpg
Processed: 0003_2m_-15P_0V_-15H.jpg
Processed: 0003_2m_-15P_0V_-5H.jpg
Processed: 0003_2m_-15P_0V_0H.jpg
Processed: 0003_2m_-15P_0V_10H.jpg
Processed: 0003_2m_-15P_0V_15H.jpg
Processed: 0003_2m_-15P_0V_5H.jpg
Processed: 0003_2m_-15P_10V_-10H.jpg
Processed: 0003_2m_-15P_10V_-15H.jpg
Processed: 0003_2m_-15P_10V_-5H.jpg
Processed: 0003_2m_-15P_10V_0H.jpg
Processed: 0003_2m_-15P_10V_10H.jpg
Processed: 0003_2m_-15P_10V_15H.jpg
Processed: 0003_2m_-15P_10V_5H.jpg
Processed: 0003_2m_-30P_-10V_-10H.jpg
Processed: 0003_2m_-30P_-10V_-15H.jpg
Processed: 0003_2m_-30P_-10V_-5H.jpg
Pr

In [None]:
import cv2
import os
import zipfile
import xml.etree.ElementTree as ET
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Step 1: Preprocess the images
def preprocess_image(image):
    """Resize *image* to the standard 224x224 size.

    ``None`` is passed through unchanged so callers can detect and skip
    images that could not be loaded.
    """
    target_size = (224, 224)
    return None if image is None else cv2.resize(image, target_size)

# Step 2: Detect faces
def detect_faces(image):
    # Check if the image is empty
    if image is None:
        return []

    # Load pre-trained face detection model from OpenCV
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Perform face detection
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    return faces

# Example usage
#
# Extracts the dataset into a Drive folder and appends one <object> per
# detected face to each image's XML annotation, in place.
zipfile_path = '/content/drive/MyDrive/Datasets/dataset_new.zip'
output_folder = '/content/drive/MyDrive/Datasets/data'

# Label names found in the source annotations -> names written to the output.
label_names = {'Fokus': 'focused', 'TidakFokus': 'not focused'}

# Extract the zipfile
with zipfile.ZipFile(zipfile_path, 'r') as zip_ref:
    zip_ref.extractall(output_folder)

# Process each image in the folder
for folder_name in ['train', 'test', 'val']:
    folder_path = os.path.join(output_folder, folder_name)
    image_files = sorted(os.listdir(folder_path))  # Sort the files based on the file name

    for image_file in image_files:
        # Annotation files sit next to the images and are handled together
        # with their image, so do not try to decode them as pictures.
        if image_file.lower().endswith('.xml'):
            continue

        image_path = os.path.join(folder_path, image_file)

        # Load image using OpenCV
        image = cv2.imread(image_path)

        # Preprocess the image; None means the file could not be used
        preprocessed_image = preprocess_image(image)
        if preprocessed_image is None:
            continue

        # Detect faces in the preprocessed image
        detected_faces = detect_faces(preprocessed_image)

        # Detection runs on the resized copy, but the image file itself is
        # left at its original size, so boxes must be mapped back to the
        # original pixel grid before they are written.
        orig_h, orig_w = image.shape[:2]
        proc_h, proc_w = preprocessed_image.shape[:2]
        scale_x = orig_w / proc_w
        scale_y = orig_h / proc_h

        # Get the XML label path for the current image
        xml_file = os.path.splitext(image_file)[0] + '.xml'
        xml_path = os.path.join(folder_path, xml_file)

        # Parse the XML file
        try:
            tree = ET.parse(xml_path)
            root = tree.getroot()
        except ET.ParseError as e:
            print(f"Error parsing XML: {xml_file}")
            print(f"Error message: {str(e)}")
            continue
        except OSError as e:
            # A missing/unreadable annotation should not abort the whole run.
            print(f"Could not read XML: {xml_file}")
            print(f"Error message: {str(e)}")
            continue

        # Find the label element in the XML file and assign the label as "focused" or "not focused"
        label_element = root.find('object/name')
        if label_element is None:
            print(f"Label element not found in XML: {xml_file}")
            continue

        # Unknown labels are passed through unchanged.
        label = label_names.get(label_element.text, label_element.text)

        # NOTE(review): the pre-existing objects are kept alongside the new
        # face objects here -- confirm that is intended.

        # Update the XML with the new labels and bounding boxes
        for (x, y, w, h) in detected_faces:
            face_element = ET.SubElement(root, 'object')
            ET.SubElement(face_element, 'name').text = label

            bndbox_element = ET.SubElement(face_element, 'bndbox')
            corners = (
                ('xmin', x * scale_x),
                ('ymin', y * scale_y),
                ('xmax', (x + w) * scale_x),
                ('ymax', (y + h) * scale_y),
            )
            for tag, value in corners:
                ET.SubElement(bndbox_element, tag).text = str(int(round(value)))

        # Save the modified XML file
        tree.write(xml_path)

        print(f"Updated XML: {xml_file}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Updated XML: 0003_2m_-15P_-10V_-10H.xml
Updated XML: 0003_2m_-15P_-10V_-15H.xml
Updated XML: 0003_2m_-15P_-10V_-5H.xml
Updated XML: 0003_2m_-15P_-10V_0H.xml
Updated XML: 0003_2m_-15P_-10V_10H.xml
Updated XML: 0003_2m_-15P_-10V_15H.xml
Updated XML: 0003_2m_-15P_-10V_5H.xml
Updated XML: 0003_2m_-15P_0V_-10H.xml
Updated XML: 0003_2m_-15P_0V_-15H.xml
Updated XML: 0003_2m_-15P_0V_-5H.xml
Updated XML: 0003_2m_-15P_0V_0H.xml
Updated XML: 0003_2m_-15P_0V_10H.xml
Updated XML: 0003_2m_-15P_0V_15H.xml
Label element not found in XML: 0003_2m_-15P_0V_5H.xml
Updated XML: 0003_2m_-15P_10V_-10H.xml
Updated XML: 0003_2m_-15P_10V_-15H.xml
Updated XML: 0003_2m_-15P_10V_-5H.xml
Updated XML: 0003_2m_-15P_10V_0H.xml
Updated XML: 0003_2m_-15P_10V_10H.xml
Updated XML: 0003_2m_-15P_10V_15H.xml
Updated XML: 0003_2m_-15P_10V_5H.xml
Updated XML: 0003_2m_-30P_-10V_-10H.xml
Updated XML: 0

In [None]:
import cv2
import os
import zipfile
import xml.etree.ElementTree as ET
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Step 1: Preprocess the images
def preprocess_image(image):
    """Bring *image* to the fixed 224x224 size used by the pipeline.

    Returns ``None`` when *image* is ``None`` instead of attempting a resize.
    """
    # Guard clause: nothing to resize.
    if image is None:
        return image

    width, height = 224, 224
    return cv2.resize(image, (width, height))

# Step 2: Detect faces
def detect_faces(image):
    # Check if the image is empty
    if image is None:
        return []

    # Load pre-trained face detection model from OpenCV
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Perform face detection
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

    return faces

# Example usage
#
# Appends one <object> per detected face to the XML annotation of every
# image found directly inside output_folder.
zipfile_path = '/content/drive/MyDrive/Datasets/output.zip'
output_folder = '/content/drive/MyDrive/Datasets/data'
image_files = sorted(os.listdir(output_folder))  # Sort the files based on the file name

# Label names found in the source annotations -> names written to the output.
label_names = {'Fokus': 'focused', 'TidakFokus': 'not focused'}

for image_file in image_files:
    # Annotation files sit next to the images and are handled together with
    # their image, so do not try to decode them as pictures.
    if image_file.lower().endswith('.xml'):
        continue

    # The listing above comes from output_folder, so paths are built from it
    # (this cell never defined folder_path).
    image_path = os.path.join(output_folder, image_file)

    # Load image using OpenCV
    image = cv2.imread(image_path)

    # Preprocess the image; None means the entry could not be used
    preprocessed_image = preprocess_image(image)
    if preprocessed_image is None:
        continue

    # Detect faces in the preprocessed image
    detected_faces = detect_faces(preprocessed_image)

    # Detection runs on the resized copy, but the image file itself is left
    # at its original size, so boxes must be mapped back to the original
    # pixel grid before they are written.
    orig_h, orig_w = image.shape[:2]
    proc_h, proc_w = preprocessed_image.shape[:2]
    scale_x = orig_w / proc_w
    scale_y = orig_h / proc_h

    # Get the XML label path for the current image
    xml_file = os.path.splitext(image_file)[0] + '.xml'
    xml_path = os.path.join(output_folder, xml_file)

    # Parse the XML file
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML: {xml_file}")
        print(f"Error message: {str(e)}")
        continue
    except OSError as e:
        # A missing/unreadable annotation should not abort the whole run.
        print(f"Could not read XML: {xml_file}")
        print(f"Error message: {str(e)}")
        continue

    # Find the label element in the XML file and assign the label as "focused" or "not focused"
    label_element = root.find('object/name')
    if label_element is None:
        print(f"Label element not found in XML: {xml_file}")
        continue

    # Unknown labels are passed through unchanged.
    label = label_names.get(label_element.text, label_element.text)

    # NOTE(review): the pre-existing objects are kept alongside the new face
    # objects here -- confirm that is intended.

    # Update the XML with the new labels and bounding boxes
    for (x, y, w, h) in detected_faces:
        face_element = ET.SubElement(root, 'object')
        ET.SubElement(face_element, 'name').text = label

        bndbox_element = ET.SubElement(face_element, 'bndbox')
        corners = (
            ('xmin', x * scale_x),
            ('ymin', y * scale_y),
            ('xmax', (x + w) * scale_x),
            ('ymax', (y + h) * scale_y),
        )
        for tag, value in corners:
            ET.SubElement(bndbox_element, tag).text = str(int(round(value)))

    # Save the modified XML file
    tree.write(xml_path)

    print(f"Updated XML: {xml_file}")
