In [4]:
import cv2
import numpy as np
import os
import pandas as pd
from pathlib import Path
import logging

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='feature_extraction.log'
)

def extract_geometric_features(image_path):
    """
    Extract geometric features from a single image with error handling.
    Returns a dictionary of features or None if extraction fails.
    """
    try:
        # Read image
        image = cv2.imread(str(image_path))
        if image is None:
            logging.error(f"Failed to read image: {image_path}")
            return None

        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Apply Gaussian blur to reduce noise
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        # Threshold the image using Otsu's method
        _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Find contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if not contours:
            logging.warning(f"No contours found in image: {image_path}")
            return None

        # Get the largest contour
        largest_contour = max(contours, key=cv2.contourArea)

        # Calculate features
        area = cv2.contourArea(largest_contour)
        perimeter = cv2.arcLength(largest_contour, True)

        # Avoid division by zero
        circularity = 4 * np.pi * area / (perimeter * perimeter) if perimeter > 0 else 0

        # Calculate bounding rectangle
        x, y, w, h = cv2.boundingRect(largest_contour)
        aspect_ratio = float(w)/h if h > 0 else 0

        # Calculate moments
        M = cv2.moments(largest_contour)
        cx = int(M["m10"] / M["m00"]) if M["m00"] != 0 else 0
        cy = int(M["m01"] / M["m00"]) if M["m00"] != 0 else 0

        # Calculate convex hull
        hull = cv2.convexHull(largest_contour)
        hull_area = cv2.contourArea(hull)
        solidity = float(area)/hull_area if hull_area > 0 else 0

        # Calculate extent
        rect_area = w * h
        extent = float(area)/rect_area if rect_area > 0 else 0

        return {
            'area': area,
            'perimeter': perimeter,
            'circularity': circularity,
            'aspect_ratio': aspect_ratio,
            'centroid_x': cx,
            'centroid_y': cy,
            'solidity': solidity,
            'extent': extent
        }

    except Exception as e:
        logging.error(f"Error processing {image_path}: {str(e)}")
        return None

def process_folders(base_path):
    """
    Process all images in folders A and M under the base path
    """
    try:
        # Initialize lists to store results
        features_list = []
        labels = []
        filenames = []

        # Process both folders
        for folder in ['A', 'M']:
            folder_path = Path(base_path) / folder
            if not folder_path.exists():
                logging.error(f"Folder not found: {folder_path}")
                continue

            logging.info(f"Processing folder: {folder}")

            # Process each image in the folder
            for img_path in folder_path.glob('*.jpg'):
                logging.info(f"Processing image: {img_path.name}")

                features = extract_geometric_features(img_path)
                if features is not None:
                    features_list.append(features)
                    labels.append(folder)
                    filenames.append(img_path.name)

        if not features_list:
            logging.error("No features were successfully extracted")
            return None

        # Create DataFrame
        df = pd.DataFrame(features_list)
        df['label'] = labels
        df['filename'] = filenames

        return df

    except Exception as e:
        logging.error(f"Error in process_folders: {str(e)}")
        return None

if __name__ == "__main__":
    # Ganti dengan path folder 'gambar' Anda
    base_folder = "data/"

    # Process images and get features
    features_df = process_folders(base_folder)

    if features_df is not None:
        # Save results to CSV
        output_path = Path(base_folder) / 'geometric_features.csv'
        features_df.to_csv(output_path, index=False)
        print(f"\nResults saved to: {output_path}")

        # Print summary statistics
        print("\nFeature Summary Statistics:")
        print(features_df.groupby('label').mean())

        # Print number of images processed
        print("\nNumber of images processed:")
        print(features_df['label'].value_counts())

        # Print any failed images
        total_images = sum(len(list(Path(base_folder / folder).glob('*.jpg')))
                          for folder in ['A', 'M'])
        processed_images = len(features_df)
        if total_images != processed_images:
            print(f"\nWarning: {total_images - processed_images} images failed to process")
            print("Check feature_extraction.log for details")
    else:
        print("Error occurred during processing. Check feature_extraction.log for details")


Results saved to: data\gambar\geometric_features.csv

Feature Summary Statistics:


TypeError: agg function failed [how->mean,dtype->object]