In [None]:
import cv2
import numpy as np
import os
import glob
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from sklearn.preprocessing import normalize
from PIL import Image
from rembg import remove

# Color Feature Extraction
def extract_color_features(image_path):
    """
    Extract the average RGB colour of the foreground object in an image.

    The background is stripped with ``rembg`` and the per-channel means are
    computed over the remaining foreground pixels only, so the result does
    not depend on how much of the frame the object occupies.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A dict with the keys "Image" (file name), "Avg Red", "Avg Green" and
        "Avg Blue" (channel means on the 0-255 scale), or None when the image
        cannot be loaded or no foreground pixel is left after background
        removal.
    """
    # Load the image (cv2.imread signals failure by returning None)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        return None

    # Step 1: Remove background. OpenCV loads BGR, so convert to RGB first;
    # the result is converted to an array whose 4th channel is the alpha mask.
    pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    rgba = np.array(remove(pil_img))

    # Step 2: Select foreground pixels only. Averaging over the whole frame
    # would mix the blanked-out background into the colour statistics.
    foreground = rgba[:, :, 3] > 0
    if not foreground.any():
        print(f"Warning: No foreground found in image {image_path}. Skipping.")
        return None
    gem_pixels = rgba[:, :, :3][foreground]  # shape: (n_foreground_pixels, 3)

    # Step 3: Calculate average color values (mean of each RGB channel)
    avg_r, avg_g, avg_b = gem_pixels.mean(axis=0)

    return {
        "Image": os.path.basename(image_path),
        "Avg Red": avg_r,
        "Avg Green": avg_g,
        "Avg Blue": avg_b,
    }

# Cut Feature Extraction
def extract_geometric_features(image_path):
    """
    Compute shape descriptors for the largest contour found in an image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A dict with the keys "Image", "Aspect_Ratio", "Perimeter", "Area",
        "Circularity", "Convexity", "Edge_Sharpness" and "Symmetry", or None
        when the image cannot be read or no contour is detected.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Unable to read image {image_path}. Skipping.")
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Outline detection: smooth away noise, run Canny, trace the outer contours
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    edge_map = cv2.Canny(smoothed, threshold1=50, threshold2=150)
    contours, _ = cv2.findContours(edge_map.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        print(f"Warning: No contours found in image {image_path}. Skipping.")
        return None

    # The contour with the largest area is taken to be the gemstone
    outline = max(contours, key=cv2.contourArea)

    # Width / height of the upright bounding box
    _, _, box_w, box_h = cv2.boundingRect(outline)
    if box_h != 0:
        aspect_ratio = float(box_w) / box_h
    else:
        aspect_ratio = 0

    perimeter = cv2.arcLength(outline, True)
    area = cv2.contourArea(outline)

    # 4*pi*A / P^2, guarded against a zero-length perimeter
    if perimeter != 0:
        circularity = (4 * np.pi * area) / (perimeter ** 2)
    else:
        circularity = 0

    # Contour area relative to the area of its convex hull
    hull_area = cv2.contourArea(cv2.convexHull(outline))
    if hull_area != 0:
        convexity = area / hull_area
    else:
        convexity = 0

    # Variance of the Laplacian of the unblurred grayscale image
    edge_sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()

    def mirror_score(flip_code):
        # 1 minus the mean absolute difference (scaled by 255) between the
        # grayscale image and its flipped copy
        mirrored = cv2.flip(gray, flip_code)
        return 1 - (np.mean(cv2.absdiff(gray, mirrored)) / 255)

    # Flip code 1 mirrors left-right, 0 mirrors top-bottom; average both scores
    horizontal_score = mirror_score(1)
    vertical_score = mirror_score(0)
    symmetry = (horizontal_score + vertical_score) / 2

    return {
        'Image': os.path.basename(image_path),
        'Aspect_Ratio': aspect_ratio,
        'Perimeter': perimeter,
        'Area': area,
        'Circularity': circularity,
        'Convexity': convexity,
        'Edge_Sharpness': edge_sharpness,
        'Symmetry': symmetry,
    }

# Clarity Feature Extraction
def extract_clarity_features(image_path):
    """
    Compute texture, edge and colour-spread descriptors for an image.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A dict with the keys "Contrast", "Homogeneity", "Energy",
        "Correlation", "Edge_Density", "Intensity_Variance", "Hue_Std",
        "Saturation_Std" and "Image", or None when the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Unable to read image {image_path}. Skipping.")
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # 1. GLCM texture statistics (single offset: distance 1, angle 0)
    glcm = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    texture_props = (
        ('Contrast', 'contrast'),
        ('Homogeneity', 'homogeneity'),
        ('Energy', 'energy'),
        ('Correlation', 'correlation'),
    )
    features = {label: graycoprops(glcm, prop)[0, 0] for label, prop in texture_props}

    # 2. Summed Canny edge map divided by the pixel count.
    # NOTE(review): Canny edge maps are 0/255, so this value is presumably
    # 255x the edge-pixel fraction -- confirm before rescaling.
    edge_map = cv2.Canny(gray, threshold1=50, threshold2=150)
    n_rows, n_cols = gray.shape
    features['Edge_Density'] = np.sum(edge_map) / (n_rows * n_cols)

    # 3. Spread of the grayscale intensities
    features['Intensity_Variance'] = np.var(gray)

    # 4. Spread of hue and saturation as a colour-uniformity measure
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    features['Hue_Std'] = np.std(hsv[..., 0])
    features['Saturation_Std'] = np.std(hsv[..., 1])

    # 5. File name, used as the join key by the caller
    features['Image'] = os.path.basename(image_path)

    return features

def process_images(input_folder, output_folder):
    """
    Extract color, cut and clarity features for every image in a folder.

    Each image is passed to the three feature extractors; the per-group
    results are joined on the "Image" column and written to
    ``combined_features.csv`` inside ``output_folder``.

    Args:
        input_folder: Folder that is scanned (non-recursively) for files
            ending in .png, .jpg or .jpeg (case-insensitive).
        output_folder: Folder that receives the CSV; created if missing.

    Returns:
        None. When no features could be extracted at all, a message is
        printed and no CSV is written.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')  # Adjust for your image formats

    color_features_list = []
    cut_features_list = []
    clarity_features_list = []

    # sorted() keeps the processing order independent of the OS listing order
    for filename in sorted(os.listdir(input_folder)):
        # Lower-case first so that e.g. "IMG_001.JPG" is not skipped
        if not filename.lower().endswith(image_extensions):
            continue
        image_path = os.path.join(input_folder, filename)

        # Extract color features
        color_features = extract_color_features(image_path)
        if color_features is not None:
            color_features_list.append(color_features)

        # Extract cut features
        cut_features = extract_geometric_features(image_path)
        if cut_features is not None:
            cut_features_list.append(cut_features)

        # Extract clarity features
        clarity_features = extract_clarity_features(image_path)
        if clarity_features is not None:
            clarity_features_list.append(clarity_features)

    # An empty feature list yields a DataFrame without an "Image" column,
    # which would make the merge raise KeyError, so such groups are skipped.
    frames = [
        pd.DataFrame(rows)
        for rows in (color_features_list, cut_features_list, clarity_features_list)
        if rows
    ]
    if not frames:
        print(f"No features could be extracted from {input_folder}. No CSV written.")
        return

    # Outer-join the remaining groups so an image that failed in one
    # extractor still keeps the features from the others.
    combined_df = frames[0]
    for frame in frames[1:]:
        combined_df = pd.merge(combined_df, frame, on="Image", how="outer")

    # Keep the join key as the first column whichever group came first
    ordered_columns = ["Image"] + [col for col in combined_df.columns if col != "Image"]
    combined_df = combined_df[ordered_columns]

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Save the combined DataFrame to a CSV file
    output_csv = os.path.join(output_folder, 'combined_features.csv')
    combined_df.to_csv(output_csv, index=False)
    print(f"Combined feature extraction completed. CSV saved at {output_csv}")

# Folder whose images are analysed, and the folder that receives the CSV
input_folder = r"C:\Users\Muralish\Desktop\Sapphires_Cleaned\Blue Sapphires\page1\B9543\cleaned\B9543_video2"
output_folder = r"C:\Users\Muralish\Desktop\Sapphires_Cleaned\Blue Sapphires\page1\B9543\cleaned\B9543_video2\feature_extraction"

# Extract features for every image in the input folder
process_images(input_folder=input_folder, output_folder=output_folder)
