In [1]:
# Cell 1: Import Required Libraries
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans


In [2]:
# Cell 2: Define Function to Get Dominant Color
def get_dominant_color(image, k=1):
    # Convert image to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Reshape the image to a list of pixels
    pixels = image.reshape((-1, 3))
    # Use KMeans clustering to find the dominant color
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(pixels)
    # Return the most dominant color
    return kmeans.cluster_centers_[0]


In [3]:
# Cell 3: Define Function to Process Images in a Class Folder
def process_images_in_folder(folder_path, class_name):
    data = []
    for image_name in os.listdir(folder_path):
        # Full path to image
        image_path = os.path.join(folder_path, image_name)
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            continue  # Skip files that are not images
        # Extract the dominant color
        dominant_color = get_dominant_color(image)
        # Append the image details and dominant color
        data.append([class_name, image_name, dominant_color[0], dominant_color[1], dominant_color[2]])
    return data


In [4]:
# Modify the preprocess_dataset function to limit printing
def preprocess_dataset(base_folder):
    for class_name in os.listdir(base_folder):
        class_folder = os.path.join(base_folder, class_name)
        
        if os.path.isdir(class_folder):
            print(f"Processing class: {class_name}...")  # Keep this print statement to show progress

            # Process images in the class folder
            class_data = process_images_in_folder(class_folder, class_name)
            
            # Create a DataFrame for the class
            df = pd.DataFrame(class_data, columns=['Class', 'Image_Name', 'R', 'G', 'B'])
            
            # Save to a CSV named after the class
            output_csv = os.path.join(base_folder, f"{class_name}_colors.csv")
            df.to_csv(output_csv, index=False)
            
            # Notify when processing is done for a class
            print(f"Finished processing and saved: {class_name}_colors.csv")


In [5]:
# Cell 5: Test the Preprocessing on a Sample Dataset
test_base_folder = "./images"  # Replace with the path to your test folder

# Run the preprocessing on the test dataset
preprocess_dataset(test_base_folder)


Processing class: bed...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

Finished processing and saved: bed_colors.csv
Processing class: couch...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

Finished processing and saved: couch_colors.csv
Processing class: dining table...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

Finished processing and saved: dining table_colors.csv
Processing class: chair...


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

Finished processing and saved: chair_colors.csv


  super()._check_params_vs_input(X, default_n_init=10)
