In [34]:
import os
import cv2
import json
import numpy as np
from sklearn.cluster import KMeans

keyframes_path = '/Users/huyenphung/Desktop/backend/keyframe_infomaton/keyframe/L00'
output_file_path = '/Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00'
os.makedirs(output_file_path, exist_ok=True)

# Custom color palette (you can adjust this based on your needs)
color_palette = {
    'black': [0, 0, 0],
    'blue': [0, 0, 255],
    'brown': [150, 75, 0],
    'green': [0, 128, 0],
    'grey': [128, 128, 128],
    'orange': [255, 165, 0],
    'pink': [255, 192, 203],
    'purple': [128, 0, 128],
    'red': [255, 0, 0],
    'white': [255, 255, 255],
    'yellow': [255, 255, 0]
}

# Helper function to match RGB values to the closest color in the palette
def get_closest_color_name(r, g, b):
    min_distance = float('inf')
    closest_color = None

    for color_name, color_rgb in color_palette.items():
        distance = (r - color_rgb[0]) ** 2 + (g - color_rgb[1]) ** 2 + (b - color_rgb[2]) ** 2
        if distance < min_distance:
            min_distance = distance
            closest_color = color_name

    return closest_color

# Function to extract dominant colors using KMeans
def extract_dominant_colors(image, num_clusters=5):
    # Reshape the image to a list of pixels
    image = image.reshape((image.shape[0] * image.shape[1], 3))

    # Use KMeans to cluster pixel colors
    kmeans = KMeans(n_clusters=num_clusters)
    kmeans.fit(image)

    # Extract the cluster centers (dominant colors)
    dominant_colors = kmeans.cluster_centers_.astype(int)

    # Map the dominant colors to the closest color names from the custom palette
    color_context = [get_closest_color_name(r, g, b) for r, g, b in dominant_colors]

    return color_context

# Loop through all keyframes in the directory
for filename in os.listdir(keyframes_path):
    if filename.endswith('.jpg'):
        keyframe_path = os.path.join(keyframes_path, filename)

        # Load the image using OpenCV
        try:
            image = cv2.imread(keyframe_path)
            if image is None:
                raise ValueError(f"Unable to load image {keyframe_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        except Exception as e:
            print(f"Error reading image {keyframe_path}: {e}")
            continue  # Skip to the next image if there's an error

        # Extract dominant colors from the image using KMeans
        dominant_colors = extract_dominant_colors(image)

        # Save the result to a JSON file
        output_path = os.path.join(output_file_path, f'{os.path.splitext(filename)[0]}.json')
        with open(output_path, 'w') as output_file:
            json.dump(dominant_colors, output_file)

        print(f"Saved color extraction result for {filename} to {output_path}")

Saved color extraction result for keyframe_58.825433.jpg to /Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00/keyframe_58.825433.json
Saved color extraction result for keyframe_77.811067.jpg to /Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00/keyframe_77.811067.json
Saved color extraction result for keyframe_88.988900.jpg to /Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00/keyframe_88.988900.json
Saved color extraction result for keyframe_0.000000.jpg to /Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00/keyframe_0.000000.json
Saved color extraction result for keyframe_3.603600.jpg to /Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00/keyframe_3.603600.json
Saved color extraction result for keyframe_73.573500.jpg to /Users/huyenphung/Desktop/senior-project/keyframe_infomaton/color_metadata/L00/keyframe_73.573500.json
Saved color extraction res