In [None]:
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.cluster import KMeans
from tqdm import tqdm

: 

In [None]:
# Directory containing the keyframes
keyframes_dir = '/Users/vihuynh1301/Documents/GitHub/SeniorProject/keyframe_infomaton/keyframe'

# Nested mapping: data part -> video id -> sorted list of keyframe image paths
all_keyframe_paths = dict()

# Parsing keyframe paths
for part in sorted(os.listdir(keyframes_dir)):
    # Build the path from the real directory name. The shortened key below
    # is only a label and need not exist on disk (e.g. "part_01" -> "part").
    data_part_path = f'{keyframes_dir}/{part}'

    # Skip stray files (e.g. macOS ".DS_Store"); os.listdir would fail on them
    if not os.path.isdir(data_part_path):
        continue

    data_part = part.split('_')[0]  # e.g., "part_01" -> "part"
    # NOTE(review): directories sharing a prefix are merged under one key, so
    # identical video ids in two such directories overwrite each other —
    # confirm the naming scheme on disk.
    part_videos = all_keyframe_paths.setdefault(data_part, dict())

    for video_dir in sorted(os.listdir(data_part_path)):
        video_path = f'{data_part_path}/{video_dir}'
        if not os.path.isdir(video_path):
            continue

        # The video id is whatever follows the last underscore of the folder name
        video_id = video_dir.split('_')[-1]
        part_videos[video_id] = sorted(glob.glob(f'{video_path}/*.jpg'))

: 

In [None]:
# Reference palette: color name -> [R, G, B] channel values.
# Entries can be added or changed freely; the order only matters when two
# entries are equally close to a queried color.
color_palette = dict(
    black=[0, 0, 0],
    blue=[0, 0, 255],
    brown=[150, 75, 0],
    green=[0, 128, 0],
    grey=[128, 128, 128],
    orange=[255, 165, 0],
    pink=[255, 192, 203],
    purple=[128, 0, 128],
    red=[255, 0, 0],
    white=[255, 255, 255],
    yellow=[255, 255, 0],
)

# Helper function to match RGB values to the closest color in the palette
def get_closest_color_name(r, g, b, palette=None):
    """Return the name of the palette color nearest to ``(r, g, b)``.

    Nearness is the squared Euclidean distance between the three channel
    values and each palette entry.

    Args:
        r: Red channel value of the color to classify.
        g: Green channel value of the color to classify.
        b: Blue channel value of the color to classify.
        palette: Optional mapping of color name -> ``[r, g, b]``. When
            omitted, the module-level ``color_palette`` is used.

    Returns:
        The name of the nearest palette entry. On a tie the entry that comes
        first in the palette wins. ``None`` if the palette is empty.
    """
    if palette is None:
        # Looked up at call time so a palette redefined later is picked up
        palette = color_palette

    def squared_distance(color_name):
        color_rgb = palette[color_name]
        return (
            (r - color_rgb[0]) ** 2
            + (g - color_rgb[1]) ** 2
            + (b - color_rgb[2]) ** 2
        )

    # min() keeps the first of several equally-close entries
    return min(palette, key=squared_distance, default=None)

# Function to extract dominant colors using K-Means clustering
def get_image_colors(image_path, num_clusters=5, random_state=0):
    """Describe an image by the palette names of its dominant colors.

    The image's pixels are clustered with K-Means, and each cluster center
    is mapped to a name via ``get_closest_color_name``.

    Args:
        image_path: Path of the image file to analyse.
        num_clusters: Number of K-Means clusters, i.e. number of dominant
            colors returned.
        random_state: Seed forwarded to ``KMeans`` so repeated runs give the
            same result. Pass ``None`` for unseeded behavior.

    Returns:
        A list of ``num_clusters`` color names, one per cluster center. The
        same name may appear more than once.

    Raises:
        OSError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise OSError(f"Could not read image: {image_path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = image.reshape(-1, 3)  # Flatten to one row of 3 channel values per pixel

    # Use KMeans to cluster pixel colors
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    kmeans.fit(pixels)

    # Round to the nearest integer; a plain astype(int) would truncate
    # (e.g. 127.9 -> 127)
    dominant_colors = np.rint(kmeans.cluster_centers_).astype(int)

    # Map the dominant colors to the closest colors in the custom palette
    return [get_closest_color_name(r, g, b) for r, g, b in dominant_colors]

: 

In [None]:
# Create output directories. makedirs also builds the parent directory and
# does nothing if the directory already exists, so the cell can be re-run.
save_dir_all = 'context_encoded'
save_dir = f'{save_dir_all}/colors_encoded'
os.makedirs(save_dir, exist_ok=True)

# Main loop to process keyframes and extract color context
for key, video_keyframe_paths in tqdm(all_keyframe_paths.items()):
    os.makedirs(os.path.join(save_dir, key), exist_ok=True)

    for video_id in tqdm(sorted(video_keyframe_paths.keys())):
        # One space-separated string of color names per keyframe, in the same
        # order as the keyframe path list. Any failure raises and stops the
        # run, so a partial video is never written.
        video_color_contexts = [
            ' '.join(get_image_colors(image_path))
            for image_path in video_keyframe_paths[video_id]
        ]

        # Save the video color context to a text file (one line per keyframe)
        with open(f"{save_dir}/{key}/{video_id}.txt", "w") as f:
            f.writelines("%s\n" % item for item in video_color_contexts)

print("Color extraction completed!")

: 