#### **Import Libraries**

In [2]:
import os
import cv2
import json
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from skimage.color import rgb2lab, deltaE_cie76

In [3]:
%cd D:/AIC2024/dataset

D:\AIC2024\dataset


#### **Parsing Data Path**

In [4]:
def parse_data_path(feature_dir='./distilled_keyframe'):
    """Index every entry found two levels below ``feature_dir``.

    Args:
        feature_dir: Root directory whose immediate children are "parts";
            each part is itself listed with ``os.listdir``.

    Returns:
        dict: ``{part_name: {entry_id: path}}`` where ``entry_id`` is the
        entry name up to its first ``'.'`` and ``path`` is
        ``f'{feature_dir}/{part_name}/{entry_name}'``. Parts and entries are
        visited in sorted name order.
    """
    path_index = {}
    for part_name in sorted(os.listdir(feature_dir)):
        part_dir = f'{feature_dir}/{part_name}'
        # The id is everything before the first dot of the entry name.
        path_index[part_name] = {
            entry_name.split('.')[0]: f'{part_dir}/{entry_name}'
            for entry_name in sorted(os.listdir(part_dir))
        }
    return path_index

def calculate_centroid(points):
    """Return the mean of ``points`` taken along its first axis.

    Args:
        points: NumPy array of samples (its ``.size`` attribute is read).

    Returns:
        The result of ``np.mean(points, axis=0)``.

    Raises:
        ValueError: If ``points`` contains no elements.
    """
    if points.size != 0:
        # Average over the first axis, i.e. one mean per remaining dimension
        return np.mean(points, axis=0)
    raise ValueError("Tập dữ liệu không được để trống.")

In [None]:
# Build the {part: {id: path}} index for everything under ./distilled_keyframe;
# the export loop at the end of the notebook iterates over this mapping.
all_keyframe_paths = parse_data_path(feature_dir='./distilled_keyframe')

#### **Creating Color Palette**

In [5]:
# Build the reference palette: one centroid color per label directory.
# Layout read from disk: ./color_palette/<label>/<sample image files>.
color_palette = []
id2color = []

color_palette_dir = './color_palette'
# os.listdir returns entries in arbitrary order; sort so that palette indices
# (and therefore id2color) are reproducible across machines and runs.
for label in sorted(os.listdir(color_palette_dir)):
    label_dir = f'{color_palette_dir}/{label}'
    if not os.path.isdir(label_dir):
        # Stray files next to the label folders are not color classes.
        continue
    color_dataset = []
    for color_name in sorted(os.listdir(label_dir)):
        color_path = f'{label_dir}/{color_name}'
        color_img = cv2.imread(color_path)
        if color_img is None:
            # cv2.imread signals "could not decode" by returning None
            # (e.g. Thumbs.db / .DS_Store); skip instead of crashing in cvtColor.
            print('Skipping unreadable palette sample:', color_path)
            continue
        color_img = cv2.cvtColor(color_img, cv2.COLOR_BGR2RGB)
        # Only the top-left pixel of each sample image is used as its color.
        color_dataset.append(color_img[0, 0, :].reshape(-1, 3))
    # calculate_centroid raises ValueError if a label folder yielded no samples.
    centroid = calculate_centroid(np.array(color_dataset))
    color_palette.append(centroid)
    id2color.append(label)

# Mean RGB centroids are cast to uint8 and converted to CIELAB, one row per label.
color_palette = rgb2lab(np.uint8(np.asarray(color_palette))).reshape(-1, 3)
id2color = np.asarray(id2color)

print('Color Palette:', color_palette)
print('Id2Color:', id2color)

Color Palette: [[  3.19739074   0.894756    -0.63929314]
 [ 45.20923058  10.17622654 -54.15436811]
 [ 40.0129862   19.37278276  28.11057534]
 [ 56.15203379 -50.01876545  46.1682887 ]
 [ 52.63406311  -0.74927434  -0.25947839]
 [ 62.18897587  53.96934336  68.49116015]
 [ 42.33080498  66.11269551  48.86185425]
 [ 40.23054752  64.68849953 -57.6744468 ]
 [ 86.7712191    0.33727719   0.12389392]
 [ 91.59772204 -13.33452778  77.03914845]]
Id2Color: ['black' 'blue' 'brown' 'green' 'grey' 'orange' 'red' 'violet' 'white'
 'yellow']


#### **Color Textual Encoder**

In [11]:
class ColorTextualEncoder:
    """Describe an image's colors per grid cell as text tokens.

    The image is divided into a ``len(row_str) x len(col_str)`` grid of
    normalized bounding boxes. Each cell's pixels are matched against a
    palette in CIELAB space and the palette labels that are frequent enough
    are emitted either with the cell label (``"a0red"``) or on their own
    (``"red"``).

    Attributes:
        color_palette: Palette colors handed to ``deltaE_cie76``; the notebook
            passes an ``(n_colors, 3)`` array of Lab values.
        id2color: NumPy array of label names, indexed by palette position.
        row_str / col_str: Row and column label strings.
        x_pts / y_pts: Normalized column / row boundaries in ``[0, 1]``.
        grid_bboxes: ``(n_cells, 4)`` array of ``[x0, y0, x1, y1]`` in
            normalized coordinates, row-major.
        grid_labels: ``"{col}{row}"`` label for each bbox.
    """

    def __init__(self, color_palette, id2color, row_str=None, col_str=None):
        """Store the palette and precompute the grid.

        Args:
            color_palette: Palette colors in Lab space.
            id2color: NumPy array mapping palette index -> color name.
            row_str: Row labels; defaults to ``'0'..'6'``.
            col_str: Column labels; defaults to ``'a'..'g'``.
        """
        self.color_palette = color_palette
        self.id2color = id2color
        self.row_str = row_str or ['0', '1', '2', '3', '4', '5', '6']
        self.col_str = col_str or ['a', 'b', 'c', 'd', 'e', 'f', 'g']
        # x boundaries separate columns and y boundaries separate rows, so each
        # axis must be sized from its own label list. (Sizing them the other
        # way round only works when the grid happens to be square.)
        self.x_pts = np.linspace(0, 1, len(self.col_str) + 1)
        self.y_pts = np.linspace(0, 1, len(self.row_str) + 1)
        self.grid_bboxes, self.grid_labels = self.initialize_grid_bboxes()

    def initialize_grid_bboxes(self):
        """Build the normalized cell boxes and their labels in row-major order.

        Returns:
            tuple: ``(bboxes, labels)`` where ``bboxes`` is an
            ``(n_cells, 4)`` array of ``[x0, y0, x1, y1]`` and ``labels`` is a
            list of ``"{col_label}{row_label}"`` strings.
        """
        grid_bboxes, grid_labels = [], []
        for row, row_label in enumerate(self.row_str):
            for col, col_label in enumerate(self.col_str):
                bbox = [self.x_pts[col], self.y_pts[row], self.x_pts[col + 1], self.y_pts[row + 1]]
                grid_bboxes.append(bbox)
                grid_labels.append(f"{col_label}{row_label}")
        return np.array(grid_bboxes), grid_labels

    def _iter_cell_crops(self, image):
        """Yield ``(label, crop)`` for every grid cell of ``image``."""
        h, w, _ = image.shape
        for bbox, label in zip(self.grid_bboxes, self.grid_labels):
            # Scale normalized box to pixel coordinates (truncating to int).
            x0, y0, x1, y1 = (np.array(bbox) * [w, h, w, h]).astype(int)
            yield label, image[y0:y1, x0:x1, :]

    def visual_grid_bboxes(self, image):
        """Draw the grid and cell labels on a copy of ``image`` and show it.

        Args:
            image: ``(H, W, 3)`` image array, or ``None`` to draw on a blank
                210x210 black canvas. The input is not modified.
        """
        if image is None:
            image = np.zeros((210, 210, 3), dtype=np.uint8)
        grid_image = image.copy()
        h, w, _ = grid_image.shape

        for bbox, label in zip(self.grid_bboxes, self.grid_labels):
            # tolist() yields plain Python ints for the OpenCV drawing calls.
            x_start, y_start, x_end, y_end = (np.array(bbox) * [w, h, w, h]).astype(int).tolist()
            # Label is placed near the cell center (shifted 10 px left).
            grid_image = cv2.putText(
                grid_image, label, (x_start + (x_end - x_start) // 2 - 10, y_start + (y_end - y_start) // 2),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA
            )
            grid_image = cv2.rectangle(grid_image, (x_start, y_start), (x_end - 1, y_end - 1), (255, 255, 0), 1)
        # Channel order is swapped before display.
        plt.imshow(cv2.cvtColor(grid_image, cv2.COLOR_RGB2BGR))

    def distances_to_probabilities_inverse(self, distances):
        """Turn distances into row-normalized inverse-distance weights.

        Args:
            distances: 2-D array; each row is normalized independently.

        Returns:
            Array of the same shape whose rows sum to 1; smaller distances
            receive larger weights. A small epsilon avoids division by zero.
        """
        inverse_distances = 1 / (distances + 1e-5)
        return inverse_distances / np.sum(inverse_distances, axis=1, keepdims=True)

    def predict_pixels_color(self, deltaE_cie76_diff):
        """Assign one or two palette labels to every pixel.

        Args:
            deltaE_cie76_diff: ``(n_pixels, n_colors)`` distance matrix.
                At least two palette colors are required.

        Returns:
            1-D array of color names. Every pixel contributes its nearest
            color; the second-nearest is added as well when its weight is
            more than half of the nearest color's weight.
        """
        two_most_color_indices = np.argsort(deltaE_cie76_diff, axis=1)[:, :2]
        two_most_color_distances = np.take_along_axis(deltaE_cie76_diff, two_most_color_indices, axis=1)
        probabilities = self.distances_to_probabilities_inverse(two_most_color_distances)
        # Column 0 (nearest color) is always kept; column 1 only if close enough.
        filtered_mask = np.ones(probabilities.shape, dtype=bool)
        filtered_mask[:, 1] = (probabilities[:, 1] / probabilities[:, 0]) > 0.5
        return self.id2color[two_most_color_indices[filtered_mask]]

    def match_image_to_pallete(self, RGB_image, threshold = 0.07):
        """Return the palette labels that are frequent enough in ``RGB_image``.

        Args:
            RGB_image: ``(H, W, 3)`` RGB image (cast to ``uint8`` internally).
            threshold: A label is kept when its count exceeds
                ``threshold * H * W``. Note that one pixel may contribute up
                to two labels (see ``predict_pixels_color``).

        Returns:
            Array of selected color names, in the sorted order produced by
            ``np.unique``.
        """
        n_pixels = RGB_image.shape[0] * RGB_image.shape[1]
        # Convert RGB to CIE 1976 L*a*b* (CIELAB) so that the CIE76 color
        # difference below is a plain Euclidean distance between colors.
        lab_image = rgb2lab(np.uint8(np.asarray(RGB_image))).reshape(-1, 1, 3)
        # (n_pixels, 1, 3) against the palette broadcasts to (n_pixels, n_colors).
        deltaE_cie76_diff = deltaE_cie76(lab_image, self.color_palette)
        color_prediction = np.array(self.predict_pixels_color(deltaE_cie76_diff))
        color_labels, color_frequencies = np.unique(color_prediction, return_counts=True)
        selected_colors = color_labels[color_frequencies > n_pixels*threshold]
        return selected_colors

    def textual_encoding_color_bboxes(self, image):
        """Encode ``image`` as sorted ``"{cell_label}{color}"`` tokens.

        Args:
            image: ``(H, W, 3)`` RGB image.

        Returns:
            str: Space-separated tokens such as ``"a0red b3blue"``.
        """
        encoding_color = []
        for label, crop_image in self._iter_cell_crops(image):
            for color in self.match_image_to_pallete(crop_image):
                encoding_color.append(label + color)
        return ' '.join(sorted(encoding_color))

    def textual_encoding_color_classes(self, image):
        """Encode ``image`` as sorted color names, one per matching cell.

        A color appears once for every grid cell in which it was selected, so
        the output may contain repeated names.

        Args:
            image: ``(H, W, 3)`` RGB image.

        Returns:
            str: Space-separated color names.
        """
        encoding_color = []
        for _, crop_image in self._iter_cell_crops(image):
            encoding_color.extend(self.match_image_to_pallete(crop_image))
        return ' '.join(sorted(encoding_color))

In [None]:
# Encoder with the default 7x7 grid (row labels '0'-'6', column labels 'a'-'g').
color_encoder = ColorTextualEncoder(color_palette, id2color)
# image=None draws the grid and its cell labels on a blank 210x210 canvas.
color_encoder.visual_grid_bboxes(image=None)

#### **Color Bounding Box and Color Class**

In [None]:
def write_text_file(text, file_path):
    """Write ``text`` to ``file_path``, replacing any existing file.

    The file is always written as UTF-8 so the result does not depend on the
    platform's default encoding.

    Args:
        text: String to write.
        file_path: Destination path.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text)

def write_json_file(json_data, file_path):
    """Serialize ``json_data`` to ``file_path`` as indented JSON.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``), so the
    file is opened explicitly as UTF-8; relying on the platform default
    encoding can raise ``UnicodeEncodeError`` or produce non-UTF-8 JSON.

    Args:
        json_data: JSON-serializable object.
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)

def sorted_by_id(keyframe_paths):
    """Order keyframe paths by the integer id encoded in their file name.

    The id is the part of the last ``'/'``-separated component that precedes
    its first ``'.'`` (``'dir/12.jpg'`` -> ``12``).

    Args:
        keyframe_paths: Iterable of path strings.

    Returns:
        list: The same paths in ascending id order; equal ids keep their
        input order.

    Raises:
        ValueError: If a file name does not start with an integer id.
    """
    def numeric_id(path):
        filename = path.split('/')[-1]
        return int(filename.split('.')[0])

    return sorted(keyframe_paths, key=numeric_id)

In [None]:
def encode_metadata(image_path):
    """Compute both color text encodings for one keyframe image.

    Args:
        image_path: Path of the image to load with OpenCV.

    Returns:
        dict: ``{'color_bbox': ..., 'color_class': ...}`` holding the
        per-cell and the class-only encodings produced by ``color_encoder``.

    Raises:
        ValueError: If the image cannot be read or decoded.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising for missing/corrupt files;
        # fail here with the offending path rather than inside cvtColor.
        raise ValueError(f'Could not read image: {image_path}')
    # OpenCV loads BGR; the encoder expects RGB input.
    RGB_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    encoded_bbox = color_encoder.textual_encoding_color_bboxes(RGB_image)
    encoded_class = color_encoder.textual_encoding_color_classes(RGB_image)
    return {
        'color_bbox': encoded_bbox,
        'color_class': encoded_class
    }

In [None]:
# Export loop: one JSON file per video, grouped into one folder per video part.
save_dir = './metadata/color/features'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

for video_part, video_path_dict in all_keyframe_paths.items():
    full_save_dir = f'{save_dir}/{video_part}'
    os.makedirs(full_save_dir, exist_ok=True)
    for video_id, video_id_path in tqdm(video_path_dict.items(), desc=f'Encoding Part {video_part}'):
        # Every entry of the video's directory is treated as a keyframe image.
        keyframe_image_paths = [
            f'{video_id_path}/{keyframe_name}' for keyframe_name in os.listdir(video_id_path)
        ]
        # Records are keyed by keyframe path and inserted in ascending keyframe id.
        video_id_metadata_records = {
            keyframe_path: encode_metadata(keyframe_path)
            for keyframe_path in sorted_by_id(keyframe_image_paths)
        }
        write_json_file(video_id_metadata_records, os.path.join(full_save_dir, f'{video_id}.json'))