In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
import os
import csv

In [None]:
def imread_unicode(path):
    """Read an image from a path that may contain non-ASCII characters.

    The file is loaded as a raw byte buffer with NumPy and then decoded by
    OpenCV as a colour image, instead of handing the path to OpenCV directly.

    Args:
        path: Location of the image file.

    Returns:
        Whatever ``cv2.imdecode`` produces for the file's bytes, or ``None``
        when reading or decoding raised an exception (the error is printed).
    """
    decoded = None
    try:
        raw_bytes = np.fromfile(path, dtype=np.uint8)
        decoded = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
    except Exception as err:
        # Best-effort read: report the failure and fall through with None.
        print(f"[LỖI imread_unicode] {path}: {err}")
    return decoded

In [None]:
def extract_hsv_histogram(image, h_bins=18, s_bins=8, v_bins=8):
    """Compute a concatenated, normalised per-channel HSV histogram.

    Args:
        image: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2HSV``).
        h_bins: Number of bins for channel 0 (hue), over the range [0, 180).
        s_bins: Number of bins for channel 1 (saturation), over [0, 256).
        v_bins: Number of bins for channel 2 (value), over [0, 256).

    Returns:
        1-D array of length ``h_bins + s_bins + v_bins`` holding the hue,
        saturation and value histograms back to back, scaled so that all
        entries together sum to 1.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # (channel index, bin count, value range) for H, S and V respectively.
    channel_specs = (
        (0, h_bins, [0, 180]),
        (1, s_bins, [0, 256]),
        (2, v_bins, [0, 256]),
    )
    per_channel = [
        cv2.calcHist([hsv_image], [channel], None, [bins], value_range)
        for channel, bins, value_range in channel_specs
    ]

    combined = np.concatenate(per_channel).flatten()
    # Normalise in place over the whole concatenated vector.
    combined /= combined.sum()
    return combined

In [None]:
def extract_dominant_colors(image, k=3, random_state=0):
    """Summarise an image by its ``k`` dominant RGB colours and their pixel shares.

    All pixels are clustered with K-means in RGB space. Each cluster
    contributes four values: the R, G, B coordinates of its centre and the
    fraction of pixels assigned to it.

    Args:
        image: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB``).
        k: Number of colour clusters to fit.
        random_state: Seed forwarded to ``KMeans`` so repeated runs on the
            same image yield the same features. Pass ``None`` to get the
            unseeded behaviour.

    Returns:
        1-D float array of length ``4 * k`` laid out as
        ``[R, G, B, share, R, G, B, share, ...]``, with clusters ordered from
        the largest pixel share to the smallest so that the same column means
        the same rank of dominance for every image.
    """
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = image_rgb.reshape(-1, 3)

    kmeans = KMeans(n_clusters=k, n_init=10, random_state=random_state)
    kmeans.fit(pixels)

    centers = kmeans.cluster_centers_
    # minlength=k guarantees one count per cluster. Without it, a cluster
    # that received no pixels (e.g. an image with fewer than k distinct
    # colours) can shorten the result and break the per-cluster indexing.
    counts = np.bincount(kmeans.labels_, minlength=k)
    shares = counts / counts.sum()

    # K-means numbers its clusters arbitrarily; rank them by share instead.
    # A stable sort keeps tied clusters in their fitted order.
    order = np.argsort(-shares, kind="stable")

    # Rows are [R, G, B, share]; ravel() flattens them cluster by cluster.
    return np.column_stack([centers[order], shares[order]]).ravel()

In [None]:
def process_image(image_path, hsv_bins=(18, 8, 8), kmeans_k=3, target_size=(600, 400)):
    """Build the combined colour feature vector for a single image file.

    The image is read, optionally resized to a common size, and described by
    its HSV histogram followed by its dominant-colour features.

    Args:
        image_path: Path of the image file to read.
        hsv_bins: Bin counts ``(h_bins, s_bins, v_bins)`` passed positionally
            to ``extract_hsv_histogram``.
        kmeans_k: Number of clusters passed to ``extract_dominant_colors``.
        target_size: Size handed to ``cv2.resize`` as its ``dsize`` argument
            before feature extraction. Defaults to ``(600, 400)``; pass
            ``None`` to keep the image at its original size.

    Returns:
        1-D array with the HSV histogram followed by the dominant-colour
        features, or ``None`` when the image could not be read (a message is
        printed in that case).
    """
    image = imread_unicode(image_path)
    if image is None:
        print(f"[LỖI] Không đọc được ảnh: {image_path}")
        return None

    if target_size is not None:
        # Bring every image to the same size so features are comparable.
        image = cv2.resize(image, tuple(target_size))

    hsv_feat = extract_hsv_histogram(image, *hsv_bins)
    kmeans_feat = extract_dominant_colors(image, k=kmeans_k)
    return np.concatenate([hsv_feat, kmeans_feat])

In [None]:
def extract_features_from_folder(folder_path, output_csv, hsv_bins=(18, 8, 8), kmeans_k=3):
    """Extract colour features for every image in a folder into a CSV file.

    Files directly inside ``folder_path`` whose names end in ``.jpg``,
    ``.jpeg`` or ``.png`` (case-insensitive) are processed in sorted name
    order. Each image that can be processed yields one CSV row: the file name
    followed by its feature values. Progress and failures are printed.

    Args:
        folder_path: Directory to scan for images.
        output_csv: Path of the CSV file to create (overwritten if present).
        hsv_bins: Bin counts ``(h_bins, s_bins, v_bins)`` forwarded to
            ``process_image``; their sum sets the number of ``hsv_*`` columns.
        kmeans_k: Number of colour clusters forwarded to ``process_image``;
            each cluster adds ``kmeans_r/g/b/p<n>`` columns.

    Raises:
        OSError: If ``folder_path`` cannot be listed or ``output_csv`` cannot
            be opened for writing.
    """
    hsv_columns = [f"hsv_{i}" for i in range(sum(hsv_bins))]
    # One (r, g, b, p) column group per cluster, numbered from 1.
    kmeans_columns = [
        f"kmeans_{component}{cluster}"
        for cluster in range(1, kmeans_k + 1)
        for component in "rgbp"
    ]
    header = ['filename'] + hsv_columns + kmeans_columns

    # List the folder before opening the output so that an unreadable folder
    # does not truncate an existing CSV. Sorting makes the row order
    # reproducible, since os.listdir returns entries in arbitrary order.
    entries = sorted(os.listdir(folder_path))

    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)

        for file in entries:
            filename = os.fsdecode(file)
            if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            path = os.path.join(folder_path, filename)
            features = process_image(path, hsv_bins=hsv_bins, kmeans_k=kmeans_k)
            if features is not None:
                writer.writerow([filename] + features.tolist())
                print(f"✔ Đã xử lý: {filename}")
            else:
                print(f"✘ Lỗi xử lý: {filename}")

In [None]:
# Paths to the folder containing the input images and to the output CSV file.
folder_path = "../../data"  # or adjust this path to match your project layout
output_csv = "features.csv"

# Run the extraction over every supported image in folder_path and write the
# resulting feature rows to output_csv.
extract_features_from_folder(folder_path, output_csv)