In [None]:
import os
import numpy as np
import cv2
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from scipy.spatial.distance import canberra

class CBIRSystem:
    """Content-based image retrieval over fixed-size texture patches.

    Every database image is resized to ``grid_size * patch_size`` pixels per
    side and cut into a ``grid_size x grid_size`` grid of patches.  Each patch
    is described by a uniform-LBP histogram, the maximum LBP run length, a
    K-Means colour histogram and the per-channel mean / standard deviation.
    Patches are then indexed with a nearest-neighbour model.
    """

    # LBP configuration, shared by feature extraction and feature analysis so
    # the descriptor layout is defined in exactly one place.
    LBP_RADIUS = 2
    LBP_POINTS = 8 * LBP_RADIUS
    LBP_BINS = LBP_POINTS + 2  # uniform patterns + one non-uniform bin

    # Pixel budget used when fitting the colour-quantisation K-Means.
    PIXELS_PER_PATCH = 1000
    MAX_KMEANS_PIXELS = 100000

    # Upper bound on the default neighbour count of the index.
    DEFAULT_NEIGHBORS = 50

    def __init__(self, database_path, patch_size=128, n_color_clusters=64,
                 grid_size=5):
        """
        Initialise the CBIR system.

        Parameters:
            database_path (str): Path to the Brodatz database folder.
            patch_size (int): Side length of a square patch (default: 128).
            n_color_clusters (int): Number of K-Means clusters used for
                colour quantisation (default: 64).
            grid_size (int): Number of patches per image side (default: 5,
                i.e. 25 patches per image).
        """
        self.database_path = database_path
        self.patch_size = patch_size
        self.n_color_clusters = n_color_clusters
        self.grid_size = grid_size
        self.image_descriptors = []
        self.image_patches = []
        self.class_labels = []
        self.patch_indices = []  # (class_idx, row, col) of every patch
        self.kmeans_color = None
        self.feature_scaler = None
        self.nn_model = None

    def load_and_preprocess_images(self):
        """Load the database images, fit the colour K-Means and build descriptors.

        Files named ``D*_COLORED.tif`` inside ``database_path`` are read in
        sorted order; the position of a file in that order is its class label.

        Raises:
            ValueError: If no image could be loaded.
        """
        print("Memuat dan memproses citra dari database...")

        # Start from a clean state: after a previous run these attributes are
        # numpy arrays, so appending to them again would fail.
        self.image_descriptors = []
        self.image_patches = []
        self.class_labels = []
        self.patch_indices = []

        image_files = sorted(
            f for f in os.listdir(self.database_path)
            if f.endswith('_COLORED.tif') and f.startswith('D')
        )

        # The image must be exactly grid_size patches wide and high, otherwise
        # the outer patches would be truncated or empty.
        side = self.grid_size * self.patch_size

        sampled_pixels = []  # one (n, 3) float32 array per patch
        loaded_images = 0

        for class_idx, img_file in enumerate(image_files):
            img = cv2.imread(os.path.join(self.database_path, img_file))

            if img is None:
                print(f"Gagal memuat citra: {img_file}")
                continue
            loaded_images += 1

            if img.shape[0] != side or img.shape[1] != side:
                img = cv2.resize(img, (side, side))

            for i in range(self.grid_size):
                for j in range(self.grid_size):
                    y_start = i * self.patch_size
                    x_start = j * self.patch_size
                    patch = img[y_start:y_start + self.patch_size,
                                x_start:x_start + self.patch_size]

                    self.image_patches.append(patch)
                    self.class_labels.append(class_idx)
                    self.patch_indices.append((class_idx, i, j))

                    # Keep a random subset of each patch's pixels for K-Means.
                    pixels = patch.reshape(-1, 3).astype(np.float32)
                    sample_size = min(self.PIXELS_PER_PATCH, len(pixels))
                    chosen = np.random.choice(len(pixels), sample_size, replace=False)
                    sampled_pixels.append(pixels[chosen])

        if not self.image_patches:
            raise ValueError(
                f"Tidak ada citra yang berhasil dimuat dari: {self.database_path}"
            )

        # Fit the colour quantiser on a bounded random sample of all pixels.
        print("Training K-Means untuk kuantisasi warna...")
        all_pixels = np.concatenate(sampled_pixels)
        sample_size = min(self.MAX_KMEANS_PIXELS, len(all_pixels))
        chosen = np.random.choice(len(all_pixels), sample_size, replace=False)

        self.kmeans_color = KMeans(n_clusters=self.n_color_clusters, random_state=42, n_init=10)
        self.kmeans_color.fit(all_pixels[chosen])

        # Descriptors can only be computed once the quantiser is fitted.
        print("Ekstraksi fitur dari semua patch...")
        self.image_descriptors = np.array(
            [self.extract_features(patch) for patch in self.image_patches]
        )
        self.class_labels = np.array(self.class_labels)
        print(f"Berhasil memuat {len(self.image_patches)} patch citra dari {loaded_images} kelas.")

    def extract_features(self, image):
        """Build the descriptor of a BGR image.

        The returned 1-D vector is the concatenation of, in order: the
        normalised LBP histogram (``LBP_BINS`` values), the maximum LBP run
        length (1 value), the colour-cluster histogram (``n_color_clusters``
        values), the per-channel mean (3) and the per-channel standard
        deviation (3).
        """
        # LBP is computed on the grayscale version of the image.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp = local_binary_pattern(gray, self.LBP_POINTS, self.LBP_RADIUS, method='uniform')

        lbp_hist, _ = np.histogram(lbp.ravel(), bins=self.LBP_BINS, range=(0, self.LBP_BINS))
        lbp_hist = lbp_hist.astype(float)
        lbp_hist /= (lbp_hist.sum() + 1e-8)  # epsilon avoids division by zero

        max_run_length = self.calculate_max_run_length(lbp)
        color_features = self.extract_color_features(image)

        flat_pixels = image.reshape(-1, 3)
        mean_colors = np.mean(flat_pixels, axis=0)
        std_colors = np.std(flat_pixels, axis=0)

        return np.concatenate([
            lbp_hist,
            [max_run_length],
            color_features,
            mean_colors,
            std_colors,
        ])

    @staticmethod
    def _longest_row_run(grid):
        """Return the longest run of equal consecutive values within any row.

        ``grid`` must be a non-empty 2-D array.
        """
        rows, cols = grid.shape
        # A run starts at the first column of every row (so runs never span
        # two rows) and wherever a value differs from its left neighbour.
        starts_run = np.ones((rows, cols), dtype=bool)
        starts_run[:, 1:] = grid[:, 1:] != grid[:, :-1]

        run_starts = np.flatnonzero(starts_run)
        run_ends = np.append(run_starts[1:], rows * cols)
        return int((run_ends - run_starts).max())

    def calculate_max_run_length(self, lbp_image):
        """Return the longest horizontal or vertical run of equal LBP codes.

        Parameters:
            lbp_image: 2-D array of LBP codes.

        Returns:
            int: Length of the longest run; 0 when the array has no elements.
        """
        grid = np.asarray(lbp_image)
        rows, cols = grid.shape
        if rows == 0 or cols == 0:
            return 0
        # Vertical runs are horizontal runs of the transposed array.
        return max(self._longest_row_run(grid), self._longest_row_run(grid.T))

    def _require_color_model(self):
        """Raise a clear error when the colour K-Means has not been fitted."""
        if self.kmeans_color is None:
            raise RuntimeError(
                "K-Means warna belum dilatih; panggil load_and_preprocess_images() terlebih dahulu."
            )

    def extract_color_features(self, image):
        """Return the normalised histogram of colour-cluster assignments.

        Every pixel is assigned to its nearest K-Means colour cluster; the
        result has ``n_color_clusters`` entries that sum to (almost) 1.

        Raises:
            RuntimeError: If the colour K-Means has not been fitted yet.
        """
        self._require_color_model()
        pixels = image.reshape(-1, 3).astype(np.float32)
        clusters = self.kmeans_color.predict(pixels)

        # Unit-width bins: label k lands in bin k, same convention as the
        # LBP histogram.
        hist, _ = np.histogram(clusters, bins=self.n_color_clusters,
                               range=(0, self.n_color_clusters))
        hist = hist.astype(float)
        hist /= (hist.sum() + 1e-8)  # epsilon avoids division by zero

        return hist

    def _quantize_image(self, image):
        """Return ``image`` with each pixel replaced by its cluster centre (BGR, uint8)."""
        self._require_color_model()
        pixels = image.reshape(-1, 3).astype(np.float32)
        labels = self.kmeans_color.predict(pixels)
        centers = self.kmeans_color.cluster_centers_
        return centers[labels].reshape(image.shape).astype(np.uint8)

    def visualize_color_features(self, image, show_details=True):
        """Plot the colour features of a BGR image.

        Shows the image, its colour-cluster histogram, a palette of the most
        frequent clusters and the K-Means-quantised image.

        Parameters:
            image: BGR image array.
            show_details (bool): Also print the numeric summary.

        Returns:
            tuple: ``(color_hist, mean_colors, std_colors)``.
        """
        color_hist = self.extract_color_features(image)
        flat_pixels = image.reshape(-1, 3)
        mean_colors = np.mean(flat_pixels, axis=0)
        std_colors = np.std(flat_pixels, axis=0)

        # Cluster centres are in BGR order, like the training pixels.
        cluster_colors = self.kmeans_color.cluster_centers_

        if show_details:
            print("=== FITUR WARNA ===")
            print(f"Histogram warna (K-Means clusters): {len(color_hist)} bins")
            print(f"Top 10 cluster dengan frekuensi tertinggi:")
            for idx in np.argsort(color_hist)[-10:][::-1]:
                color = cluster_colors[idx].astype(int)
                print(f"  Cluster {idx}: {color_hist[idx]:.4f} (BGR: {color})")

            print(f"\nRata-rata warna BGR: [{mean_colors[0]:.2f}, {mean_colors[1]:.2f}, {mean_colors[2]:.2f}]")
            print(f"Standar deviasi BGR: [{std_colors[0]:.2f}, {std_colors[1]:.2f}, {std_colors[2]:.2f}]")

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        # 1. Original image.
        axes[0,0].imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        axes[0,0].set_title('Citra Asli')
        axes[0,0].axis('off')

        # 2. Cluster distribution.
        axes[0,1].bar(range(len(color_hist)), color_hist, alpha=0.7)
        axes[0,1].set_title('Distribusi Cluster Warna')
        axes[0,1].set_xlabel('Cluster ID')
        axes[0,1].set_ylabel('Frekuensi Normalisasi')
        axes[0,1].grid(True, alpha=0.3)

        # 3. Palette of the (up to) 20 most frequent clusters.
        top_20_indices = np.argsort(color_hist)[-20:][::-1]
        palette = np.zeros((50, len(top_20_indices)*20, 3), dtype=np.uint8)

        for slot, idx in enumerate(top_20_indices):
            # imshow expects RGB, the centres are BGR: reverse the channels.
            palette[:, slot*20:(slot+1)*20] = cluster_colors[idx].astype(np.uint8)[::-1]

        axes[1,0].imshow(palette)
        axes[1,0].set_title('Palet Warna (Top 20 Cluster)')
        axes[1,0].axis('off')

        # 4. Image rebuilt from the cluster centres.
        clustered_image = self._quantize_image(image)
        axes[1,1].imshow(cv2.cvtColor(clustered_image, cv2.COLOR_BGR2RGB))
        axes[1,1].set_title('Citra Hasil Kuantisasi K-Means')
        axes[1,1].axis('off')

        plt.tight_layout()
        plt.show()

        return color_hist, mean_colors, std_colors

    def compare_color_features(self, image1, image2):
        """Compare the colour features of two BGR images and plot the result.

        Returns:
            tuple: ``(hist_distance, mean_distance)`` where ``hist_distance``
            is the modified Canberra distance between the colour histograms
            and ``mean_distance`` the Euclidean distance between the mean
            colours.
        """
        color_hist1 = self.extract_color_features(image1)
        color_hist2 = self.extract_color_features(image2)

        mean_colors1 = np.mean(image1.reshape(-1, 3), axis=0)
        mean_colors2 = np.mean(image2.reshape(-1, 3), axis=0)

        hist_distance = self.modified_canberra_distance(color_hist1, color_hist2)
        mean_distance = np.linalg.norm(mean_colors1 - mean_colors2)

        print("=== PERBANDINGAN FITUR WARNA ===")
        print(f"Jarak histogram warna (Canberra): {hist_distance:.4f}")
        print(f"Jarak rata-rata warna (Euclidean): {mean_distance:.4f}")

        fig, axes = plt.subplots(2, 3, figsize=(18, 10))

        # Top row: both images followed by their histograms side by side.
        axes[0,0].imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))
        axes[0,0].set_title('Citra 1')
        axes[0,0].axis('off')

        axes[0,1].imshow(cv2.cvtColor(image2, cv2.COLOR_BGR2RGB))
        axes[0,1].set_title('Citra 2')
        axes[0,1].axis('off')

        x = np.arange(len(color_hist1))
        width = 0.35
        axes[0,2].bar(x - width/2, color_hist1, width, label='Citra 1', alpha=0.7)
        axes[0,2].bar(x + width/2, color_hist2, width, label='Citra 2', alpha=0.7)
        axes[0,2].set_title('Perbandingan Histogram Warna')
        axes[0,2].set_xlabel('Cluster ID')
        axes[0,2].set_ylabel('Frekuensi')
        axes[0,2].legend()
        axes[0,2].grid(True, alpha=0.3)

        # Bottom row: both quantised images and the absolute histogram difference.
        clustered_image1 = self._quantize_image(image1)
        axes[1,0].imshow(cv2.cvtColor(clustered_image1, cv2.COLOR_BGR2RGB))
        axes[1,0].set_title('Kuantisasi Citra 1')
        axes[1,0].axis('off')

        clustered_image2 = self._quantize_image(image2)
        axes[1,1].imshow(cv2.cvtColor(clustered_image2, cv2.COLOR_BGR2RGB))
        axes[1,1].set_title('Kuantisasi Citra 2')
        axes[1,1].axis('off')

        hist_diff = np.abs(color_hist1 - color_hist2)
        axes[1,2].bar(range(len(hist_diff)), hist_diff, alpha=0.7, color='red')
        axes[1,2].set_title('Perbedaan Histogram')
        axes[1,2].set_xlabel('Cluster ID')
        axes[1,2].set_ylabel('|Frekuensi1 - Frekuensi2|')
        axes[1,2].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        return hist_distance, mean_distance

    def modified_canberra_distance(self, x, y):
        """Return the Canberra distance between ``x`` and ``y``.

        A small epsilon is added to every denominator so that positions where
        both vectors are zero contribute 0 instead of producing a division by
        zero.
        """
        numerator = np.abs(x - y)
        denominator = np.abs(x) + np.abs(y) + 1e-8
        return np.sum(numerator / denominator)

    def train_model(self, distance_metric='canberra'):
        """Standardise the descriptors and fit the nearest-neighbour index.

        Parameters:
            distance_metric (str): ``'canberra'``, ``'euclidean'`` or
                ``'manhattan'``.  Any other value falls back to
                ``'manhattan'``.

        Raises:
            ValueError: If no descriptors have been extracted yet.
        """
        print("Melatih model CBIR...")

        features = np.asarray(self.image_descriptors)
        if features.size == 0:
            raise ValueError(
                "Belum ada deskriptor; panggil load_and_preprocess_images() terlebih dahulu."
            )

        self.feature_scaler = StandardScaler()
        scaled_features = self.feature_scaler.fit_transform(features)

        # Unknown names keep the historical fallback to manhattan; the
        # effective metric is what gets reported below.
        if distance_metric not in ('canberra', 'euclidean'):
            distance_metric = 'manhattan'

        # The index cannot return more neighbours than it holds samples.
        n_neighbors = min(self.DEFAULT_NEIGHBORS, len(scaled_features))
        self.nn_model = NearestNeighbors(n_neighbors=n_neighbors, metric=distance_metric)
        self.nn_model.fit(scaled_features)
        print(f"Model berhasil dilatih menggunakan metric: {distance_metric}")

    def query_image(self, query_image_path, k=10):
        """Find the indexed patches most similar to the image at a path.

        Parameters:
            query_image_path (str): Path of the query image.
            k (int): Maximum number of results (clamped to the index size).

        Returns:
            list: One dict per match with keys ``'patch'``, ``'distance'``
            and ``'class'``, ordered by increasing distance.  Empty when the
            query image cannot be read.

        Raises:
            RuntimeError: If ``train_model`` has not been called yet.
        """
        if self.nn_model is None or self.feature_scaler is None:
            raise RuntimeError("Model belum dilatih; panggil train_model() terlebih dahulu.")

        query_img = cv2.imread(query_image_path)
        if query_img is None:
            print("Error: Gagal memuat citra query")
            return []

        # The query is described at the same resolution as the indexed patches.
        if query_img.shape[0] != self.patch_size or query_img.shape[1] != self.patch_size:
            query_img = cv2.resize(query_img, (self.patch_size, self.patch_size))

        query_features = self.extract_features(query_img)
        scaled_features = self.feature_scaler.transform([query_features])

        # Asking for more neighbours than indexed samples is an error.
        k = min(k, len(self.image_patches))
        distances, indices = self.nn_model.kneighbors(scaled_features, n_neighbors=k)

        return [
            {
                'patch': self.image_patches[i],
                'distance': dist,
                'class': self.class_labels[i],
            }
            for i, dist in zip(indices[0], distances[0])
        ]

    def analyze_feature_importance(self):
        """Print the descriptor layout and summary statistics of the colour features.

        Raises:
            ValueError: If no descriptors have been extracted yet.
        """
        features = np.asarray(self.image_descriptors)
        if features.ndim != 2 or features.shape[0] == 0:
            raise ValueError(
                "Belum ada deskriptor; panggil load_and_preprocess_images() terlebih dahulu."
            )

        # Sizes of the descriptor components, in concatenation order.
        lbp_size = self.LBP_BINS
        run_length_size = 1
        color_size = self.n_color_clusters
        mean_color_size = 3
        std_color_size = 3

        print("=== ANALISIS FITUR ===")
        print(f"- LBP Histogram: {lbp_size} fitur")
        print(f"- Maximum Run Length: {run_length_size} fitur")
        print(f"- Color Histogram: {color_size} fitur")
        print(f"- Mean Colors: {mean_color_size} fitur")
        print(f"- Std Colors: {std_color_size} fitur")
        print(f"Total fitur: {features.shape[1]}")

        # Slice the three colour-related components out of the descriptor.
        hist_start = lbp_size + run_length_size
        mean_start = hist_start + color_size
        std_start = mean_start + mean_color_size
        color_features = features[:, hist_start:mean_start]
        mean_color_features = features[:, mean_start:std_start]
        std_color_features = features[:, std_start:]

        print(f"\n=== STATISTIK FITUR WARNA ===")
        print(f"Histogram warna - Rata-rata: {np.mean(color_features):.4f}, Std: {np.std(color_features):.4f}")
        print(f"Mean colors - Rata-rata: {np.mean(mean_color_features, axis=0)}")
        print(f"Std colors - Rata-rata: {np.mean(std_color_features, axis=0)}")

# Example usage: visualise the colour features on synthetic images
def demo_color_features():
    """Demonstrate the colour features on two synthetic images.

    Builds one red-dominated and one blue-dominated random image (BGR channel
    order), fits a 16-cluster K-Means on their pixels, then visualises each
    image's colour features and compares the two.
    """
    # Fixed seed so the synthetic images are reproducible.
    np.random.seed(42)
    plane = (128, 128)

    # Red-dominated image: strong channel 2 (red), weak green and blue.
    reddish = np.zeros(plane + (3,), dtype=np.uint8)
    reddish[..., 2] = np.random.randint(150, 255, plane)
    reddish[..., 1] = np.random.randint(0, 50, plane)
    reddish[..., 0] = np.random.randint(0, 50, plane)

    # Blue-dominated image: strong channel 0 (blue), weak green and red.
    bluish = np.zeros(plane + (3,), dtype=np.uint8)
    bluish[..., 0] = np.random.randint(150, 255, plane)
    bluish[..., 1] = np.random.randint(0, 50, plane)
    bluish[..., 2] = np.random.randint(0, 50, plane)

    # Fewer clusters than the default keep the demo plots readable.
    system = CBIRSystem("./dummy", n_color_clusters=16)

    # Fit the colour quantiser on the pixels of both images.
    training_pixels = np.concatenate(
        [reddish.reshape(-1, 3), bluish.reshape(-1, 3)]
    ).astype(np.float32)
    system.kmeans_color = KMeans(n_clusters=16, random_state=42, n_init=10)
    system.kmeans_color.fit(training_pixels)

    print("=== DEMO FITUR WARNA ===")

    print("\n1. Analisis Citra Dominasi Merah:")
    system.visualize_color_features(reddish)

    print("\n2. Analisis Citra Dominasi Biru:")
    system.visualize_color_features(bluish)

    print("\n3. Perbandingan Fitur Warna:")
    system.compare_color_features(reddish, bluish)

if __name__ == "__main__":
    # Run the colour-feature demo when this file is executed as a script.
    demo_color_features()


=== Analisis Fitur Warna ===


NameError: name 'cbir' is not defined