In [1]:
import os
import warnings
from glob import glob

import cv2
import numpy as np

def load_uc_merced_dataset(base_path="/kaggle/input/image-retrivel/UCMerced_LandUse"):
    """Load every ``*.tif`` image under ``base_path`` together with its class label.

    ``base_path`` is expected to contain one sub-directory per class; the
    sub-directory name becomes the label of every image found inside it.
    Each image is read with ``cv2.imread`` and resized to 256x256 pixels.

    Files that OpenCV cannot decode are skipped (and reported through a single
    ``RuntimeWarning``) instead of aborting the whole load.

    Args:
        base_path: Directory whose immediate sub-directories are the classes.
            NOTE(review): the main cell passes the ``Images`` sub-directory
            explicitly while this default stops one level above it - confirm
            the default still points at the intended root.

    Returns:
        tuple: ``(data, labels)`` as NumPy arrays with one entry per loaded
        image, ordered by class name and then by file path. Both arrays are
        empty when no image could be loaded.

    Raises:
        OSError: propagated from ``os.listdir`` when ``base_path`` cannot be
            listed (for example because it does not exist).
    """
    data = []
    labels = []
    unreadable = []

    # Only directories can be classes; stray files next to them are ignored.
    classes = sorted(
        entry for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry))
    )

    for label in classes:
        # glob() order depends on the filesystem; sorting keeps row i tied to
        # the same file on every run.
        img_paths = sorted(glob(os.path.join(base_path, label, '*.tif')))
        for path in img_paths:
            img = cv2.imread(path)
            if img is None:
                # cv2.imread reports failure by returning None, which would
                # otherwise crash cv2.resize below.
                unreadable.append(path)
                continue
            img = cv2.resize(img, (256, 256))
            data.append(img)
            labels.append(label)

    if unreadable:
        warnings.warn(
            f"Skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[0]}",
            RuntimeWarning,
            stacklevel=2,
        )

    return np.array(data), np.array(labels)

In [7]:
def extract_color_histogram(image):
    """Return a flattened, normalized 3-D HSV colour histogram of ``image``.

    The image is converted from BGR to HSV, binned into 8 bins per channel
    (8 * 8 * 8 = 512 values) over the ranges H in [0, 180) and S, V in
    [0, 256), then normalized in place with ``cv2.normalize`` using its
    default arguments.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.

    Returns:
        The normalized histogram flattened to one dimension.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    channel_indices = [0, 1, 2]
    bins_per_channel = [8, 8, 8]
    channel_ranges = [0, 180, 0, 256, 0, 256]

    histogram = cv2.calcHist(
        [hsv_image], channel_indices, None, bins_per_channel, channel_ranges
    )
    # The histogram doubles as the destination buffer of the normalization.
    normalized = cv2.normalize(histogram, histogram)
    return normalized.flatten()

In [8]:
# 🌾 Texture Features (GLCM)
def extract_texture_glcm(image):
    """Return GLCM texture descriptors of ``image`` as one flat vector.

    The image is converted from BGR to grayscale and a symmetric, normalized
    co-occurrence matrix with 256 levels is built for pixel distance 1 at the
    angles 0, pi/4 and pi/2. Contrast, correlation, energy and homogeneity are
    then read from it and concatenated in that order.

    NOTE(review): ``graycomatrix`` and ``graycoprops`` are not imported in any
    visible cell - presumably they come from ``skimage.feature``; confirm the
    import exists so the notebook survives a fresh-kernel run.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.

    Returns:
        1-D array holding the flattened values of the four properties.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    pixel_distances = [1]
    directions = [0, np.pi/4, np.pi/2]
    cooccurrence = graycomatrix(
        grayscale, pixel_distances, directions, 256, symmetric=True, normed=True
    )

    descriptors = []
    for prop_name in ('contrast', 'correlation', 'energy', 'homogeneity'):
        descriptors.append(graycoprops(cooccurrence, prop_name).flatten())
    return np.concatenate(descriptors)

In [9]:
# Shape Features (Hu Moments)
def extract_shape_features(image):
    """Return log-scaled Hu moments of the largest external contour.

    The image is converted from BGR to grayscale, binarized with
    ``THRESH_BINARY + THRESH_OTSU`` and its external contours are extracted.
    The contour with the greatest ``cv2.contourArea`` supplies the seven Hu
    moments, each mapped to ``-sign(m) * log10(|m| + 1e-10)``.

    Args:
        image: BGR image as accepted by ``cv2.cvtColor``.

    Returns:
        1-D array of 7 values; all zeros when no contour is found.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    threshold_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary_mask = cv2.threshold(grayscale, 128, 255, threshold_mode)
    outlines, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Nothing to describe: fall back to a neutral all-zero descriptor.
    if not outlines:
        return np.zeros(7)

    dominant = max(outlines, key=cv2.contourArea)
    hu = cv2.HuMoments(cv2.moments(dominant)).flatten()
    # Signed log transform; the 1e-10 offset keeps log10 finite for zero moments.
    return -np.sign(hu) * np.log10(np.abs(hu) + 1e-10)

# Combine all features into one hybrid vector
def build_feature_vector(image):
    """Build the hybrid descriptor of ``image``.

    Runs the colour-histogram, GLCM-texture and Hu-moment shape extractors on
    the same image and joins their outputs, in that order, into one vector.

    Args:
        image: Image passed unchanged to each of the three extractors.

    Returns:
        1-D array: colour features, then texture features, then shape features.
    """
    parts = (
        extract_color_histogram(image),
        extract_texture_glcm(image),
        extract_shape_features(image),
    )
    return np.concatenate(parts)

In [12]:
# Step 3: Main Execution
# Driver cell: load the dataset, then turn every image into one hybrid
# feature vector and stack the vectors into a single matrix.
if __name__ == "__main__":
    print("🔄 Loading dataset...")
    # The class folders are read from the `Images` sub-directory, which is
    # passed explicitly here rather than relying on the loader's default path.
    images, labels = load_uc_merced_dataset("/kaggle/input/image-retrivel/UCMerced_LandUse/Images")

    print("🧠 Extracting hybrid features...")
    # One build_feature_vector() call per image; row i of the final matrix
    # corresponds to images[i] / labels[i].
    feature_vectors = []
    for img in images:
        feat = build_feature_vector(img)
        feature_vectors.append(feat)

    # Stack the per-image vectors; the recorded run reports shape (2100, 531).
    features = np.array(feature_vectors)
    print(f"✅ Done. Feature matrix shape: {features.shape}")

🔄 Loading dataset...
🧠 Extracting hybrid features...
✅ Done. Feature matrix shape: (2100, 531)
