### Extracting features from 1200 images locally 

In [1]:
import os
import glob
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
import joblib

In [2]:
# ----------------------------
# Step 1: Image Preprocessing and Local Descriptor Extraction
# ----------------------------

def preprocess_image(image_path, size=256):
    """
    Load an image in grayscale and resize it to size x size.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Cannot load image: {image_path}")
    img_resized = cv2.resize(img, (size, size))
    return img_resized.astype(np.float32)

def compute_gradients(image):
    """
    Compute image gradients using the Sobel operator.
    (Gaussian derivatives can be used for more robustness.)
    """
    grad_x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    magnitude = np.sqrt(grad_x**2 + grad_y**2)
    orientation = np.arctan2(grad_y, grad_x)
    orientation = np.mod(orientation, np.pi)  # Map angles to [0, π)
    return magnitude, orientation

def extract_local_descriptors(image, grid_size=(28, 28), patch_size_ratio=0.125,
                              num_spatial_bins=4, num_orientation_bins=4):
    """
    For each patch (sampled on a grid) in the image, subdivide it into
    (num_spatial_bins x num_spatial_bins) cells and compute a histogram of 
    gradient orientations (with num_orientation_bins bins) for each cell.
    Concatenate the histograms from all cells to yield a 64-dimensional descriptor.
    Returns an array of descriptors (one per patch).
    """
    magnitude, orientation = compute_gradients(image)
    h, w = image.shape
    descriptors = []
    # Generate grid points (centers) for patches
    xs = np.linspace(0, w-1, grid_size[1], dtype=int)
    ys = np.linspace(0, h-1, grid_size[0], dtype=int)
    patch_size = int(patch_size_ratio * w)  # e.g., ~32 pixels for a 256x256 image
    half_patch = patch_size // 2

    for y in ys:
        for x in xs:
            # Define patch boundaries (with border checks)
            x1 = max(x - half_patch, 0)
            x2 = min(x + half_patch, w)
            y1 = max(y - half_patch, 0)
            y2 = min(y + half_patch, h)
            patch_mag = magnitude[y1:y2, x1:x2]
            patch_orient = orientation[y1:y2, x1:x2]
            # Determine cell sizes within the patch
            cell_h = (y2 - y1) // num_spatial_bins
            cell_w = (x2 - x1) // num_spatial_bins
            descriptor = []
            # Iterate over cells
            for i in range(num_spatial_bins):
                for j in range(num_spatial_bins):
                    cy1 = y1 + i * cell_h
                    cy2 = cy1 + cell_h
                    cx1 = x1 + j * cell_w
                    cx2 = cx1 + cell_w
                    # Extract cell region
                    cell_orient = patch_orient[cy1 - y1:cy2 - y1, cx1 - x1:cx2 - x1]
                    cell_mag = patch_mag[cy1 - y1:cy2 - y1, cx1 - x1:cx2 - x1]
                    # Compute histogram for cell
                    hist, _ = np.histogram(cell_orient, bins=num_orientation_bins, 
                                           range=(0, np.pi), weights=cell_mag)
                    descriptor.extend(hist)
            descriptor = np.array(descriptor, dtype=np.float32)
            norm_val = np.linalg.norm(descriptor)
            if norm_val > 0:
                descriptor /= norm_val
            descriptors.append(descriptor)
    return np.array(descriptors)  # Shape: (num_patches, 64)



In [3]:
# ----------------------------
# Step 2: Build Visual Vocabulary using K-Means
# ----------------------------

def gather_all_descriptors(dataset_folder):
    """
    Iterate over all images in the dataset folder.
    Assumes that dataset_folder has subfolders for each category.
    Returns:
      - all_descriptors: an array of all local descriptors extracted.
      - image_paths: a list of image paths (for later feature extraction).
      - labels: a list of integer labels corresponding to each image.
    """
    all_descriptors = []
    image_paths = []
    labels = []
    categories = sorted(os.listdir(dataset_folder))
    # You may want to map category names to integer labels:
    label_dict = {cat: idx for idx, cat in enumerate(categories)}
    
    for cat in categories:
        cat_path = os.path.join(dataset_folder, cat)
        if not os.path.isdir(cat_path):
            continue
        # Process common image file types
        for ext in ['*.jpg', '*.png', '*.jpeg', '*.bmp']:
            files = glob.glob(os.path.join(cat_path, ext))
            for file in files:
                try:
                    img = preprocess_image(file)
                    desc = extract_local_descriptors(img)
                    # Append all descriptors from this image
                    all_descriptors.append(desc)
                    image_paths.append(file)
                    labels.append(label_dict[cat])
                except Exception as e:
                    print(f"Error processing {file}: {e}")
    # Concatenate all descriptors (each image returns an array of shape (num_patches, 64))
    all_descriptors = np.vstack(all_descriptors)
    return all_descriptors, image_paths, labels, label_dict

def build_vocabulary(all_descriptors, vocab_size=500, save_path='vocabulary.npy'):
    """
    Learn a visual vocabulary by applying k-means to the collected descriptors.
    Returns the vocabulary (a NumPy array of shape (vocab_size, descriptor_dim)).
    """
    print("Clustering descriptors to build vocabulary...")
    kmeans = KMeans(n_clusters=vocab_size, random_state=42, n_init=10)
    kmeans.fit(all_descriptors)
    vocabulary = kmeans.cluster_centers_
    np.save(save_path, vocabulary)
    print(f"Vocabulary saved to {save_path}")
    return vocabulary


In [4]:
# ----------------------------
# Step 3: Compute Global Image Feature (Histogram of Visual Words)
# ----------------------------
from scipy.spatial.distance import cdist

def soft_quantize_descriptors(descriptors, vocabulary, sigma=0.3):
    """
    For each descriptor, compute Gaussian weighted contributions to each vocabulary center,
    then sum up contributions into a histogram.
    """
    distances = cdist(descriptors, vocabulary, metric='euclidean')
    # Compute weights using a Gaussian kernel
    weights = np.exp(-distances**2 / (2 * sigma**2))
    # Normalize weights for each descriptor
    weights /= weights.sum(axis=1, keepdims=True)
    # Sum contributions over all descriptors to obtain histogram
    hist = weights.sum(axis=0)
    if hist.sum() > 0:
        hist /= hist.sum()
    return hist

def extract_image_feature(image, vocabulary):
    """
    For a given image, extract local descriptors and compute a global 500-D feature 
    vector by quantizing the descriptors with the visual vocabulary.
    """
    descriptors = extract_local_descriptors(image)
    feature_vector = soft_quantize_descriptors(descriptors, vocabulary)
    return feature_vector

def extract_features_dataset(dataset_folder, vocabulary):
    """
    Process all images in the dataset folder, extract their global features,
    and return:
      - features: an array of shape (num_images, vocab_size)
      - labels: a list of integer labels corresponding to each image.
    """
    features = []
    labels = []
    categories = sorted(os.listdir(dataset_folder))
    label_dict = {cat: idx for idx, cat in enumerate(categories)}
    
    for cat in categories:
        cat_path = os.path.join(dataset_folder, cat)
        if not os.path.isdir(cat_path):
            continue
        for ext in ['*.jpg', '*.png']:
            files = glob.glob(os.path.join(cat_path, ext))
            for file in files:
                try:
                    img = preprocess_image(file)
                    feat = extract_image_feature(img, vocabulary)
                    features.append(feat)
                    labels.append(label_dict[cat])
                except Exception as e:
                    print(f"Error processing {file}: {e}")
    features = np.array(features)
    return features, labels

In [5]:
# ----------------------------
# Main Function for Feature Extraction Pipeline
# ----------------------------

def main():
    dataset_folder = '../sketches'
    
    # gathering all local descriptors from the dataset
    print("Extracting local descriptors from dataset images...")
    all_descriptors, image_paths, image_labels, label_dict = gather_all_descriptors(dataset_folder)
    print(f"Collected {all_descriptors.shape[0]} descriptors from {len(image_paths)} images.")
    
    # building the visual vocabulary
    vocab_path = 'vocabulary.npy'
    if os.path.exists(vocab_path):
        print(f"Loading existing vocabulary from {vocab_path}")
        vocabulary = np.load(vocab_path)
    else:
        vocabulary = build_vocabulary(all_descriptors, vocab_size=500, save_path=vocab_path)
    
    # computing the global feature vector for each image
    print("Extracting global features for each image...")
    features, labels = extract_features_dataset(dataset_folder, vocabulary)
    print(f"Extracted features shape: {features.shape}")
    
    # saving features and labels for training your recognition model
    np.save('image_features.npy', features)
    np.save('image_labels.npy', np.array(labels))
    print("Features and labels saved as 'image_features.npy' and 'image_labels.npy'.")
    
if __name__ == '__main__':
    
    main()

Extracting local descriptors from dataset images...
Collected 940800 descriptors from 1200 images.
Clustering descriptors to build vocabulary...
Vocabulary saved to vocabulary.npy
Extracting global features for each image...
Extracted features shape: (1200, 500)
Features and labels saved as 'image_features.npy' and 'image_labels.npy'.


In [7]:
dataset_folder = '../sketches'

# Load the saved features and labels
features = np.load('image_features.npy')
labels = np.load('image_labels.npy')

# Recreate the label dictionary based on the dataset folder structure
dataset_folder = 'sketches'
categories = sorted(os.listdir(dataset_folder))
label_dict = {cat: idx for idx, cat in enumerate(categories)}

# Reverse the label dictionary to map labels back to category names
label_dict_reverse = {v: k for k, v in label_dict.items()}

# Find the first image of each category and display its feature array
first_image_features = {}
for label in np.unique(labels):
    category_name = label_dict_reverse[label]
    first_image_index = np.where(labels == label)[0][0]
    first_image_features[category_name] = features[first_image_index]

# Display the feature arrays for the first image of each category
for category, feature_array in first_image_features.items():
    print(f"Category: {category}, Feature Array: {feature_array}")

FileNotFoundError: [Errno 2] No such file or directory: 'sketches'