In [10]:
import cv2
import numpy as np
from scipy.io import loadmat
from tensorflow.keras.models import load_model
from scipy.spatial.distance import cdist

In [11]:
def preprocess_image(image_path, size=256):
    """
    Read an image from disk as a single-channel grayscale array and scale
    it to a square of size x size pixels (256x256 by default).

    Raises ValueError when OpenCV cannot read the file. No centering or
    cropping is applied.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        # Target size is the same in both dimensions, so the result is square.
        return cv2.resize(gray, (size, size))
    # A None result from imread is treated as "could not load".
    raise ValueError("Image not found or unable to load.")

In [12]:
def compute_gradients(image):
    """
    Estimate per-pixel gradient strength and direction with 3x3 Sobel filters.
    (In the paper, Gaussian derivatives are used for more robust estimates.)

    Returns a (magnitude, orientation) pair; orientation is in radians and
    folded into [0, π).
    """
    # First derivatives along x and along y, computed as 32-bit floats.
    dx = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    dy = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    magnitude = np.sqrt(dx**2 + dy**2)
    # Taking the arctan2 angle modulo π makes opposite directions share
    # the same orientation value.
    orientation = np.mod(np.arctan2(dy, dx), np.pi)
    return magnitude, orientation

In [13]:
def extract_local_features(image, grid_size=(28, 28), patch_ratio=0.125, num_bins=4):
    """
    Compute one orientation histogram per point of a regular sampling grid.

    A square patch (side = patch_ratio * image width, clipped at the image
    borders) is taken around every grid point; the gradient orientations in
    the patch are binned into num_bins bins over [0, π), weighted by gradient
    magnitude, and the histogram is L2-normalized when it is non-zero.

    Returns an array of shape (number_of_patches, num_bins).
    """
    magnitude, orientation = compute_gradients(image)
    h, w = image.shape
    # grid_size is (rows, cols): index 1 drives x, index 0 drives y.
    grid_xs = np.linspace(0, w-1, grid_size[1], dtype=int)
    grid_ys = np.linspace(0, h-1, grid_size[0], dtype=int)
    # e.g. 0.125 * 256 = 32 pixel patches, i.e. 16 pixels on each side.
    half_patch = int(patch_ratio * w) // 2

    def patch_histogram(cy, cx):
        # Clip the patch window to the image so border points stay valid.
        top, bottom = max(cy - half_patch, 0), min(cy + half_patch, h)
        left, right = max(cx - half_patch, 0), min(cx + half_patch, w)
        hist, _ = np.histogram(
            orientation[top:bottom, left:right],
            bins=num_bins,
            range=(0, np.pi),
            weights=magnitude[top:bottom, left:right],
        )
        length = np.linalg.norm(hist)
        # Leave all-zero histograms untouched to avoid dividing by zero.
        return hist / length if length > 0 else hist

    # Row-major traversal: all x positions for the first y, then the next y.
    return np.array([patch_histogram(cy, cx) for cy in grid_ys for cx in grid_xs])

In [None]:
def quantize_features(features, vocabulary):
    """
    Map every feature vector to the index of its nearest visual word.

    Hard assignment: each row of features is compared with every row of
    vocabulary (assumed shape (num_words, feature_dim)) using Euclidean
    distance, and the index of the closest word is returned per feature.
    """
    # Row i of the distance matrix holds the distances from feature i to
    # every word, so the per-row argmin is the nearest word.
    return np.argmin(cdist(features, vocabulary, 'euclidean'), axis=1)

In [15]:
def build_histogram(word_indices, num_words):
    """
    Build a normalized histogram of visual words.

    Parameters
    ----------
    word_indices : array-like of int
        Visual-word index assigned to each local feature. Every entry must
        lie in [0, num_words).
    num_words : int
        Vocabulary size, i.e. the length of the returned histogram.

    Returns
    -------
    numpy.ndarray
        float64 vector of length num_words holding the fraction of features
        assigned to each word, or all zeros when word_indices is empty.

    Raises
    ------
    IndexError
        If an index is not an integer or falls outside [0, num_words).
        Negative indices are rejected instead of wrapping around to the end
        of the histogram and silently counting the wrong word.
    """
    indices = np.asarray(word_indices).ravel()
    if indices.size == 0:
        # Nothing to count: keep the "all zeros" result and avoid 0/0.
        return np.zeros(num_words)
    if not np.issubdtype(indices.dtype, np.integer):
        raise IndexError("word indices must be integers")
    if indices.min() < 0 or indices.max() >= num_words:
        raise IndexError(
            f"word indices must lie in [0, {num_words}); "
            f"got range [{indices.min()}, {indices.max()}]"
        )
    # Vectorized counting; minlength pads words that never occur with zeros.
    counts = np.bincount(indices.astype(np.intp, copy=False), minlength=num_words)
    # Every feature contributes exactly one count, so the total is the size.
    return counts / indices.size

In [16]:
def predict_category(hist, svm_model):
    """
    Classify a single histogram with a pre-trained classifier.

    svm_model is assumed to expose a scikit-learn style predict() that takes
    a 2D array of samples and returns one prediction per row; the prediction
    for the single row built from hist is returned.
    """
    # One sample, with as many columns as the histogram has entries.
    single_sample = hist.reshape(1, -1)
    return svm_model.predict(single_sample)[0]

In [None]:
def main(test_image_path, mat_file_path, model_file_path):
    """
    Classify one image with the bag-of-visual-words pipeline and print the result.

    Parameters
    ----------
    test_image_path : str
        Path of the image to classify.
    mat_file_path : str
        MAT file that must contain a 'vocabulary' variable, expected to have
        shape (num_words, feature_dim).
    model_file_path : str
        Path of the saved Keras model used for classification.

    Raises
    ------
    ValueError
        If the image cannot be loaded or the MAT file has no 'vocabulary'.
    """
    # Step 1: Load and preprocess the test image
    img = preprocess_image(test_image_path)

    # Step 2: Extract local features from the test image
    features = extract_local_features(img)

    # Step 3: Load the visual vocabulary from the .mat file.
    data = loadmat(mat_file_path)
    if 'vocabulary' not in data:
        raise ValueError("The MAT file does not contain 'vocabulary'.")
    vocabulary = data['vocabulary']  # Expected shape: (num_words, feature_dim)
    num_words = vocabulary.shape[0]

    # Step 4: Quantize the features using the visual vocabulary
    word_indices = quantize_features(features, vocabulary)

    # Step 5: Build the normalized histogram of visual words (feature representation)
    hist = build_histogram(word_indices, num_words)

    # Step 6: Load the Keras model from the path supplied by the caller.
    # load_model takes the path itself, so no separate file handle is opened.
    model = load_model(model_file_path)

    # Step 7: The model is fed a batch of one sample; the predicted class is
    # the index of the highest score in its output row.
    prediction = model.predict(hist.reshape(1, -1))
    predicted_class = prediction.argmax(axis=1)[0]
    print("Predicted class:", predicted_class)

if __name__ == '__main__':
    # Entry point: classify a single image via main().
    # Replace these paths with the actual paths on your system.
    test_image_path = 'path_to_your_test_image.png'
    # NOTE(review): main() raises ValueError unless this MAT file holds a
    # 'vocabulary' variable — confirm the file actually provides one.
    mat_file_path = 'feature_shog_smooth.mat'
    model_file_path = 'best_ann_model.h5'
    main(test_image_path, mat_file_path, model_file_path)
    

ValueError: The MAT file does not contain 'vocabulary'.

In [21]:
import cv2
import numpy as np
from scipy.io import loadmat
from tensorflow.keras.models import load_model
from scipy.spatial.distance import cdist

def evaluate_dataset(mat_file_path, train_partition=1, test_partition=2):
    """
    Evaluate 1-nearest-neighbour classification on precomputed features.

    The MAT file is assumed to hold a matrix 'A' (20,000 x 502 in the
    original dataset) laid out as:
      - Column 0: category id
      - Column 1: partition id
      - Columns 2 onward: the feature vector.

    Parameters
    ----------
    mat_file_path : str
        Path of the MAT file to load.
    train_partition : int, optional
        Partition id whose rows are used as the training set (default 1).
    test_partition : int, optional
        Partition id whose rows are classified (default 2).

    Returns
    -------
    float
        Classification accuracy on the test partition, in percent. The same
        value is also printed.

    Raises
    ------
    ValueError
        If 'A' is missing, has fewer than 3 columns, or one of the requested
        partitions contains no rows.
    """
    # Load MAT file
    data = loadmat(mat_file_path)
    if 'A' not in data:
        raise ValueError("MAT file does not contain variable 'A'")
    # Promote to float64 so the squared-distance expansion below cannot
    # overflow or wrap when the matrix was stored with an integer dtype.
    A = np.asarray(data['A'], dtype=np.float64)
    if A.ndim != 2 or A.shape[1] < 3:
        raise ValueError("Variable 'A' must be a 2-D matrix with at least 3 columns")

    # Split into training and testing partitions based on column 1 (partition id)
    partition_train = (A[:, 1] == train_partition)
    partition_test = (A[:, 1] == test_partition)
    if not partition_train.any():
        raise ValueError(f"No samples found for training partition {train_partition}")
    if not partition_test.any():
        raise ValueError(f"No samples found for test partition {test_partition}")

    # Training features: columns 2 to end; ground truth: column 0
    M = A[partition_train, 2:]
    categories_train = A[partition_train, 0]

    # Test features and labels
    N = A[partition_test, 2:]
    categories_test = A[partition_test, 0]

    # Pairwise squared Euclidean distances via |m|^2 - 2 m.n + |n|^2.
    # D[i, j] is the distance between training sample i and test sample j.
    D = (np.sum(M**2, axis=1).reshape(-1, 1)
         - 2 * M.dot(N.T)
         + np.sum(N**2, axis=1).reshape(1, -1))

    # For each test sample (column), pick the closest training sample and
    # take over its category.
    nearest_idx = np.argmin(D, axis=0)
    categories_predicted = categories_train[nearest_idx]

    accuracy = float(np.mean(categories_predicted == categories_test) * 100)
    # Chance level of a uniform guess over the categories seen in training
    # (0.4% for 250 categories).
    chance = 100.0 / np.unique(categories_train).size
    print(f"1-NN classification accuracy on test partition: {accuracy:.2f}% (chance ~{chance:.1f}%)")
    return accuracy

def preprocess_image(image_path, size=256):
    """
    Read a sketch image from disk in grayscale and scale it to a
    size x size square.

    Raises ValueError when the image cannot be loaded.
    """
    sketch = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if sketch is not None:
        return cv2.resize(sketch, (size, size))
    # A None result from imread is treated as "could not load".
    raise ValueError("Image not found or unable to load.")

def extract_feature_from_image(image):
    """
    Extract a 500-dimensional feature vector from the input sketch image.

    This is a placeholder function. In the paper the feature extraction involves:
      - Rescaling the image to 256x256,
      - Computing image gradients (e.g. with Gaussian derivatives),
      - Sampling on a 28x28 grid,
      - Constructing spatial histograms (using soft kernel codebook coding)
      - Aggregating the descriptors into a 500-dim feature vector.

    Replace this placeholder with your actual feature extraction code.

    Parameters
    ----------
    image : numpy.ndarray
        Image array; it is flattened, so any shape is accepted.

    Returns
    -------
    numpy.ndarray
        float32 vector of length 500, L2-normalized unless it is all zeros.
    """
    # --- Placeholder implementation ---
    # Flatten the image and project it with a fixed pseudo-random matrix
    # (dummy operation) to obtain a 500-dimensional vector.
    img_flat = image.flatten().astype(np.float32)
    # A private generator seeded with 42 yields the same projection as
    # np.random.seed(42) followed by np.random.rand(...), but leaves the
    # global NumPy random state untouched for the rest of the notebook.
    rng = np.random.RandomState(42)
    projection = rng.rand(img_flat.shape[0], 500).astype(np.float32)
    feature_vector = img_flat.dot(projection)
    # Normalize the feature vector; an all-zero vector is returned as is.
    norm = np.linalg.norm(feature_vector)
    if norm > 0:
        feature_vector /= norm
    # --- End placeholder ---
    return feature_vector

def predict_category(feature_vector, model_file_path):
    """
    Predict a 1-based category id for one feature vector using the Keras
    model stored at model_file_path.

    The model is loaded from the H5 file on every call.
    """
    # Keras H5 models are loaded with load_model (not pickle).
    ann_model = load_model(model_file_path)

    # The model is fed a batch of one sample: shape (1, feature_dim).
    batch = feature_vector.reshape(1, -1)
    scores = ann_model.predict(batch)

    # argmax gives a 0-based index; adding 1 matches 1-indexed category
    # labels (1 to 250).
    return scores.argmax(axis=1)[0] + 1

def test_new_image(test_image_path, model_file_path):
    """
    Run the full single-image pipeline on a user-made sketch: preprocess it,
    extract its feature vector, classify it with the ANN model, and print
    the predicted category.
    """
    # Preprocess, then extract the feature vector from the resized image.
    feature_vector = extract_feature_from_image(preprocess_image(test_image_path))

    # Classify and report.
    print("Predicted category for the test image:",
          predict_category(feature_vector, model_file_path))

def main():
    """
    Run the 1-NN evaluation on the precomputed features, then classify one
    sample sketch with the ANN model.
    """
    # Paths are relative to the working directory; update them for your setup.
    print("=== Evaluating precomputed dataset (1-NN) ===")
    evaluate_dataset('feature_extraction/features_shog_smooth.mat')

    print("\n=== Testing new image with ANN model ===")
    test_new_image('sketches/windmill/19602.png', 'best_ann_model.h5')

if __name__ == '__main__':
    # Entry point: run the dataset evaluation and the single-image test.
    main()


=== Evaluating precomputed dataset (1-NN) ===
1-NN classification accuracy on test partition: 24.90% (chance ~0.4%)

=== Testing new image with ANN model ===




1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 140ms/step
Predicted category for the test image: 73


In [30]:
import cv2
import numpy as np
from scipy.io import loadmat
from tensorflow.keras.models import load_model
from scipy.spatial.distance import cdist

# ----------------------------
# Feature extraction functions
# ----------------------------

def preprocess_image(image_path, size=256):
    """
    Read a sketch image from disk in grayscale, scale it to size x size,
    and return it as a float32 array.

    Raises ValueError (naming the offending path) when loading fails.
    """
    sketch = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if sketch is not None:
        resized = cv2.resize(sketch, (size, size))
        return resized.astype(np.float32)
    # A None result from imread is treated as "could not load".
    raise ValueError(f"Image not found or unable to load: {image_path}")

def compute_gradients(image):
    """
    Estimate per-pixel gradient strength and direction with 3x3 Sobel filters.
    (Note: In the paper, Gaussian derivatives are used for robustness.)

    Returns a (magnitude, orientation) pair; orientation is in radians and
    folded into [0, π).
    """
    # First derivatives along x and along y, computed as 32-bit floats.
    dx = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
    dy = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
    magnitude = np.sqrt(dx**2 + dy**2)
    # Taking the arctan2 angle modulo π makes opposite directions share
    # the same orientation value.
    orientation = np.mod(np.arctan2(dy, dx), np.pi)
    return magnitude, orientation

def extract_local_descriptors(image, grid_size=(28, 28), patch_size_ratio=0.125,
                              num_spatial_bins=4, num_orientation_bins=4):
    """
    Compute one local descriptor per point of a regular sampling grid.

    Around every grid point a square patch (side = patch_size_ratio * image
    width, clipped at the image borders) is split into
    num_spatial_bins x num_spatial_bins cells. Each cell contributes a
    magnitude-weighted histogram of gradient orientations with
    num_orientation_bins bins over [0, π). The cell histograms are
    concatenated (64 values with the defaults) and L2-normalized when
    non-zero.

    Returns an array of shape (num_patches, 64) for the default settings.
    """
    magnitude, orientation = compute_gradients(image)
    h, w = image.shape
    # grid_size is (rows, cols): index 1 drives x, index 0 drives y.
    grid_xs = np.linspace(0, w-1, grid_size[1], dtype=int)
    grid_ys = np.linspace(0, h-1, grid_size[0], dtype=int)
    # e.g. 0.125 * 256 = 32 pixel patches, i.e. 16 pixels on each side.
    half_patch = int(patch_size_ratio * w) // 2

    descriptors = []
    for gy in grid_ys:
        for gx in grid_xs:
            # Patch window, clipped to the image.
            left, right = max(gx - half_patch, 0), min(gx + half_patch, w)
            top, bottom = max(gy - half_patch, 0), min(gy + half_patch, h)
            # Integer cell size: leftover rows/columns at the far edge of the
            # patch are not covered by any cell.
            cell_h = (bottom - top) // num_spatial_bins
            cell_w = (right - left) // num_spatial_bins

            values = []
            for row in range(num_spatial_bins):
                r0 = top + row * cell_h
                for col in range(num_spatial_bins):
                    c0 = left + col * cell_w
                    # Cells never extend past the patch, so indexing the full
                    # gradient maps directly selects the same pixels.
                    cell_hist, _ = np.histogram(
                        orientation[r0:r0 + cell_h, c0:c0 + cell_w],
                        bins=num_orientation_bins,
                        range=(0, np.pi),
                        weights=magnitude[r0:r0 + cell_h, c0:c0 + cell_w],
                    )
                    values.extend(cell_hist)

            descriptor = np.array(values, dtype=np.float32)
            length = np.linalg.norm(descriptor)
            # Leave all-zero descriptors untouched to avoid dividing by zero.
            if length > 0:
                descriptor /= length
            descriptors.append(descriptor)
    return np.array(descriptors)  # Shape: (num_patches, 64)

def quantize_descriptors(descriptors, vocabulary):
    """
    Turn a set of local descriptors into a normalized visual-word histogram.

    Every descriptor is hard-assigned to its nearest vocabulary row
    (Euclidean distance); the number of assignments per word is divided by
    the total so the result sums to 1. vocabulary is expected to be a
    (500, 64) NumPy array, giving a 500-D float32 histogram.
    """
    # Nearest word per descriptor: argmin along each row of the distance matrix.
    assignments = cdist(descriptors, vocabulary, metric='euclidean').argmin(axis=1)
    # Count the assignments per word; minlength pads words that never occur.
    hist = np.bincount(assignments, minlength=vocabulary.shape[0]).astype(np.float32)
    total = hist.sum()
    # Skip the division when nothing was counted.
    if total > 0:
        hist /= total
    return hist

def extract_feature(image, vocabulary):
    """
    Build the image-level feature: local descriptors are extracted from the
    image and quantized against vocabulary into a single histogram
    (500-D for a 500-word vocabulary).
    """
    return quantize_descriptors(extract_local_descriptors(image), vocabulary)

# ----------------------------
# Prediction and evaluation
# ----------------------------

def evaluate_dataset(mat_file_path, train_partition=1, test_partition=2):
    """
    Evaluate 1-NN on precomputed features stored in the MAT file.

    The MAT file must contain a matrix 'A' whose column 0 is the category
    id, column 1 the partition id, and columns 2 onward the feature vector.

    Parameters
    ----------
    mat_file_path : str
        Path of the MAT file to load.
    train_partition : int, optional
        Partition id whose rows are used as the training set (default 1).
    test_partition : int, optional
        Partition id whose rows are classified (default 2).

    Returns
    -------
    float
        Classification accuracy on the test partition, in percent. The same
        value is also printed.

    Raises
    ------
    ValueError
        If 'A' is missing, has fewer than 3 columns, or one of the requested
        partitions contains no rows.
    """
    data = loadmat(mat_file_path)
    if 'A' not in data:
        raise ValueError("MAT file does not contain variable 'A'")
    # Promote to float64 so the squared-distance expansion below cannot
    # overflow or wrap when the matrix was stored with an integer dtype.
    A = np.asarray(data['A'], dtype=np.float64)
    if A.ndim != 2 or A.shape[1] < 3:
        raise ValueError("Variable 'A' must be a 2-D matrix with at least 3 columns")

    partition_train = (A[:, 1] == train_partition)
    partition_test = (A[:, 1] == test_partition)
    if not partition_train.any():
        raise ValueError(f"No samples found for training partition {train_partition}")
    if not partition_test.any():
        raise ValueError(f"No samples found for test partition {test_partition}")

    M = A[partition_train, 2:]
    categories_train = A[partition_train, 0]

    N = A[partition_test, 2:]
    categories_test = A[partition_test, 0]

    # Pairwise squared Euclidean distances via |m|^2 - 2 m.n + |n|^2;
    # rows index training samples, columns index test samples.
    D = (np.sum(M**2, axis=1).reshape(-1, 1)
         - 2 * M.dot(N.T)
         + np.sum(N**2, axis=1).reshape(1, -1))

    # Each test sample takes the category of its closest training sample.
    nearest_idx = np.argmin(D, axis=0)
    categories_predicted = categories_train[nearest_idx]

    accuracy = float(np.mean(categories_predicted == categories_test) * 100)
    # Chance level of a uniform guess over the categories seen in training
    # (0.4% for 250 categories).
    chance = 100.0 / np.unique(categories_train).size
    print(f"1-NN classification accuracy on test partition: {accuracy:.2f}% (chance ~{chance:.1f}%)")
    return accuracy

def predict_category(feature_vector, model_file_path):
    """
    Predict a 1-based category id for one feature vector (500-D in this
    pipeline) using the Keras model stored at model_file_path.

    The model is loaded from the H5 file on every call.
    """
    # The model is fed a batch of one sample: shape (1, feature_dim).
    batch = feature_vector.reshape(1, -1)
    scores = load_model(model_file_path).predict(batch)
    # argmax is 0-based; add 1 for 1-indexed categories.
    return scores.argmax(axis=1)[0] + 1

def test_new_image(test_image_path, model_file_path, vocabulary_path='vocabulary.npy'):
    """
    Classify a user-made sketch: preprocess the image, build its feature
    vector from the visual vocabulary stored at vocabulary_path, run the ANN
    model, and print the predicted category.

    Raises ValueError when the image cannot be loaded or the vocabulary file
    does not exist.
    """
    sketch = preprocess_image(test_image_path)

    # The vocabulary is a NumPy file, expected to have shape (500, 64).
    try:
        words = np.load(vocabulary_path)
    except FileNotFoundError:
        # Re-raised as ValueError with a hint about the expected contents.
        raise ValueError(f"Vocabulary file '{vocabulary_path}' not found. Ensure you have a (500,64) vocabulary.")

    predicted = predict_category(extract_feature(sketch, words), model_file_path)
    print("Predicted category for the test image:", predicted)

def main():
    """
    Run the 1-NN evaluation on the precomputed features, then classify one
    sample sketch with the ANN model and the visual vocabulary.
    """
    # Paths are relative to the working directory; update them for your setup.
    # The vocabulary file must exist (shape: 500x64).
    print("=== Evaluating precomputed dataset (1-NN) ===")
    evaluate_dataset('feature_extraction/features_shog_smooth.mat')

    print("\n=== Testing new image with ANN model ===")
    test_new_image('sketches/pen/11921.png', 'best_ann_model.h5', 'vocabulary.npy')

if __name__ == '__main__':
    # Entry point: run the dataset evaluation and the single-image test.
    main()


=== Evaluating precomputed dataset (1-NN) ===
1-NN classification accuracy on test partition: 24.90% (chance ~0.4%)

=== Testing new image with ANN model ===


ValueError: Vocabulary file 'vocabulary.npy' not found. Ensure you have a (500,64) vocabulary.