In [1]:
import cv2
import os
import numpy as np
from glob import glob

## Preprocessing

In [2]:
def preprocess_image(image_path, output_size=(256, 256)):
    """
    Load one image, binarize it with Otsu's method and resize it.

    Parameters:
    - image_path: Path of the image file to read.
    - output_size: Size tuple handed to cv2.resize (defaults to (256, 256)).

    Returns:
    - The thresholded and resized image, or None when the file could not be
      read (an error line is printed in that case).
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        print("Error loading image:", image_path)
        return None

    # Otsu picks the threshold itself, so the explicit threshold 0 is ignored;
    # pixels end up as either 0 or 255.
    otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    binary = cv2.threshold(gray, 0, 255, otsu_flags)[1]

    # A morphological clean-up (e.g. cv2.MORPH_CLOSE with a 3x3 kernel) could
    # be inserted here if noise removal or gap filling is ever needed.

    # NOTE(review): resizing happens after thresholding with INTER_AREA, so the
    # result may contain intermediate gray levels again - confirm this is wanted.
    return cv2.resize(binary, output_size, interpolation=cv2.INTER_AREA)

In [3]:
def process_dataset(input_folder, output_folder, output_size=(256, 256)):
    """
    Preprocess every PNG found under input_folder and save the results.

    The input folder is searched recursively, but all results are written
    directly into output_folder (the sub-folder structure is flattened).

    Parameters:
    - input_folder: Root directory that is searched recursively for *.png files.
    - output_folder: Directory receiving the preprocessed images (created if missing).
    - output_size: Target size forwarded to preprocess_image.

    Returns:
    - None. Progress and problems are reported with print().
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so that the processing order (and therefore the collision
    # handling below) is the same on every run and every platform.
    image_files = sorted(glob(os.path.join(input_folder, "**", "*.png"), recursive=True))

    print(f"Found {len(image_files)} images in {input_folder}.")

    used_names = set()   # output file names already written during this run
    skipped = 0          # images that could not be read or written

    for image_path in image_files:
        preprocessed = preprocess_image(image_path, output_size)
        if preprocessed is None:
            skipped += 1
            continue

        filename = os.path.basename(image_path)
        if filename in used_names:
            # Two sub-folders contain a file with the same name. Writing both to
            # the same flat path would silently overwrite the first one, so the
            # later file gets a name built from its path relative to input_folder.
            relative = os.path.relpath(image_path, input_folder)
            stem, ext = os.path.splitext(relative.replace(os.sep, "_"))
            filename = stem + ext
            suffix = 1
            while filename in used_names:
                filename = f"{stem}_{suffix}{ext}"
                suffix += 1
        used_names.add(filename)

        output_path = os.path.join(output_folder, filename)
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(output_path, preprocessed):
            skipped += 1
            print("Error writing image:", output_path)

    if skipped:
        print(f"{skipped} of {len(image_files)} images could not be processed.")
    print("Preprocessing complete. Preprocessed images saved to:", output_folder)

In [4]:

if __name__ == "__main__":
    # Raw sketches are read from input_folder; the binarized, resized copies
    # are written to output_folder.
    input_folder, output_folder = "./sketches", "./processed_sketches"

    # Every image is resized to this size.
    output_size = (256, 256)

    process_dataset(input_folder, output_folder, output_size=output_size)


Found 20000 images in ./sketches.
Preprocessing complete. Preprocessed images saved to: ./processed_sketches


## Normalisation of pixels



In [None]:
import os
import cv2
import numpy as np

# Input folder (the images written by the preprocessing step) and the output
# folder that receives the images produced by this step.
FOLDER_PATH = "processed_sketches"
NORMALIZED_PATH = "normalized_sketches" 

# Create the output folder up front; exist_ok=True makes re-running this cell safe.
os.makedirs(NORMALIZED_PATH, exist_ok=True)

def normalize_images(folder_path, output_folder):
    """
    Re-save every readable image of folder_path into output_folder.

    Each image is read as grayscale, scaled to [0, 1] as float32 and then
    scaled back to 8-bit for writing. Because the files are stored as 8-bit
    images, the saved pixels are in the 0-255 range again; code that needs
    [0, 1] values has to divide by 255 after loading.

    Parameters:
    - folder_path: Directory whose entries are read (not recursive).
    - output_folder: Directory receiving the images (created if missing).

    Returns:
    - None. Progress and problems are reported with print().
    """
    # Do not rely on the caller having created the output folder:
    # cv2.imwrite does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    failed_writes = 0
    # Sorted for a reproducible processing order.
    for filename in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, filename)

        # Read image in grayscale; unreadable entries (e.g. sub-folders or
        # non-image files) come back as None and are skipped.
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Skipping {filename} (not a valid image)")
            continue

        # Normalize pixel values to range [0,1]
        img_normalized = img.astype("float32") / 255.0
        # Convert back to 8-bit for saving as PNG/JPG
        img_uint8 = (img_normalized * 255).astype("uint8")

        output_path = os.path.join(output_folder, filename)
        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(output_path, img_uint8):
            failed_writes += 1
            print(f"Failed to write {output_path}")

    if failed_writes:
        print(f"{failed_writes} images could not be saved in '{output_folder}'")
    else:
        print(f"All images normalized and saved in '{output_folder}'")

# Run the function on the preprocessed images
normalize_images(FOLDER_PATH, NORMALIZED_PATH)


All images normalized and saved in 'normalized_sketches'


### Feature Extraction:

In [2]:
import os
import numpy as np
import cv2
from glob import glob
from skimage import io, color
from skimage.feature import hog
from skimage.filters import gabor
from sklearn.cluster import MeanShift
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
def extract_hog_features(image, pixels_per_cell=(16, 16), cells_per_block=(2, 2), orientations=9):
    """
    Compute the HOG descriptor of a grayscale image.

    Parameters:
    - image: Grayscale image passed straight to skimage.feature.hog.
    - pixels_per_cell: Cell size in pixels.
    - cells_per_block: Number of cells per normalization block.
    - orientations: Number of orientation bins.

    Returns:
    - Whatever hog() returns with feature_vector=True (a flat feature vector).
    """
    # Fixed settings: L2-Hys block normalization, transform_sqrt enabled and a
    # flattened output; only the three geometry parameters are configurable.
    hog_params = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
    )
    return hog(image, **hog_params)

In [4]:
def extract_gabor_features(image, frequencies=(0.1, 0.2, 0.3), orientations=(0, np.pi/4, np.pi/2, 3*np.pi/4)):
    """
    Extract Gabor filter statistics from a grayscale image.

    For each (frequency, orientation) pair the image is filtered, the
    magnitude of the complex response is computed, and its mean and variance
    are appended to the feature vector.

    Parameters:
    - image: 2D grayscale image. Integer images (e.g. uint8 from cv2.imread)
      are converted to float and scaled by the maximum of their dtype; float
      images are used as given.
    - frequencies: Iterable of filter frequencies.
    - orientations: Iterable of filter orientations in radians.

    Returns:
    - 1D array of length 2 * len(frequencies) * len(orientations), ordered as
      (mean, variance) per pair, with frequency as the outer loop.
    """
    image = np.asarray(image)
    if not np.issubdtype(image.dtype, np.floating):
        # The filter responses keep the dtype of an integer input. With uint8
        # that makes negative responses unrepresentable and lets the squared
        # magnitude wrap around modulo 256, so work in floating point instead.
        if np.issubdtype(image.dtype, np.integer):
            image = image.astype(np.float64) / np.iinfo(image.dtype).max
        else:
            image = image.astype(np.float64)

    features = []
    for freq in frequencies:
        for theta in orientations:
            filt_real, filt_imag = gabor(image, frequency=freq, theta=theta)
            # hypot computes sqrt(re**2 + im**2) without intermediate overflow.
            magnitude = np.hypot(filt_real, filt_imag)
            features.append(magnitude.mean())
            features.append(magnitude.var())
    return np.array(features)

In [5]:
def extract_hybrid_features(image):
    """
    Build the hybrid descriptor of one image: HOG features followed by
    Gabor features, joined into a single 1D vector.

    Assumes image is a normalized grayscale image.
    """
    # HOG part first, Gabor part second - the order fixes the column layout.
    parts = (extract_hog_features(image), extract_gabor_features(image))
    return np.concatenate(parts)

In [6]:
def build_feature_matrix(images):
    """
    Extract the hybrid feature vector of every image and stack the vectors.

    Parameters:
    - images: Iterable of preprocessed images.

    Returns:
    - 2D array with one row per image (rows follow the input order).
    """
    rows = []
    for count, image in enumerate(images, start=1):
        rows.append(extract_hybrid_features(image))
        # Progress line after every 50th image.
        if count % 50 == 0:
            print(f"Extracted features from {count} images.")
    return np.vstack(rows)

In [7]:
def perform_mean_shift_clustering(X_features, bandwidth=None):
    """
    Cluster the rows of a feature matrix with Mean Shift.

    Parameters:
    - X_features: Feature matrix handed to MeanShift.fit.
    - bandwidth: Bandwidth forwarded to MeanShift (None is passed through).

    Returns:
    - (labels, model): the labels_ attribute of the fitted model and the
      fitted MeanShift instance itself.
    """
    model = MeanShift(bin_seeding=True, bandwidth=bandwidth)
    model.fit(X_features)

    cluster_ids = model.labels_
    n_clusters = len(np.unique(cluster_ids))
    print(f"Mean Shift found {n_clusters} clusters.")
    return cluster_ids, model

In [8]:
def train_and_evaluate(X_features, y_labels):
    """
    Train a linear SVM on a stratified 70/30 split and print test metrics.

    Parameters:
    - X_features: Feature matrix, one row per sample.
    - y_labels: Class label of each sample (also used for stratification).

    Returns:
    - The fitted SVC classifier.
    """
    # Fixed random_state keeps the split reproducible between runs.
    split = train_test_split(
        X_features, y_labels, test_size=0.3, random_state=42, stratify=y_labels
    )
    X_train, X_test, y_train, y_test = split

    # Linear kernel; other kernels can be tried by changing this line.
    clf = SVC(kernel='linear', probability=True, random_state=42)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    # Report accuracy, per-class metrics and the confusion matrix.
    acc = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {acc*100:.2f}%")
    for title, metric in (("Classification Report:", classification_report),
                          ("Confusion Matrix:", confusion_matrix)):
        print(title)
        print(metric(y_test, y_pred))

    return clf

In [None]:
if __name__ == "__main__":
    # Flat folder holding the normalized sketches. The images are read from
    # this single directory, so no class information is available here and
    # every image receives the placeholder label 0.
    # TODO: derive real class labels (e.g. from a dataset/<class_name>/*.png
    # layout) before relying on the classifier step below.
    NORMALIZED_PATH = "normalized_sketches"

    print("Loading dataset...")
    images = []
    labels = []
    # Sorted so that row i of the feature matrix always refers to the same file.
    for filename in sorted(os.listdir(NORMALIZED_PATH)):
        img_path = os.path.join(NORMALIZED_PATH, filename)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is not None:
            images.append(img)
            labels.append(0)  # placeholder label, see the note above

    print(f"Loaded {len(images)} images from {len(set(labels))} classes.")

    if not images:
        # Without this guard the feature step would fail while stacking an
        # empty list of feature vectors.
        print(f"No readable images found in '{NORMALIZED_PATH}'; nothing to do.")
    else:
        print("Extracting hybrid features (HOG + Gabor) from images...")
        X_features = build_feature_matrix(images)
        print("Feature matrix shape:", X_features.shape)

        # ----------------------------
        # Unsupervised Clustering using Mean Shift
        # ----------------------------
        print("Performing Mean Shift clustering on the features...")
        ms_labels, ms_model = perform_mean_shift_clustering(X_features)

        # ----------------------------
        # Train and Evaluate a Classifier
        # ----------------------------
        if len(set(labels)) < 2:
            # A classifier cannot be trained (or meaningfully evaluated) when
            # only one class is present, so skip instead of crashing.
            print("Skipping SVM training: at least two distinct class labels are required.")
        else:
            print("Training and evaluating a multi-class SVM classifier...")
            classifier = train_and_evaluate(X_features, np.array(labels))


# **Mean Shift Clustering Algorithm**

Mean Shift is a **centroid-based** clustering algorithm built on **Kernel Density Estimation (KDE)**. It iteratively shifts each data point towards the nearest region of higher density, so points that end up at the same density peak form one cluster. Because it identifies clusters **without predefining the number of clusters**, it is well suited to image classification, where the number of groups is usually not known in advance.



In [6]:
import numpy as np
import cv2
import matplotlib.pyplot as plt

class MeanShiftImageSegmentation:
    """
    Mean Shift segmentation of an RGB image in a joint color + position space.

    Every pixel is described by its three LAB channels plus its (x, y)
    position scaled by spatial_weight. All points are repeatedly replaced by
    the Gaussian-weighted mean of all points until they stop moving or the
    iteration limit is reached.
    """

    def __init__(self, bandwidth=30, spatial_weight=0.1, tol=1, max_iter=10):
        """
        Initialize Mean Shift for image segmentation.

        Parameters:
        - bandwidth: Width of the Gaussian kernel; controls how much a pixel can shift.
        - spatial_weight: Weighs spatial coordinates (to avoid grouping far pixels).
        - tol: Convergence threshold on the largest per-pixel shift.
        - max_iter: Maximum number of mean-shift iterations (default 10).

        Raises:
        - ValueError: If bandwidth is not positive or max_iter is below 1.
        """
        if bandwidth <= 0:
            raise ValueError("bandwidth must be positive")
        if max_iter < 1:
            raise ValueError("max_iter must be at least 1")
        self.bandwidth = bandwidth
        self.spatial_weight = spatial_weight
        self.tol = tol
        self.max_iter = max_iter

    def fit(self, image):
        """
        Applies Mean Shift to segment the given image.

        Every iteration compares each pixel with every other pixel, so the
        cost grows quadratically with the number of pixels; use small images.

        Parameters:
        - image: Input image (H, W, 3) in RGB format. The output is produced
          as 8-bit, so the method is intended for uint8 input.

        Returns:
        - segmented_image: Image after Mean Shift segmentation, (H, W, 3) uint8.

        Raises:
        - ValueError: If image is empty or does not have shape (H, W, 3).
        """
        image = np.asarray(image)
        if image.ndim != 3 or image.shape[2] != 3 or image.size == 0:
            raise ValueError(
                f"Expected a non-empty (H, W, 3) RGB image, got shape {image.shape}"
            )

        # Convert image to LAB and flatten it to one feature row per pixel.
        h, w, c = image.shape
        img_lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        pixels = np.reshape(img_lab, (h * w, c)).astype(np.float32)

        # Add spatial (x, y) coordinates, scaled so they count less than color.
        X, Y = np.meshgrid(np.arange(w), np.arange(h))
        spatial_features = np.stack((X.ravel(), Y.ravel()), axis=1).astype(np.float32)
        features = np.concatenate((pixels, self.spatial_weight * spatial_features), axis=1)

        for _ in range(self.max_iter):
            # All new positions are computed from the positions of the
            # previous iteration, never from partially updated ones.
            new_features = np.empty_like(features)
            for i in range(features.shape[0]):
                distances = np.linalg.norm(features - features[i], axis=1)
                weights = np.exp(-0.5 * (distances / self.bandwidth) ** 2)
                # The point itself always has weight 1, so the sum is never 0.
                new_features[i] = np.sum(features * weights[:, None], axis=0) / np.sum(weights)

            shift_distance = np.linalg.norm(new_features - features, axis=1)
            features = new_features
            # Check for convergence: stop once no pixel moved by tol or more.
            if np.max(shift_distance) < self.tol:
                break

        # Round to the nearest 8-bit level. A plain cast would truncate and
        # could lower a channel by one level even when nothing changed.
        lab_values = np.clip(np.rint(features[:, :c]), 0, 255).astype(np.uint8)
        clustered_pixels = lab_values.reshape(h, w, c)
        segmented_image = cv2.cvtColor(clustered_pixels, cv2.COLOR_LAB2RGB)  # Convert back to RGB

        return segmented_image

# If we want to run it on a folder-> add here!
