In [167]:
import cv2
import numpy as np
import os
from pathlib import Path
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
from typing import Tuple, Any, Optional, List


class ImagePreprocessor(ABC):
    """Common interface for image preprocessing steps.

    Concrete subclasses must override :meth:`preprocess`; the base class
    cannot be instantiated on its own.
    """

    @abstractmethod
    def preprocess(self, image_path: str, save_path: Optional[str] = None) -> np.ndarray:
        """
        Load the image at ``image_path`` and return its preprocessed form.

        Args:
            image_path (str): Location of the image to process
            save_path (str, optional): Where to write the processed image, if given

        Returns:
            np.ndarray: The processed image
        """

class SofaSegmenter(ImagePreprocessor):
    """Sofa segmentation and background removal using GrabCut algorithm.

    The image is downscaled if it exceeds ``max_size``, segmented with a
    mask-initialised GrabCut that seeds the image centre as foreground, and
    cropped to the padded bounding box of the largest foreground contour.
    """

    def __init__(self, padding: int = 10, max_size: int = 800, iterations: int = 1):
        """
        Initialize the sofa segmenter.

        Args:
            padding (int): Padding to add around the segmented sofa
            max_size (int): Maximum dimension size for resizing while maintaining aspect ratio
            iterations (int): Number of GrabCut iterations

        Raises:
            ValueError: If padding is negative, or max_size / iterations is not positive
        """
        if padding < 0 or max_size <= 0 or iterations <= 0:
            raise ValueError("Invalid parameters: padding must be >= 0, max_size and iterations must be > 0")

        self.padding = padding
        self.max_size = max_size
        self.iterations = iterations

    def _resize_image(self, image: np.ndarray) -> Tuple[np.ndarray, float]:
        """
        Resize image while maintaining aspect ratio if it exceeds max_size.

        Args:
            image: Input image

        Returns:
            Tuple[np.ndarray, float]: (Resized image, scale factor). The input
            is returned unchanged with a scale of 1.0 when no resize is needed.
        """
        height, width = image.shape[:2]
        max_dim = max(height, width)

        if max_dim <= self.max_size:
            return image, 1.0

        scale = self.max_size / max_dim
        # Clamp to one pixel: for very elongated images the short side would
        # otherwise truncate to 0, which cv2.resize rejects.
        new_width = max(1, int(width * scale))
        new_height = max(1, int(height * scale))
        resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
        return resized, scale

    def _create_initial_mask(self, height: int, width: int) -> np.ndarray:
        """
        Create initial mask for GrabCut with sofa-specific regions.

        Layout: a 5% border of definite background, a central rectangle of
        probable foreground, a smaller nested rectangle of definite
        foreground, and probable background everywhere else.

        Args:
            height (int): Image height
            width (int): Image width

        Returns:
            np.ndarray: uint8 mask filled with cv2.GC_* class labels
        """
        # Everything starts as probable background
        mask = np.full((height, width), cv2.GC_PR_BGD, dtype=np.uint8)

        border = int(min(height, width) * 0.05)

        # Mark borders as definite background. The guard matters: with
        # border == 0 the slice ``-0:`` selects the WHOLE axis and would turn
        # the entire mask into definite background.
        if border > 0:
            mask[:border, :] = cv2.GC_BGD
            mask[-border:, :] = cv2.GC_BGD
            mask[:, :border] = cv2.GC_BGD
            mask[:, -border:] = cv2.GC_BGD

        # Sofa typically occupies the central portion of the image. Order
        # matters: the inner (definite) region overwrites part of the outer
        # (probable) one.
        sofa_regions = (
            ((0.2, 0.8), (0.1, 0.9), cv2.GC_PR_FGD),  # outer
            ((0.3, 0.7), (0.2, 0.8), cv2.GC_FGD),     # inner
        )
        for (y_lo, y_hi), (x_lo, x_hi), label in sofa_regions:
            y_start, y_end = int(height * y_lo), int(height * y_hi)
            x_start, x_end = int(width * x_lo), int(width * x_hi)
            mask[y_start:y_end, x_start:x_end] = label

        return mask

    def _get_bounding_box(self, mask: np.ndarray) -> Tuple[int, int, int, int]:
        """
        Get bounding box coordinates from the largest contour in the mask.

        Args:
            mask: Binary mask

        Returns:
            Tuple[int, int, int, int]: (x, y, width, height) of bounding box;
            the full mask extent when no contour is found
        """
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            largest_contour = max(contours, key=cv2.contourArea)
            return cv2.boundingRect(largest_contour)
        return (0, 0, mask.shape[1], mask.shape[0])

    def _segment_sofa(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
        """
        Segment sofa from image using GrabCut algorithm.

        Args:
            image: Input BGR image

        Returns:
            Tuple[np.ndarray, Tuple[int, int, int, int]]: (Segmented image, bounding box)
        """
        height, width = image.shape[:2]
        mask = self._create_initial_mask(height, width)

        # Scratch arrays that cv2.grabCut uses for its internal models
        background_model = np.zeros((1, 65), np.float64)
        foreground_model = np.zeros((1, 65), np.float64)

        # Perform GrabCut segmentation; ``mask`` is updated in place
        cv2.grabCut(image, mask, None, background_model, foreground_model,
                    self.iterations, cv2.GC_INIT_WITH_MASK)

        # Keep definite + probable foreground, drop both background classes
        is_foreground = (mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD)
        binary_mask = is_foreground.astype(np.uint8)
        segmented_image = cv2.bitwise_and(image, image, mask=binary_mask)

        return segmented_image, self._get_bounding_box(binary_mask)

    @staticmethod
    def _padded_crop_box(box: Tuple[int, int, int, int], scale: float, padding: int,
                         image_width: int, image_height: int) -> Tuple[int, int, int, int]:
        """Map a bounding box to original resolution and pad it symmetrically.

        Args:
            box: (x, y, width, height) in the resized image's coordinates
            scale: Scale factor that was applied to obtain the resized image
            padding: Pixels to add on every side, at original resolution
            image_width: Width of the original image
            image_height: Height of the original image

        Returns:
            Tuple[int, int, int, int]: (left, top, right, bottom) with
            exclusive right/bottom edges, clipped to the image
        """
        x, y, w, h = box
        # Floor the near edges and ceil the far edges so the scaled box never
        # shrinks because of truncation.
        left = int(np.floor(x / scale))
        top = int(np.floor(y / scale))
        right = int(np.ceil((x + w) / scale))
        bottom = int(np.ceil((y + h) / scale))

        # Pad each edge independently. Deriving the far edge from a clamped
        # origin plus ``w + 2 * padding`` would over-extend it whenever the
        # near edge hits the image boundary.
        left = max(0, left - padding)
        top = max(0, top - padding)
        right = min(image_width, right + padding)
        bottom = min(image_height, bottom + padding)
        return left, top, right, bottom

    def preprocess(self, image_path: str, save_path: Optional[str] = None) -> np.ndarray:
        """
        Preprocess sofa image using GrabCut-based segmentation.

        Args:
            image_path: Path to input image
            save_path: Optional path to save the processed image

        Returns:
            np.ndarray: Processed image with background removed and cropped

        Raises:
            ValueError: If the image cannot be loaded from ``image_path``
            OSError: If ``save_path`` is given and the image cannot be written
        """
        # Load and validate image
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Failed to load image from {image_path}")
        full_height, full_width = image.shape[:2]

        # Process image at reduced size for efficiency
        resized_image, scale = self._resize_image(image)
        segmented_image, box = self._segment_sofa(resized_image)

        # Bring the segmentation back to the original resolution if needed
        if scale != 1.0:
            segmented_image = cv2.resize(segmented_image, (full_width, full_height),
                                         interpolation=cv2.INTER_CUBIC)

        left, top, right, bottom = self._padded_crop_box(
            box, scale, self.padding, full_width, full_height
        )

        # Crop to sofa region
        result = segmented_image[top:bottom, left:right]

        if save_path:
            # cv2.imwrite signals failure through its return value
            if not cv2.imwrite(save_path, result):
                raise OSError(f"Failed to save image to {save_path}")

        return result

In [168]:
import cv2
import numpy as np
import os
from pathlib import Path
import matplotlib.pyplot as plt
from abc import ABC, abstractmethod
from typing import Tuple, Optional, List

In [169]:
class FeatureExtractor(ABC):
    """Abstract base class for feature extractors.

    The value returned by :meth:`extract_features` is opaque to callers: it
    is handed back unchanged, one per image, to :meth:`compute_similarity`.
    Each implementation therefore chooses its own feature representation
    (for example a tuple of arrays) as long as both methods agree on it.
    """

    @abstractmethod
    def extract_features(self, image: np.ndarray) -> Any:
        """
        Extract features from an image.

        Args:
            image (np.ndarray): Input image as a numpy array (BGR format)

        Returns:
            Implementation-defined feature object that can be passed as
            either argument of ``compute_similarity``.
        """
        pass

    @abstractmethod
    def compute_similarity(self, desc1: Any, desc2: Any) -> float:
        """
        Compute similarity between the features of two images.

        Args:
            desc1: Features of the first image, as returned by ``extract_features``
            desc2: Features of the second image, as returned by ``extract_features``

        Returns:
            float: Similarity score between 0 and 1
        """
        pass

In [170]:
def visualize_images(img1: np.ndarray, img2: np.ndarray, similarity_score: Optional[float] = None) -> None:
    """
    Show a query image next to its best match in a single figure.

    Args:
        img1 (np.ndarray): Image for the left panel (BGR format)
        img2 (np.ndarray): Image for the right panel (BGR format)
        similarity_score (float, optional): If given, shown as the figure title
    """
    # Convert both inputs from BGR to RGB before any figure is created
    panels = [
        (cv2.cvtColor(img1, cv2.COLOR_BGR2RGB), 'Query Image'),
        (cv2.cvtColor(img2, cv2.COLOR_BGR2RGB), 'Most Similar Image'),
    ]

    # One row, two columns: left panel is the query, right panel is the match
    fig, axes = plt.subplots(1, 2, figsize=(20,10))
    for axis, (rgb_image, title) in zip(axes, panels):
        axis.imshow(rgb_image)
        axis.set_title(title)
        axis.axis('off')

    if similarity_score is not None:
        plt.suptitle(f'Similarity Score: {similarity_score:.2f}', fontsize=16)

    plt.tight_layout()
    plt.show()

In [171]:
def find_most_similar_sofa(query_image: np.ndarray, database_dir: str, feature_extractor: FeatureExtractor,
                           patterns: Tuple[str, ...] = ("*.jpg",)) -> Tuple[np.ndarray, float]:
    """
    Find the most similar sofa image in the database using feature_extractor.

    Args:
        query_image (np.ndarray): Query image as numpy array (BGR format)
        database_dir (str): Directory containing database images
        feature_extractor (FeatureExtractor): Feature extractor object
        patterns (tuple of str): Glob patterns, relative to ``database_dir``,
            selecting the candidate image files. Defaults to ``("*.jpg",)``.

    Returns:
        tuple: (most_similar_image, similarity_score)
            - most_similar_image: numpy array of the most similar image
            - similarity_score: score returned by the extractor for that image

    Raises:
        ValueError: If no readable candidate image produced a score
    """
    # Extract features from query image once, outside the loop
    query_features = feature_extractor.extract_features(query_image)

    # Collect candidates from every pattern. The set removes files matched by
    # more than one pattern; sorting makes the winner among equal scores
    # independent of filesystem enumeration order.
    database_path = Path(database_dir)
    candidate_paths = set()
    for pattern in patterns:
        candidate_paths.update(database_path.glob(pattern))

    # -inf rather than a fixed sentinel so any real score can win
    best_score = float("-inf")
    best_match = None

    # Compare with all images in database
    for img_path in sorted(candidate_paths):
        db_img = cv2.imread(str(img_path))
        if db_img is None:
            print(f"Warning: Could not read image at {img_path}, skipping...")
            continue

        # Extract features and compute similarity
        db_features = feature_extractor.extract_features(db_img)
        score = feature_extractor.compute_similarity(query_features, db_features)

        # Strict comparison: the first image (in sorted order) wins ties, and
        # a NaN score never replaces the current best
        if score > best_score:
            best_score = score
            best_match = db_img

    if best_match is None:
        raise ValueError("No valid matches found in the database")

    return best_match, best_score

# Color Histogram

## 1. Color Feature Extraction

### HSV Color Space
- The image is converted from BGR to HSV (Hue, Saturation, Value) color space
- HSV is chosen because it better represents how humans perceive color:
  - Hue: The actual color (OpenCV stores it as 0-179 for 8-bit images, i.e. degrees / 2)
  - Saturation: Color intensity (0-255)
  - Value: Brightness (0-255)

### Color Histograms
- Separate histograms are computed for each HSV channel using 32 bins
- Each histogram is normalized to [0,1] range for consistent comparison
- The three histograms are concatenated to form a single color feature vector

## 2. Texture Feature Extraction

### GLCM (Gray-Level Co-occurrence Matrix)
- The image is converted to grayscale and resized to 64x64 for consistency
- Gray levels are reduced to 8 for computational efficiency
- GLCM captures spatial relationships between pixels by counting how often pairs of pixels with specific values occur in a specific spatial relationship

### Texture Features Extracted from GLCM
Four statistical measures are computed:
1. **Contrast**: Measures local variations in the GLCM
2. **Homogeneity**: Measures closeness of element distribution in GLCM
3. **Energy**: Sum of squared elements in GLCM
4. **Correlation**: Measures linear dependency of gray levels

## 3. Similarity Computation

The similarity between two images is calculated using a weighted combination:

### Color Similarity (70% weight)
- Uses correlation between color histograms
- Correlation measures the statistical relationship between histograms

### Texture Similarity (30% weight)
- Uses cosine similarity between texture feature vectors
- Cosine similarity measures the angle between feature vectors

### Final Score
- Combined score = 0.7 * color_similarity + 0.3 * texture_similarity
- Score is clamped to [0,1] range

The weights (0.7 for color, 0.3 for texture) can be adjusted based on specific requirements.

I think this approach is a good alternative to SIFT for feature extraction and similarity computation because it takes color into account.

In [172]:
class ColorHistogramExtractor(FeatureExtractor):
    """Color Histogram and Texture feature extractor implementation.

    Features are a pair ``(color_hist, texture_features)``: concatenated,
    min-max normalised H/S/V histograms plus four statistics of a horizontal
    gray-level co-occurrence matrix (GLCM).
    """

    def __init__(self, hist_bins=32, color_weight=0.7, texture_weight=0.3):
        """
        Initialize the Color Histogram extractor.

        Args:
            hist_bins (int): Number of bins for the color histogram
            color_weight (float): Weight of the color similarity in the combined score
            texture_weight (float): Weight of the texture similarity in the combined score

        Raises:
            ValueError: If hist_bins is not positive, a weight is negative,
                or both weights are zero
        """
        if hist_bins <= 0:
            raise ValueError("hist_bins must be > 0")
        if color_weight < 0 or texture_weight < 0 or (color_weight + texture_weight) <= 0:
            raise ValueError("weights must be >= 0 and must not both be zero")

        self.hist_bins = hist_bins
        self.color_weight = color_weight
        self.texture_weight = texture_weight

    def extract_features(self, image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Extract color histogram and texture features from an image.

        Args:
            image (np.ndarray): Input image as a numpy array (BGR format)

        Returns:
            tuple: (color_hist, texture_features)
                - color_hist: Combined color histogram for all channels
                - texture_features: Texture features from gray-scale image

        Raises:
            ValueError: If the image is None or empty
        """
        if image is None or image.size == 0:
            raise ValueError("Invalid input image")

        # Convert BGR to HSV color space
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # One histogram per channel; hue uses the range [0, 180), the other
        # two channels use [0, 256)
        channel_ranges = ((0, [0, 180]), (1, [0, 256]), (2, [0, 256]))
        histograms = []
        for channel, value_range in channel_ranges:
            hist = cv2.calcHist([hsv], [channel], None, [self.hist_bins], value_range)
            # Min-max normalise in place to [0, 1]
            cv2.normalize(hist, hist, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
            histograms.append(hist)
        color_features = np.concatenate(histograms)

        # Texture features come from the gray-scale image via its GLCM
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        glcm = self._calculate_glcm(gray)
        texture_features = self._extract_glcm_features(glcm)

        return color_features, texture_features

    def compute_similarity(self, features1: Tuple[np.ndarray, np.ndarray],
                         features2: Tuple[np.ndarray, np.ndarray]) -> float:
        """
        Compute similarity between two sets of features.

        Args:
            features1: Tuple of (color_hist1, texture_features1)
            features2: Tuple of (color_hist2, texture_features2)

        Returns:
            float: Similarity score between 0 and 1 (0.0 if the combined
            score is not a finite number)
        """
        color_hist1, texture_features1 = features1
        color_hist2, texture_features2 = features2

        # Compare color histograms using correlation
        color_similarity = cv2.compareHist(color_hist1, color_hist2, cv2.HISTCMP_CORREL)

        # Compare texture features using cosine similarity
        texture_similarity = self._cosine_similarity(texture_features1, texture_features2)

        combined_similarity = (self.color_weight * color_similarity
                               + self.texture_weight * texture_similarity)

        # max(0.0, min(1.0, nan)) evaluates to 1.0, which would report a
        # perfect match for an undefined score, so reject non-finite values
        # before clamping.
        if not np.isfinite(combined_similarity):
            return 0.0
        return float(max(0.0, min(1.0, combined_similarity)))

    def _calculate_glcm(self, gray_image: np.ndarray) -> np.ndarray:
        """Calculate the normalised horizontal Gray-Level Co-Occurrence Matrix.

        The image is resized to 64x64 and quantised to 8 gray levels; entry
        ``[a, b]`` of the result is the fraction of horizontally adjacent
        pixel pairs whose left pixel has level ``a`` and right pixel level ``b``.
        """
        levels = 8
        resized = cv2.resize(gray_image, (64, 64))
        quantized = ((resized / 256) * levels).astype(np.uint8)

        # Every pixel paired with its right-hand neighbour, on ALL rows
        left = quantized[:, :-1].astype(np.intp).ravel()
        right = quantized[:, 1:].astype(np.intp).ravel()

        # Encode each (left, right) pair as one flat index and count them
        counts = np.bincount(left * levels + right, minlength=levels * levels)
        glcm = counts.reshape(levels, levels).astype(np.float64)

        # Normalize GLCM so its entries sum to 1
        return glcm / glcm.sum()

    def _extract_glcm_features(self, glcm: np.ndarray) -> np.ndarray:
        """Extract [contrast, homogeneity, energy, correlation] from a GLCM.

        ``glcm`` is expected to be a square matrix whose entries sum to 1.
        """
        level_index = np.arange(glcm.shape[0])
        row = level_index[:, None]   # gray level of the first pixel (i)
        col = level_index[None, :]   # gray level of the second pixel (j)
        squared_diff = np.square(row - col)

        # Weight by (i - j)^2: large when neighbouring levels differ a lot
        contrast = np.sum(squared_diff * glcm)
        # Weight by 1 / (1 + (i - j)^2): large when mass sits near the diagonal
        homogeneity = np.sum(glcm / (1.0 + squared_diff))
        energy = np.sum(np.square(glcm))

        # Normalised correlation between the two gray levels of a pair
        mean_row = np.sum(row * glcm)
        mean_col = np.sum(col * glcm)
        std_row = np.sqrt(np.sum(np.square(row - mean_row) * glcm))
        std_col = np.sqrt(np.sum(np.square(col - mean_col) * glcm))
        denominator = std_row * std_col
        if denominator > 1e-12:
            correlation = np.sum((row - mean_row) * (col - mean_col) * glcm) / denominator
        else:
            # Zero variance (e.g. a single gray level): treat the levels as
            # perfectly correlated instead of dividing by zero
            correlation = 1.0

        return np.array([contrast, homogeneity, energy, correlation])

    def _cosine_similarity(self, v1: np.ndarray, v2: np.ndarray) -> float:
        """Calculate cosine similarity between two vectors (0.0 if either is all zeros)."""
        denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
        if denominator == 0:
            return 0.0
        return float(np.dot(v1, v2) / denominator)

In [None]:
# Locations of the reference database and of the query image
data_dir = "../data/sofas/processed"
query_image_path = "../data/sofas/test/image_1.jpg"

# Segment the query image, then build the extractor used for matching
query_img = SofaSegmenter(padding=20, max_size=800).preprocess(query_image_path)
feature_extractor = ColorHistogramExtractor()

# Search the database for the closest match and report its score
similar_image, similarity_score = find_most_similar_sofa(
    query_image=query_img,
    database_dir=data_dir,
    feature_extractor=feature_extractor,
)
print(f"Similarity score: {similarity_score:.2f}")

# Show the query next to its best match
visualize_images(query_img, similar_image)