In [2]:
# problem 1: pixel-wise matching
import cv2
import numpy as np

def l1_distance(x, y):
    """Return the absolute difference between ``x`` and ``y``.

    Used as a per-pixel matching cost: the closer the two values are,
    the smaller the returned cost.
    """
    difference = x - y
    return abs(difference)

def l2_distance(x, y):
    """Return the squared difference between ``x`` and ``y``.

    Note that no square root is taken: the result is ``(x - y) ** 2``,
    which is used directly as a per-pixel matching cost.
    """
    difference = x - y
    squared = difference ** 2
    return squared

def pixelwise_disparity(left_img_path, right_img_path, disparity_range=16, scale=16, cost_func=l1_distance):
    """Estimate a disparity map by matching single pixels along each row.

    For every pixel ``(y, x)`` of the left image, each candidate disparity
    ``d`` is scored with ``cost_func(left[y, x], right[y, x - d])``. The
    candidate with the lowest cost wins; ties keep the smallest ``d``.

    Args:
        left_img_path: Path of the left image; it is loaded as grayscale.
        right_img_path: Path of the right image; it is loaded as grayscale
            and must have the same shape as the left image.
        disparity_range: Number of candidate disparities, i.e. the values
            ``0 .. disparity_range - 1`` are tried.
        scale: Factor applied to the winning disparity before it is stored.
        cost_func: Callable taking two scalar pixel values (float32) and
            returning a cost where lower means a better match.

    Returns:
        A ``uint8`` array of shape ``(height, width)`` holding
        ``best_disparity * scale``, saturated at 255.

    Raises:
        OSError: If either image cannot be read.
        ValueError: If the two images do not have the same shape.
    """
    # Read images as grayscale (flag 0)
    left = cv2.imread(left_img_path, 0)
    right = cv2.imread(right_img_path, 0)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated AttributeError.
    for path, image in ((left_img_path, left), (right_img_path, right)):
        if image is None:
            raise OSError(f"Could not read image: {path!r}")
    if left.shape != right.shape:
        raise ValueError(
            f"Left and right images must have the same shape, got {left.shape} and {right.shape}"
        )

    # Convert to float32 so differences are not computed in wrapping uint8
    left = left.astype(np.float32)
    right = right.astype(np.float32)

    height, width = left.shape[:2]

    # Initialize disparity map
    disparity_map = np.zeros((height, width), np.uint8)

    for y in range(height):
        for x in range(width):
            # Start from +inf so the search does not depend on knowing the
            # maximum value a (possibly custom) cost function can return.
            min_cost = float("inf")
            best_disparity = 0

            # Only disparities with x - d >= 0 have a counterpart in the right
            # image, so the candidate range is capped at x + 1.
            for d in range(min(disparity_range, x + 1)):
                cost = cost_func(left[y, x], right[y, x - d])

                if cost < min_cost:
                    min_cost = cost
                    best_disparity = d

            # Saturate instead of overflowing the uint8 output
            disparity_map[y, x] = min(best_disparity * scale, 255)

    return disparity_map

# Example usage:
# Compute the pixel-wise disparity map with the function's default arguments
# and keep it in the notebook-level name `disparity`.
disparity = pixelwise_disparity('left.png', 'right.png')
# Show the result in an OpenCV window.
# NOTE(review): cv2.imshow needs a GUI backend; it may not work when the
# notebook kernel runs headless or remotely - confirm for the target setup.
cv2.imshow('Disparity Map', disparity)
# A delay of 0 makes waitKey wait for a key press without a timeout.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# problem 2: window-based matching
def window_based_disparity(left_img_path, right_img_path, disparity_range=64, kernel_size=5, scale=3, cost_func=l1_distance):
    """Estimate a disparity map by matching square windows along each row.

    For every pixel ``(y, x)`` whose window lies fully inside the image, each
    candidate disparity ``d`` is scored by summing
    ``cost_func(left[y + v, x + u], right[y + v, x + u - d])`` over the
    window offsets ``u`` and ``v``. The candidate with the lowest total cost
    wins; ties keep the smallest ``d``.

    Args:
        left_img_path: Path of the left image; it is loaded as grayscale.
        right_img_path: Path of the right image; it is loaded as grayscale
            and must have the same shape as the left image.
        disparity_range: Number of candidate disparities, i.e. the values
            ``0 .. disparity_range - 1`` are tried.
        kernel_size: Side length of the matching window. The window spans
            ``(kernel_size - 1) // 2`` pixels on each side of the centre, so
            an even size behaves like the next smaller odd size.
        scale: Factor applied to the winning disparity before it is stored.
        cost_func: Callable taking two scalar pixel values (float32) and
            returning a cost where lower means a better match.

    Returns:
        A ``uint8`` array of shape ``(height, width)`` holding
        ``best_disparity * scale``, saturated at 255. Pixels closer than the
        window half-width to the image border are left at 0.

    Raises:
        ValueError: If ``kernel_size`` is smaller than 1 or the two images do
            not have the same shape.
        OSError: If either image cannot be read.
    """
    if kernel_size < 1:
        raise ValueError(f"kernel_size must be at least 1, got {kernel_size}")

    # Read images as grayscale (flag 0)
    left = cv2.imread(left_img_path, 0)
    right = cv2.imread(right_img_path, 0)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated AttributeError.
    for path, image in ((left_img_path, left), (right_img_path, right)):
        if image is None:
            raise OSError(f"Could not read image: {path!r}")
    if left.shape != right.shape:
        raise ValueError(
            f"Left and right images must have the same shape, got {left.shape} and {right.shape}"
        )

    # Convert to float32 so differences are not computed in wrapping uint8
    left = left.astype(np.float32)
    right = right.astype(np.float32)

    height, width = left.shape[:2]

    # Initialize disparity map
    disparity_map = np.zeros((height, width), np.uint8)

    # Half-width of the window and the offsets it covers around the centre
    kernel_half = (kernel_size - 1) // 2
    offsets = range(-kernel_half, kernel_half + 1)

    for y in range(kernel_half, height - kernel_half):
        for x in range(kernel_half, width - kernel_half):
            # Start from +inf so the search does not depend on knowing the
            # maximum value a (possibly custom) cost function can return.
            min_cost = float("inf")
            best_disparity = 0

            # The leftmost window column is x - kernel_half; it must stay
            # >= 0 after shifting by d. Larger disparities could never win,
            # so they are not evaluated at all.
            candidate_count = min(disparity_range, x - kernel_half + 1)

            for d in range(candidate_count):
                total_cost = 0

                for v in offsets:
                    for u in offsets:
                        total_cost += cost_func(left[y + v, x + u], right[y + v, x + u - d])

                if total_cost < min_cost:
                    min_cost = total_cost
                    best_disparity = d

            # Saturate instead of overflowing the uint8 output
            disparity_map[y, x] = min(best_disparity * scale, 255)

    return disparity_map

# Example usage:
# Compute the window-based disparity map with the function's default
# arguments; this rebinds the notebook-level name `disparity`.
disparity = window_based_disparity('./aloe/Aloe_left_1.png', './aloe/Aloe_right_1.png')
# Show the result in an OpenCV window.
# NOTE(review): cv2.imshow needs a GUI backend; it may not work when the
# notebook kernel runs headless or remotely - confirm for the target setup.
cv2.imshow('Window-based Disparity', disparity)
# A delay of 0 makes waitKey wait for a key press without a timeout.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# problem 3: window-based matching with cosine similarity
def cosine_similarity(x, y):
    """Return the cosine similarity of the vectors ``x`` and ``y``.

    The dot product is divided by the product of the two Euclidean norms.
    A small constant (1e-10) is added to the denominator so that an
    all-zero vector yields 0 instead of a division by zero.
    """
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    return np.dot(x, y) / (norm_product + 1e-10)

def window_based_cosine_disparity(left_img_path, right_img_path, disparity_range=64, kernel_size=5, scale=3):
    """Estimate a disparity map by matching windows with cosine similarity.

    For every pixel ``(y, x)`` whose window lies fully inside the image, the
    left window is flattened to a vector and compared, via
    ``cosine_similarity``, with the right-image window centred at
    ``(y, x - d)`` for each candidate disparity ``d``. The candidate with the
    highest similarity wins; ties keep the smallest ``d``.

    Args:
        left_img_path: Path of the left image; it is loaded as grayscale.
        right_img_path: Path of the right image; it is loaded as grayscale
            and must have the same shape as the left image.
        disparity_range: Number of candidate disparities, i.e. the values
            ``0 .. disparity_range - 1`` are tried.
        kernel_size: Side length of the matching window. The window spans
            ``(kernel_size - 1) // 2`` pixels on each side of the centre, so
            an even size behaves like the next smaller odd size.
        scale: Factor applied to the winning disparity before it is stored.

    Returns:
        A ``uint8`` array of shape ``(height, width)`` holding
        ``best_disparity * scale``, saturated at 255. Pixels closer than the
        window half-width to the image border are left at 0.

    Raises:
        ValueError: If ``kernel_size`` is smaller than 1 or the two images do
            not have the same shape.
        OSError: If either image cannot be read.
    """
    if kernel_size < 1:
        raise ValueError(f"kernel_size must be at least 1, got {kernel_size}")

    # Read images as grayscale (flag 0)
    left = cv2.imread(left_img_path, 0)
    right = cv2.imread(right_img_path, 0)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated AttributeError.
    for path, image in ((left_img_path, left), (right_img_path, right)):
        if image is None:
            raise OSError(f"Could not read image: {path!r}")
    if left.shape != right.shape:
        raise ValueError(
            f"Left and right images must have the same shape, got {left.shape} and {right.shape}"
        )

    # Convert to float32 for calculations
    left = left.astype(np.float32)
    right = right.astype(np.float32)

    height, width = left.shape[:2]

    # Initialize disparity map
    disparity_map = np.zeros((height, width), np.uint8)

    # Half-width of the window around its centre pixel
    kernel_half = (kernel_size - 1) // 2

    for y in range(kernel_half, height - kernel_half):
        rows = slice(y - kernel_half, y + kernel_half + 1)

        for x in range(kernel_half, width - kernel_half):
            max_similarity = float("-inf")
            best_disparity = 0

            # The left window does not depend on d, so extract it once
            left_vector = left[rows, x - kernel_half:x + kernel_half + 1].flatten()

            # The right window's leftmost column is x - d - kernel_half; it
            # must stay >= 0, which caps the usable disparities.
            candidate_count = min(disparity_range, x - kernel_half + 1)

            for d in range(candidate_count):
                centre = x - d
                right_vector = right[rows, centre - kernel_half:centre + kernel_half + 1].flatten()

                similarity = cosine_similarity(left_vector, right_vector)

                if similarity > max_similarity:
                    max_similarity = similarity
                    best_disparity = d

            # Saturate instead of overflowing the uint8 output
            disparity_map[y, x] = min(best_disparity * scale, 255)

    return disparity_map

# Example usage:
# Compute the cosine-similarity disparity map with the function's default
# arguments; this rebinds the notebook-level name `disparity`.
# NOTE(review): these paths differ from the window-based example above
# (no './aloe/' directory, lower-case names) and pair left image 1 with
# right image 2 - confirm both are intentional.
disparity = window_based_cosine_disparity('aloe_left_1.png', 'aloe_right_2.png') 
# Show the result in an OpenCV window.
# NOTE(review): cv2.imshow needs a GUI backend; it may not work when the
# notebook kernel runs headless or remotely - confirm for the target setup.
cv2.imshow('Cosine Similarity Disparity', disparity)
# A delay of 0 makes waitKey wait for a key press without a timeout.
cv2.waitKey(0)
cv2.destroyAllWindows()