<div align="center">

# **<span>Task 4: Image Segmentation Using Clustering Methods</span>**

</div>

In [None]:
import os
import json
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pandas as pd
from scipy import ndimage
from skimage import measure
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering, MiniBatchKMeans
from sklearn.metrics import silhouette_score, silhouette_samples
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from scipy import ndimage
from skimage import feature, filters
import cv2

# 1. Dataset Loading

In [None]:
def load_and_preprocess_dataset(image_dir, annotation_path, num_images=50, scale_factor=8):
    """
    Load a random sample of images, downscale them and pair each with its annotation.

    Parameters:
    image_dir: directory containing .jpg / .png / .jpeg files
    annotation_path: path to a JSON file whose top-level container is looked up
        by image file name without extension
    num_images: maximum number of images to sample (without replacement)
    scale_factor: integer divisor applied to both width and height

    Returns:
    images: list of downscaled RGB images
    annotations: list parallel to images; None where no annotation key matched
    """
    with open(annotation_path, 'r') as f:
        all_annotations = json.load(f)

    # Sorted so that, for a fixed NumPy seed, the sample does not depend on the
    # arbitrary order in which os.listdir returns the directory entries.
    image_files = sorted(
        name for name in os.listdir(image_dir)
        if name.lower().endswith(('.jpg', '.png', '.jpeg'))
    )
    if not image_files:
        return [], []

    sampled_files = np.random.choice(image_files, min(num_images, len(image_files)), replace=False)

    images = []
    annotations = []

    for img_file in sampled_files:
        img_path = os.path.join(image_dir, img_file)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable/corrupt file by returning None
            # rather than raising; skip it so both lists stay aligned.
            print(f"Warning: could not read image '{img_path}', skipping")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        height, width = img.shape[:2]
        # Clamp to 1 so images smaller than scale_factor do not produce a
        # zero-sized target, which cv2.resize rejects.
        new_width = max(1, width // scale_factor)
        new_height = max(1, height // scale_factor)
        images.append(cv2.resize(img, (new_width, new_height)))

        # NOTE(review): this assumes the JSON is keyed by file stem. A COCO-style
        # export (top-level 'images' / 'annotations' / 'categories') would never
        # match here and every annotation would be None - confirm the schema.
        img_id = os.path.splitext(img_file)[0]
        annotations.append(all_annotations[img_id] if img_id in all_annotations else None)

    return images, annotations

# Dataset location. Set the SEGMENTATION_DATA_DIR environment variable to point
# at another copy of the data; the default is the path this notebook was
# originally run with.
DATA_DIR = os.environ.get(
    'SEGMENTATION_DATA_DIR',
    '/Users/sarayetel/Desktop/UT/Data Science/DataScience/CA5&6/4_Image_Segmentation_Using_Clustering_Methods/data',
)

images, annotations = load_and_preprocess_dataset(
    os.path.join(DATA_DIR, 'images'),
    os.path.join(DATA_DIR, 'annotations', 'instances_default.json'),
)

# Fill a 1-D object array slot by slot. np.array(images, dtype=object) would,
# for equally shaped images, build an (N, H, W, 3) array whose *elements* are
# Python objects, so every image read back from the cache would have dtype
# object instead of uint8; for some mixes of shapes it can also fail outright.
images_to_save = np.empty(len(images), dtype=object)
for _idx, _img in enumerate(images):
    images_to_save[_idx] = _img

np.save("images.npy", images_to_save)
with open("annotations.json", "w") as f:
    json.dump(annotations, f)

In [4]:
# Restore the cached sample from the working directory.
# allow_pickle=True is needed because the images are stored as an object array;
# pickle can execute arbitrary code, so only load an images.npy you trust.
images = np.load("images.npy", allow_pickle=True)

with open("annotations.json", "r") as cache_file:
    annotations = json.load(cache_file)

# 2. Creating Features

In [None]:
def _scale_by_peak(values):
    """Divide by the largest absolute value; an all-zero or empty array is returned unscaled."""
    values = np.asarray(values, dtype=np.float64)
    peak = np.max(np.abs(values)) if values.size else 0.0
    return values / peak if peak > 0 else values


def create_advanced_features(image, method='comprehensive'):
    """
    Create a per-pixel feature matrix for clustering.

    Parameters:
    image: input image of shape (height, width, channels); the colour values
        are divided by 255, and the texture/edge methods convert it with
        cv2.COLOR_RGB2GRAY, so an 8-bit RGB image is expected
    method: feature creation method
        'color_position' - colour + normalised x/y position
        'comprehensive'  - colour, position, LBP texture, Sobel edge magnitude
        'texture_rich'   - colour, position, 16 Gabor responses, LBP texture
        'edge_aware'     - colour, position, Canny, Sobel and Laplacian edges

    Returns:
    features: array with one row per pixel, in the same row-major order as
        image.reshape(-1, channels)

    Raises:
    ValueError: if method is not one of the names above
    """
    height, width, channels = image.shape
    pixels = image.reshape(-1, channels) / 255.0  # Normalize to [0, 1]

    # Position features (always included). reshape flattens row by row, so flat
    # index i is (row i // width, column i % width): the column index cycles
    # fastest (tile) and the row index is held for `width` entries (repeat).
    x_coords = np.tile(np.arange(width), height) / width
    y_coords = np.repeat(np.arange(height), width) / height

    if method == 'color_position':
        features = np.column_stack((pixels, x_coords, y_coords))

    elif method == 'comprehensive':
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Texture: uniform Local Binary Patterns (8 neighbours, radius 1)
        lbp = feature.local_binary_pattern(gray, 8, 1, method='uniform')
        lbp_features = _scale_by_peak(lbp).reshape(-1, 1)

        # Edges: Sobel gradient magnitude
        sobel_x = filters.sobel_v(gray)
        sobel_y = filters.sobel_h(gray)
        edge_magnitude = _scale_by_peak(np.sqrt(sobel_x**2 + sobel_y**2)).reshape(-1, 1)

        features = np.column_stack((pixels, x_coords, y_coords,
                                   lbp_features, edge_magnitude))

    elif method == 'texture_rich':
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Real part of the Gabor response for 4 orientations x 2 sigmas x 2 frequencies
        gabor_filters = []
        for theta in np.arange(0, np.pi, np.pi/4):
            for sigma in (1, 3):
                for frequency in (0.05, 0.25):
                    gabor = filters.gabor(gray, frequency, theta=theta, sigma_x=sigma, sigma_y=sigma)[0]
                    gabor_filters.append(gabor.reshape(-1, 1))

        lbp = feature.local_binary_pattern(gray, 8, 1, method='uniform')
        lbp_features = _scale_by_peak(lbp).reshape(-1, 1)

        texture_features = np.column_stack(gabor_filters + [lbp_features])
        features = np.column_stack((pixels, x_coords, y_coords, texture_features))

    elif method == 'edge_aware':
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Canny edges as a 0/1 indicator
        canny_edges = feature.canny(gray, sigma=1).astype(float).reshape(-1, 1)

        # Sobel magnitude, scaled by the peak magnitude (not the peak of the
        # squared magnitude) so it matches the 'comprehensive' edge feature
        sobel_x = filters.sobel_v(gray)
        sobel_y = filters.sobel_h(gray)
        sobel_magnitude = _scale_by_peak(np.sqrt(sobel_x**2 + sobel_y**2)).reshape(-1, 1)

        # Laplacian is signed: scale by the largest absolute response so the
        # result stays in [-1, 1] and a flat image does not divide by zero
        laplacian = _scale_by_peak(filters.laplace(gray)).reshape(-1, 1)

        edge_features = np.column_stack([canny_edges, sobel_magnitude, laplacian])
        features = np.column_stack((pixels, x_coords, y_coords, edge_features))

    else:
        raise ValueError(f"Unknown feature method: {method}")

    return features

# 3. Cluster Pixels

In [None]:
# ------------------------- Parameter Optimization Functions -----------------------------

def find_optimal_k_elbow(features, k_range=range(2, 11), minibatch=True, sample_size=10000):
    """
    Find optimal k using the elbow method and plot the inertia curve.

    Parameters:
    features: feature matrix, one row per sample
    k_range: candidate cluster counts (at least three are needed)
    minibatch: use MiniBatchKMeans instead of KMeans
    sample_size: number of rows randomly sampled for the search

    Returns:
    the candidate k at which the inertia curve bends most sharply

    Raises:
    ValueError: if k_range holds fewer than three candidates
    """
    k_values = list(k_range)
    if len(k_values) < 3:
        # Curvature is a second difference, which needs three points.
        raise ValueError("k_range must contain at least three candidate values for the elbow method")

    if sample_size < len(features):
        indices = np.random.choice(len(features), sample_size, replace=False)
        sample_features = features[indices]
    else:
        sample_features = features

    scaler = StandardScaler()
    feats_scaled = scaler.fit_transform(sample_features)

    inertias = []
    for k in k_values:
        if minibatch:
            km = MiniBatchKMeans(n_clusters=k, random_state=42, batch_size=4096)
        else:
            km = KMeans(n_clusters=k, random_state=42, n_init=10)
        km.fit(feats_scaled)
        inertias.append(km.inertia_)

    # Elbow = point of maximum curvature. second_diff[i] equals
    # inertias[i+2] - 2*inertias[i+1] + inertias[i], i.e. it is centred on
    # inertias[i+1], so the offset back into k_values is +1.
    inertias = np.array(inertias)
    second_diff = np.diff(inertias, n=2)
    elbow_point = int(np.argmax(np.abs(second_diff))) + 1
    elbow_k = k_values[elbow_point]

    # Plot the elbow curve
    plt.figure(figsize=(10, 6))
    plt.plot(k_values, inertias, 'bo-')
    plt.xlabel('Number of clusters (k)')
    plt.ylabel('Inertia')
    plt.title('Elbow Method For Optimal k')
    plt.axvline(x=elbow_k, color='r', linestyle='--', label=f'Elbow at k={elbow_k}')
    plt.legend()
    plt.show()

    return elbow_k

def find_optimal_k_silhouette(features, k_range=range(2, 11), minibatch=True, sample_size=10000):
    """
    Pick the k with the highest silhouette score and plot score against k.

    The search runs on a random subset of at most sample_size rows, after
    standardising that subset.
    """
    n_rows = len(features)
    if sample_size < n_rows:
        picked = np.random.choice(n_rows, sample_size, replace=False)
        subset = features[picked]
    else:
        subset = features

    scaled = StandardScaler().fit_transform(subset)

    def make_estimator(k):
        # Same seed for both variants so repeated runs agree
        if minibatch:
            return MiniBatchKMeans(n_clusters=k, random_state=42, batch_size=4096)
        return KMeans(n_clusters=k, random_state=42, n_init=10)

    silhouette_scores = []
    for k in k_range:
        cluster_ids = make_estimator(k).fit_predict(scaled)
        # A single cluster has no silhouette; record -1 so it can never win
        silhouette_scores.append(silhouette_score(scaled, cluster_ids) if k > 1 else -1)

    optimal_k = k_range[np.argmax(silhouette_scores)]

    # Score curve with the winner marked
    plt.figure(figsize=(10, 6))
    plt.plot(k_range, silhouette_scores, 'bo-')
    plt.xlabel('Number of clusters (k)')
    plt.ylabel('Silhouette Score')
    plt.title('Silhouette Method For Optimal k')
    plt.axvline(x=optimal_k, color='r', linestyle='--', label=f'Optimal k={optimal_k}')
    plt.legend()
    plt.show()

    return optimal_k

def find_optimal_dbscan_params(features, eps_range=np.arange(0.1, 1.0, 0.1), 
                              min_samples_range=range(5, 50, 5), sample_size=5000):
    """
    Find optimal DBSCAN parameters using silhouette score.

    The grid search runs on a random subset of at most sample_size rows that is
    standardised and reduced with PCA (up to 10 components).

    Parameters:
    features: feature matrix, one row per sample
    eps_range: candidate eps values
    min_samples_range: candidate min_samples values
    sample_size: number of rows randomly sampled for the search

    Returns:
    (best_eps, best_min_samples); both are None when no combination produced
    at least two real clusters, so callers must handle that case.
    """
    if sample_size < len(features):
        indices = np.random.choice(len(features), sample_size, replace=False)
        sample_features = features[indices]
    else:
        sample_features = features
    n_samples = len(sample_features)

    scaler = StandardScaler()
    feats_scaled = scaler.fit_transform(sample_features)

    # Reduce dimensionality for DBSCAN to improve performance
    pca = PCA(n_components=min(10, feats_scaled.shape[1]))
    feats_reduced = pca.fit_transform(feats_scaled)

    best_score = -1
    best_eps = None
    best_min_samples = None

    for eps in eps_range:
        for min_samples in min_samples_range:
            db = DBSCAN(eps=eps, min_samples=min_samples, n_jobs=-1)
            labels = db.fit_predict(feats_reduced)

            unique_labels = np.unique(labels)
            # -1 is DBSCAN's noise label, not a cluster: "one cluster plus
            # noise" must not be scored as a two-cluster solution.
            n_real_clusters = int(np.sum(unique_labels != -1))
            # Fragmentation bound is relative to the rows actually clustered,
            # which is smaller than sample_size when features is short.
            if n_real_clusters < 2 or len(unique_labels) >= n_samples / 2:
                continue

            # Noise points are scored as their own group.
            score = silhouette_score(feats_reduced, labels)
            if score > best_score:
                best_score = score
                best_eps = eps
                best_min_samples = min_samples

    print(f"Best DBSCAN parameters: eps={best_eps}, min_samples={best_min_samples}, score={best_score}")
    return best_eps, best_min_samples

def find_optimal_agglomerative_params(features, n_clusters_range=range(2, 11), 
                                     linkage_types=['ward', 'complete', 'average', 'single'],
                                     sample_size=5000):
    """
    Find optimal Agglomerative clustering parameters using silhouette score.

    Parameters:
    features: feature matrix, one row per sample
    n_clusters_range: candidate cluster counts (values below 2 are skipped)
    linkage_types: candidate linkage names; 'ward' uses Euclidean distance,
        every other linkage uses cosine distance
    sample_size: number of rows randomly sampled for the search

    Returns:
    (best_n_clusters, best_linkage); both are None if nothing could be scored
    """
    if sample_size < len(features):
        indices = np.random.choice(len(features), sample_size, replace=False)
        sample_features = features[indices]
    else:
        sample_features = features

    scaler = StandardScaler()
    feats_scaled = scaler.fit_transform(sample_features)

    best_score = -1
    best_n_clusters = None
    best_linkage = None

    for n_clusters in n_clusters_range:
        if n_clusters < 2:
            # Silhouette score requires at least 2 clusters
            continue
        for linkage in linkage_types:
            if linkage == 'ward':
                # Ward linkage can only be used with Euclidean distance
                agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
            else:
                # Newer scikit-learn releases name the distance argument
                # `metric`; older ones only accept `affinity`. An unknown
                # keyword raises TypeError at construction time.
                try:
                    agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage, metric='cosine')
                except TypeError:
                    agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage, affinity='cosine')

            labels = agg.fit_predict(feats_scaled)

            score = silhouette_score(feats_scaled, labels)
            if score > best_score:
                best_score = score
                best_n_clusters = n_clusters
                best_linkage = linkage

    print(f"Best Agglomerative parameters: n_clusters={best_n_clusters}, linkage={best_linkage}, score={best_score}")
    return best_n_clusters, best_linkage

# ------------------------- Enhanced Clustering wrappers -----------------------------

def run_kmeans_optimized(features, method='silhouette', minibatch=True, sample_size=10000):
    """
    Run KMeans with an automatically selected number of clusters.

    Parameters:
    features: feature matrix, one row per sample
    method: 'silhouette' selects k by silhouette score; any other value uses
        the elbow method
    minibatch: use MiniBatchKMeans instead of KMeans
    sample_size: number of rows used for the k search and for the final
        silhouette estimate

    Returns:
    labels: cluster label for every row of features
    km: fitted clustering model
    scaler: StandardScaler fitted on features
    n_clusters: the k that was used
    """
    # Find optimal k
    if method == 'silhouette':
        n_clusters = find_optimal_k_silhouette(features, minibatch=minibatch, sample_size=sample_size)
    else:  # elbow method
        n_clusters = find_optimal_k_elbow(features, minibatch=minibatch, sample_size=sample_size)

    print(f"Using k={n_clusters} for KMeans")

    # Scaling important for mixing different feature types
    scaler = StandardScaler()
    feats_scaled = scaler.fit_transform(features)

    if minibatch:
        km = MiniBatchKMeans(n_clusters=n_clusters, random_state=42, batch_size=4096)
    else:
        km = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)

    km.fit(feats_scaled)
    labels = km.labels_

    # Final silhouette score. Check the labels actually produced (not the
    # requested k), and estimate on a subsample: the exact score needs all
    # pairwise distances, which is quadratic in the number of pixels.
    if len(np.unique(labels)) > 1:
        subsample = sample_size if sample_size < len(labels) else None
        score = silhouette_score(feats_scaled, labels, sample_size=subsample, random_state=42)
        print(f"Final Silhouette Score: {score}")

    return labels, km, scaler, n_clusters

def run_dbscan_optimized(features, sample_size=5000):
    """
    Run DBSCAN with automatically selected eps / min_samples.

    Parameters:
    features: feature matrix, one row per sample
    sample_size: number of rows used for the parameter search and for the
        final silhouette estimate

    Returns:
    labels: cluster label for every row of features (-1 marks noise)
    db: fitted DBSCAN model
    scaler: StandardScaler fitted on features
    (eps, min_samples): the parameters that were used
    """
    # Find optimal parameters
    eps, min_samples = find_optimal_dbscan_params(features, sample_size=sample_size)

    if eps is None or min_samples is None:
        # The search found no usable combination; DBSCAN cannot be built with
        # eps=None, so fall back to scikit-learn's documented defaults.
        eps, min_samples = 0.5, 5
        print("No valid DBSCAN parameters found; falling back to eps=0.5, min_samples=5")

    print(f"Using eps={eps}, min_samples={min_samples} for DBSCAN")

    # Scaling important for mixing different feature types
    scaler = StandardScaler()
    feats_scaled = scaler.fit_transform(features)

    # Reduce dimensionality for better performance
    pca = PCA(n_components=min(10, feats_scaled.shape[1]))
    feats_reduced = pca.fit_transform(feats_scaled)

    # NOTE(review): eps was tuned on a subsample; the full data is denser, so
    # the same eps may merge more points here - confirm results visually.
    db = DBSCAN(eps=eps, min_samples=min_samples, n_jobs=-1)
    labels = db.fit_predict(feats_reduced)

    # Silhouette is only defined for 2 .. n-1 distinct labels. Estimate it on a
    # subsample because the exact score is quadratic in the number of pixels.
    n_labels = len(np.unique(labels))
    if 1 < n_labels < len(labels):
        subsample = sample_size if sample_size < len(labels) else None
        score = silhouette_score(feats_reduced, labels, sample_size=subsample, random_state=42)
        print(f"Final Silhouette Score: {score}")

    return labels, db, scaler, (eps, min_samples)

def run_agglomerative_optimized(features, sample_size=5000):
    """
    Run Agglomerative clustering with automatically selected parameters.

    Parameters:
    features: feature matrix, one row per sample
    sample_size: number of rows used for the parameter search and for the
        final silhouette estimate

    Returns:
    labels: cluster label for every row of features
    agg: fitted AgglomerativeClustering model
    scaler: StandardScaler fitted on features
    (n_clusters, linkage): the parameters that were used
    """
    # Find optimal parameters
    n_clusters, linkage = find_optimal_agglomerative_params(features, sample_size=sample_size)

    print(f"Using n_clusters={n_clusters}, linkage={linkage} for Agglomerative Clustering")

    # Scaling important for mixing different feature types
    scaler = StandardScaler()
    feats_scaled = scaler.fit_transform(features)

    if linkage == 'ward':
        agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
    else:
        # Newer scikit-learn releases name the distance argument `metric`;
        # older ones only accept `affinity`. An unknown keyword raises
        # TypeError at construction time.
        try:
            agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage, metric='cosine')
        except TypeError:
            agg = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage, affinity='cosine')

    labels = agg.fit_predict(feats_scaled)

    # Estimate the silhouette on a subsample: the exact score needs all
    # pairwise distances, which is quadratic in the number of pixels.
    if len(np.unique(labels)) > 1:
        subsample = sample_size if sample_size < len(labels) else None
        score = silhouette_score(feats_scaled, labels, sample_size=subsample, random_state=42)
        print(f"Final Silhouette Score: {score}")

    return labels, agg, scaler, (n_clusters, linkage)

# ------------------------- Main clustering function -----------------------------

def cluster_image_pixels(image, clustering_method='kmeans', feature_method='comprehensive', 
                        optimize_params=True, sample_size=10000):
    """
    Build per-pixel features for an image and cluster them.

    Parameters:
    image: input image
    clustering_method: 'kmeans', 'dbscan', or 'agglomerative'
    feature_method: feature set passed on to create_advanced_features
    optimize_params: when True the *_optimized wrapper searches for parameters;
        when False the plain run_kmeans / run_dbscan / run_agglomerative helper
        is called (NOTE(review): those three helpers are not defined in the
        visible part of this notebook - confirm they exist before using
        optimize_params=False)
    sample_size: sample size handed to the parameter search

    Returns:
    labels: cluster labels for each pixel
    model: fitted clustering model
    scaler: fitted scaler
    params: parameters used for clustering

    Raises:
    ValueError: if clustering_method is not one of the three supported names
    """
    print("Creating features...")
    pixel_features = create_advanced_features(image, method=feature_method)

    print(f"Running {clustering_method} clustering...")

    # Reject unsupported names up front (features have already been built, as before)
    if clustering_method not in ('kmeans', 'dbscan', 'agglomerative'):
        raise ValueError(f"Unknown clustering method: {clustering_method}")

    if clustering_method == 'kmeans':
        if not optimize_params:
            labels, model, scaler = run_kmeans(pixel_features)
            params = model.n_clusters
        else:
            labels, model, scaler, params = run_kmeans_optimized(
                pixel_features, method='silhouette', sample_size=sample_size)

    elif clustering_method == 'dbscan':
        if not optimize_params:
            labels, model, scaler = run_dbscan(pixel_features)
            params = (model.eps, model.min_samples)
        else:
            labels, model, scaler, params = run_dbscan_optimized(
                pixel_features, sample_size=sample_size)

    else:  # 'agglomerative'
        if not optimize_params:
            labels, model, scaler = run_agglomerative(pixel_features)
            params = (model.n_clusters, model.linkage)
        else:
            labels, model, scaler, params = run_agglomerative_optimized(
                pixel_features, sample_size=sample_size)

    return labels, model, scaler, params

# Example usage
# Demo: segment a random-noise image end to end
if __name__ == "__main__":
    # To try a real picture instead, load it like this:
    # image = cv2.imread('your_image.jpg')
    # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Stand-in input: a 100x100 image of random colours
    height, width = 100, 100
    dummy_image = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)

    # KMeans on the 'comprehensive' feature set, k chosen automatically
    labels, model, scaler, params = cluster_image_pixels(
        dummy_image,
        clustering_method='kmeans',
        feature_method='comprehensive',
        optimize_params=True,
    )

    # One label per pixel -> back to the image grid
    segmented_image = labels.reshape(height, width)

    # Original and segmentation side by side
    plt.figure(figsize=(12, 6))
    panels = (
        (dummy_image, {}, 'Original Image'),
        (segmented_image, {'cmap': 'viridis'}, 'Segmented Image'),
    )
    for position, (picture, imshow_kwargs, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture, **imshow_kwargs)
        plt.title(panel_title)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# 4. Filtering and Merging

In [None]:
def filter_and_merge_clusters(segmented_img, min_size=50, max_size=5000, merge_distance=20):
    """
    Filter connected components of a cluster map by size and merge nearby ones.

    Connected components are extracted separately for every cluster label, so
    cluster 0 is treated like any other cluster and neighbouring regions that
    belong to different clusters are not fused before the size filter.
    Negative labels (e.g. DBSCAN noise, -1) are treated as background.

    Parameters:
    segmented_img: 2-D array of cluster labels, one per pixel
    min_size: minimum component size (pixels) to keep
    max_size: maximum component size (pixels) to keep
    merge_distance: components whose bounding boxes are closer than this many
        pixels both vertically and horizontally receive the same output label

    Returns:
    merged_img: int array of the same shape; 0 is background and kept regions
        are numbered 1, 2, ... in the order they were created
    """
    segmented_img = np.asarray(segmented_img)

    # Step 1: connected components per cluster label, size-filtered and
    # renumbered consecutively across clusters.
    component_map = np.zeros(segmented_img.shape, dtype=np.int32)
    n_components = 0
    for cluster_id in np.unique(segmented_img):
        if cluster_id < 0:
            continue
        comp, count = ndimage.label(segmented_img == cluster_id)
        if count == 0:
            continue
        sizes = np.bincount(comp.ravel(), minlength=count + 1)
        keep = (sizes >= min_size) & (sizes <= max_size)
        keep[0] = False  # index 0 is "not this cluster"
        n_keep = int(keep.sum())
        if n_keep == 0:
            continue
        new_ids = np.zeros(count + 1, dtype=np.int32)
        new_ids[keep] = np.arange(n_components + 1, n_components + 1 + n_keep)
        # Masks of different clusters are disjoint, so nothing is overwritten.
        component_map = np.where(comp > 0, new_ids[comp], component_map)
        n_components += n_keep

    merged_img = np.zeros_like(component_map)
    if n_components == 0:
        return merged_img

    # Step 2: greedy merge by bounding-box gap. merged_boxes[i] is the bounding
    # box (min_row, min_col, max_row, max_col) of output label i + 1, grown
    # whenever another component is merged into it.
    merged_boxes = []
    for comp_id, region in enumerate(ndimage.find_objects(component_map), start=1):
        if region is None:
            continue
        box = (region[0].start, region[1].start, region[0].stop, region[1].stop)

        target = None
        for idx, existing in enumerate(merged_boxes):
            # Gap between the two boxes along each axis (0 when they overlap)
            vertical_dist = max(0, max(box[0], existing[0]) - min(box[2], existing[2]))
            horizontal_dist = max(0, max(box[1], existing[1]) - min(box[3], existing[3]))
            if vertical_dist < merge_distance and horizontal_dist < merge_distance:
                target = idx
                break

        if target is None:
            merged_boxes.append(box)
            target = len(merged_boxes) - 1
        else:
            existing = merged_boxes[target]
            merged_boxes[target] = (min(box[0], existing[0]), min(box[1], existing[1]),
                                    max(box[2], existing[2]), max(box[3], existing[3]))

        # Basic slicing returns a view, so this writes into merged_img.
        merged_img[region][component_map[region] == comp_id] = target + 1

    return merged_img

# Usage example:
# filtered_img = filter_and_merge_clusters(segmented_img, min_size=100, max_size=5000)
# plt.imshow(filtered_img, cmap='nipy_spectral')
# plt.show()

# 5. Even More Clustering!

In [None]:
def create_binary_mask_and_centroids(filtered_img, show=True):
    """
    Create binary mask and compute centroids of its connected components.

    Components are re-labelled on the binary mask, so regions that touch each
    other count as one component regardless of their labels in filtered_img.

    Parameters:
    filtered_img: filtered and merged cluster image (0 = background)
    show: draw the mask and the labelled components with their centroids;
        pass False when calling in a loop to avoid one figure per image

    Returns:
    binary_mask: uint8 mask, 1 where filtered_img is positive
    centroids: list of (row, col) centroid coordinates, one per component
    """
    # Create binary mask (positive values become 1)
    binary_mask = (np.asarray(filtered_img) > 0).astype(np.uint8)

    # Find connected components
    labeled_mask, num_features = ndimage.label(binary_mask)

    # Centroid = mean pixel coordinate of each component
    if num_features:
        centroids = ndimage.center_of_mass(
            binary_mask, labeled_mask, np.arange(1, num_features + 1))
    else:
        centroids = []

    if show:
        plt.figure(figsize=(12, 6))

        plt.subplot(1, 2, 1)
        plt.imshow(binary_mask, cmap='gray')
        plt.title('Binary Mask')

        plt.subplot(1, 2, 2)
        plt.imshow(labeled_mask, cmap='nipy_spectral')
        for centroid in centroids:
            # centroid is (row, col); matplotlib wants (x, y)
            plt.plot(centroid[1], centroid[0], 'ro', markersize=5)
        plt.title('Components with Centroids')

        plt.tight_layout()
        plt.show()

    return binary_mask, centroids

# Usage example:
# binary_mask, centroids = create_binary_mask_and_centroids(filtered_img)

# 6. Evaluation

In [None]:
def _segmentation_to_rings(segmentation):
    """Split one segmentation entry into a list of (N, 2) float vertex arrays."""
    if np.isscalar(segmentation[0]):
        rings = [segmentation]      # flat [x1, y1, x2, y2, ...]
    elif all(len(part) == 2 for part in segmentation):
        rings = [segmentation]      # [[x, y], [x, y], ...]
    else:
        rings = segmentation        # one flat coordinate list per polygon
    return [np.asarray(ring, dtype=np.float64).reshape(-1, 2) for ring in rings]


def load_ground_truth_masks(annotations, image_shape, original_size=(1920, 1080)):
    """
    Rasterise the player polygons of one image into a binary mask.

    Parameters:
    annotations: annotation data for an image (a mapping with an optional
        'players' list whose items may carry a 'segmentation'), or None
    image_shape: shape of the (resized) image; only (height, width) is used
    original_size: (width, height) of the frame the polygon coordinates refer to

    Returns:
    gt_mask: uint8 mask of shape (height, width), 1 inside any player polygon;
        all zeros when annotations is None or holds no polygons
    """
    height, width = image_shape[:2]
    gt_mask = np.zeros((height, width), dtype=np.uint8)

    if annotations is None:
        return gt_mask

    orig_width, orig_height = original_size

    # Draw polygons for each player
    for player in annotations.get('players', []):
        segmentation = player.get('segmentation')
        if not segmentation:
            continue

        for ring in _segmentation_to_rings(segmentation):
            if ring.shape[0] == 0:
                continue
            # Scale in floating point; integer coordinates would otherwise be
            # truncated during the in-place assignment.
            ring[:, 0] *= width / orig_width
            ring[:, 1] *= height / orig_height

            # Draw filled polygon
            cv2.fillPoly(gt_mask, [ring.astype(np.int32)], 1)

    return gt_mask

def calculate_iou(mask1, mask2):
    """
    Intersection over Union (IoU) of two masks, comparing them as booleans.

    Returns 0 when neither mask has any foreground.
    """
    first = np.asarray(mask1, dtype=bool)
    second = np.asarray(mask2, dtype=bool)

    union = (first | second).sum()
    if union == 0:
        return 0

    intersection = (first & second).sum()
    return intersection / union

def calculate_dice(mask1, mask2):
    """
    Calculate Dice coefficient between two masks.

    Both masks are compared as booleans (any non-zero value is foreground), so
    0/1, 0/255 and label-valued masks give the same result, in line with
    calculate_iou.

    Returns 0 when neither mask has any foreground.
    """
    first = np.asarray(mask1).astype(bool)
    second = np.asarray(mask2).astype(bool)

    # Count foreground pixels; summing raw values would inflate the
    # denominator for masks that are not strictly 0/1.
    total = first.sum() + second.sum()
    if total == 0:
        return 0

    intersection = np.logical_and(first, second).sum()
    return 2 * intersection / total

def evaluate_segmentation(images, annotations, pred_masks):
    """
    Evaluate segmentation results against ground truth.

    Parameters:
    images: list of images
    annotations: list of annotations, parallel to images
    pred_masks: list of predicted binary masks, parallel to images

    Returns:
    results: DataFrame with columns 'image_id', 'iou' and 'dice', one row per
        image; the columns are present even when the inputs are empty
    """
    records = []

    # zip stops at the shortest input, so extra entries in a longer list are ignored
    for image_id, (img, ann, pred_mask) in enumerate(zip(images, annotations, pred_masks)):
        # Ground truth is rasterised at the resolution of the (resized) image
        gt_mask = load_ground_truth_masks(ann, img.shape)

        records.append({
            'image_id': image_id,
            'iou': calculate_iou(pred_mask, gt_mask),
            'dice': calculate_dice(pred_mask, gt_mask)
        })

    # Fix the columns explicitly so that an empty run still yields
    # results['iou'] / results['dice'] instead of a KeyError downstream.
    return pd.DataFrame(records, columns=['image_id', 'iou', 'dice'])

# Usage example:
# Assuming we have images, annotations, and pred_masks from previous steps
# results_df = evaluate_segmentation(images, annotations, pred_masks)
# print(results_df.mean())

# Complete Pipeline

In [None]:
def complete_pipeline(image_dir, annotation_path, num_images=10, n_clusters=7,
                      feature_method='comprehensive'):
    """
    Complete segmentation pipeline: load, featurise, cluster, filter, evaluate.

    Parameters:
    image_dir: directory with the input images
    annotation_path: path to the annotation JSON file
    num_images: number of images to sample
    n_clusters: number of KMeans clusters per image
    feature_method: feature set passed to create_advanced_features

    Returns:
    results_df: DataFrame with per-image IoU and Dice
    """
    # Step 1: Load and preprocess dataset
    print("Step 1: Loading and preprocessing dataset...")
    images, annotations = load_and_preprocess_dataset(image_dir, annotation_path, num_images)

    # Steps 2-5 run per image: features, clustering, filtering, binary mask
    print("Step 2: Creating features...")
    pred_masks = []
    for i, img in enumerate(images):
        print(f"Processing image {i+1}/{len(images)}")

        # Step 2: Per-pixel features
        features = create_advanced_features(img, method=feature_method)

        # Step 3: Cluster pixels (standardise first: the feature columns mix
        # colour, position and texture/edge responses)
        feats_scaled = StandardScaler().fit_transform(features)
        labels = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit_predict(feats_scaled)
        segmented_img = labels.reshape(img.shape[0], img.shape[1])

        # Step 4: Filter and merge clusters
        filtered_img = filter_and_merge_clusters(segmented_img, min_size=100, max_size=5000)

        # Step 5: Create binary mask and centroids
        binary_mask, centroids = create_binary_mask_and_centroids(filtered_img)
        pred_masks.append(binary_mask)

    # Step 7: Evaluation
    print("Step 7: Evaluating results...")
    results_df = evaluate_segmentation(images, annotations, pred_masks)

    print("Evaluation Results:")
    print(f"Mean IoU: {results_df['iou'].mean():.4f}")
    print(f"Mean Dice: {results_df['dice'].mean():.4f}")

    return results_df

# Usage example:
# results = complete_pipeline('path/to/images', 'path/to/annotations.json')

Questions (short answers)


1) Provide an example of a segmentation task where each type is appropriate:


- Semantic segmentation: Satellite imagery land-use classification. Each pixel
should be labeled 'water', 'forest', 'urban', etc. We don't care about
individual instances of 'tree' or 'building' — only the class matters.


- Instance segmentation: Autonomous driving pedestrian detection where you
need to separate each pedestrian as a distinct instance (for counting and
tracking). Here we need per-instance masks.


- Panoptic segmentation: Robotic scene understanding in a cluttered indoor
environment where you need both class labels for stuff (floor, wall) and
individual instance IDs for things (mug, laptop). Panoptic combines both.


2) Dice vs IoU:


- Dice = 2 * (|A ∩ B|) / (|A| + |B|) and IoU = |A ∩ B| / |A ∪ B|.
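- For a single pair of masks the two are tied together by Dice = 2 * IoU / (1 + IoU),
so Dice >= IoU, with equality only at 0 and 1 (e.g. IoU = 0.5 gives Dice ≈ 0.67).
- Dice tends to be a bit more forgiving for small objects because of the
2*intersection term; IoU penalizes false positives/negatives more strictly.
- For unbalanced masks with small objects, a few misplaced pixels lower IoU more
sharply than Dice, so Dice degrades more gently. IoU is stricter and often
preferred for segmentation benchmarks.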


3) Autoencoders for clustering images:


- Autoencoders learn a compact latent representation of input images. Train
an autoencoder on the image set; use the encoder's bottleneck outputs as
fixed-length vectors for each image. Then apply clustering (e.g., KMeans)
on these latents instead of raw pixels.
- Advantages: lower dimensionality, noise suppression, and features that are
learned to capture useful structure. This typically speeds up clustering
considerably and often yields better cluster separation than clustering on raw
pixels or other high-dimensional vectors.