# CLUSTERING

------------------------------------------

## Common Utils

In [1]:
import sys
# Star imports pull the project's shared names into the notebook namespace.
# Later cells use e.g. EXPERIMENT_PATH, KMEANS and SEED unqualified; which of
# these modules defines each name is not visible from this notebook.
from constants import *
from common import *
from metrics import *

# Put the "data" directory on the import path before the imports below.
# NOTE(review): presumably `processing` lives inside data/ and is only
# importable as a top-level module because of this line — confirm.
sys.path.append("data")
from processing import *
from data.constants import *
from data.utils import load_data_path

## Load Data

In [2]:
import os

# Switch read by the later cells: when True they display intermediate images
# with matplotlib in addition to writing/evaluating them.
plot_results = False

# Nested mapping used below as data[drone][image_dir][TILE] and
# data[drone][image_dir][GROUND_TRUTH].
data = load_data_path()

# Root folder that receives every mask produced by the K-means experiment;
# created up front so the later cells can write into it directly.
directory_path = os.path.join(EXPERIMENT_PATH, KMEANS)
os.makedirs(directory_path, exist_ok=True)

## Clustering

In [3]:
from warnings import simplefilter

from sklearn.exceptions import ConvergenceWarning
from tqdm import TqdmWarning

# Silence the warning categories that would otherwise flood the output of the
# per-tile clustering loop.
# NOTE: UserWarning is a broad category, so this also hides UserWarnings
# raised by any other library for the rest of the session.
for category in (ConvergenceWarning, UserWarning, TqdmWarning):
    simplefilter("ignore", category=category)


### Experiments

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from PIL import Image

num_clusters = 3
default_channels = 1
kmeans = KMeans(n_clusters=num_clusters, n_init='auto', random_state=SEED)

# For every tile of every drone / image directory: cluster the pixels with
# K-means, keep the pixels of one cluster as a binary (0/255) mask, and save
# that mask under directory_path/<drone>/<image_dir>/<tile file name>.
for drone, drone_dir in data.items():

    print(f"- Processing drone: {drone}")

    drone_dir_path = os.path.join(directory_path, drone)
    os.makedirs(drone_dir_path, exist_ok=True)

    for image_dir in drone_dir:
        print(f"-- Processing image directory: {image_dir}")
        image_dir_path = os.path.join(drone_dir_path, image_dir)
        os.makedirs(image_dir_path, exist_ok=True)

        tiles_paths = data[drone][image_dir][TILE]

        for tile_path in tiles_paths:
            file_name = os.path.basename(tile_path)
            print(f"--- Processing tile: {file_name}")

            # Read the pixels inside a context manager so the file handle is
            # released as soon as the array has been materialised.
            with Image.open(tile_path) as image:
                image_array = np.array(image)

            height, width = image_array.shape[:2]

            # Decide the channel count and the exported cluster index per
            # tile. Previously the label was set once per drone and switched
            # to 1 on the first grayscale tile, so it stayed 1 for every
            # later multi-channel tile of the same drone.
            if image_array.ndim > 2:
                channels = image_array.shape[2]
                target_label = 2
            else:
                # Grayscale tiles carry no channel axis.
                channels = default_channels
                target_label = 1

            # One row per pixel, one column per channel.
            flattened_image_array = image_array.reshape((height * width, channels))

            # Fit K-means on this tile and get the cluster index of every
            # pixel in a single call, then restore the image layout.
            labels = kmeans.fit_predict(flattened_image_array)
            labels = labels.reshape((height, width))

            # Keep only the cluster assumed to be vegetation ("green").
            # NOTE(review): K-means cluster indices carry no inherent meaning,
            # so target_label matching the green cluster is an assumption of
            # this experiment — confirm on a few tiles.
            green_segment = (labels == target_label).astype(np.uint8) * 255

            # Save the binary mask next to the other tiles of this directory.
            green_image = Image.fromarray(green_segment)
            path = os.path.join(image_dir_path, file_name)
            green_image.save(path)

            if plot_results:
                plt.figure(figsize=(8, 6))
                plt.imshow(green_segment, cmap='Greens')  # Adjust the colormap based on your preference
                plt.axis('off')
                plt.title('Green Pixels')
                plt.show()


In [None]:
# delete_experiment_files(KMEANS)

### Evaluation

In [10]:
import torch
from torchvision import transforms

# Each entry of directory_path is treated as one drone's prediction folder.
drone_dirs = sorted(os.listdir(directory_path))

# Define transformations to convert to a tensor
transform = transforms.Compose([
    transforms.ToTensor(),  # Converts PIL image or numpy.ndarray to a tensor
])

num_classes = 2  # Number of classes (including background)

# miou[drone][image_dir] -> mean IoU over the tiles of that image directory.
miou = {}

for drone_dir in drone_dirs:

    miou[drone_dir] = {}
    print(f"\n\n{drone_dir}\n\n")

    drone_dir_path = os.path.join(directory_path, drone_dir)
    images_dir = sorted(os.listdir(drone_dir_path))

    for image_dir in images_dir:

        ground_truths = data[drone_dir][image_dir][GROUND_TRUTH]

        image_dir_path = os.path.join(drone_dir_path, image_dir)
        sub_dirs = sorted(os.listdir(image_dir_path))

        # Predictions and ground truths are paired purely by position.
        # NOTE(review): this assumes the sorted file listing lines up with the
        # order of `ground_truths` — confirm against load_data_path().
        if len(sub_dirs) != len(ground_truths):
            print(
                f"Warning: {len(sub_dirs)} predictions but "
                f"{len(ground_truths)} ground truths in {image_dir}; "
                "only the paired ones are evaluated."
            )

        iou_sum = 0
        evaluated = 0

        for file_name, ground_truth_path in zip(sub_dirs, ground_truths):

            image_path = os.path.join(image_dir_path, file_name)
            output = resize_image(file_path=image_path)
            output = convert_to_black_and_white(output, False, 128)
            outputs = transform(output)  # Predicted mask as a tensor

            if plot_results:
                plt.imshow(output)
                plt.axis('off')
                plt.show()

            # Keep the ground-truth file open only while it is being used.
            with Image.open(ground_truth_path) as gt_image:
                gt = convert_to_black_and_white(image=gt_image, save_results=False, threshold=128)
                labels = transform(gt)

                if plot_results:
                    plt.imshow(gt)
                    plt.axis('off')
                    plt.show()
                    print("Output shape:", outputs.shape)
                    print("Label shape:", labels.shape)

            iou_sum += calculate_miou(outputs, labels, num_classes)
            evaluated += 1

        # Average over the pairs that were actually scored. Dividing by
        # len(ground_truths) gave a wrong mean whenever zip() truncated the
        # pairing, and raised ZeroDivisionError for an empty directory.
        miou[drone_dir][image_dir] = iou_sum / evaluated if evaluated else float("nan")
        print(f"Mean IoU {image_dir}:", miou[drone_dir][image_dir])

        print("------------------------")
    print("\n\n")



RedEdge


Mean IoU 000: 0.7093120671437876
------------------------
Mean IoU 001: 0.7071644769642841
------------------------
Mean IoU 002: 0.6255108296043344
------------------------
Mean IoU 003: 0.7312779810790923
------------------------
Mean IoU 004: 0.5891863790332762
------------------------





Sequoia


Mean IoU 005: 0.6395362356355797
------------------------
Mean IoU 006: 0.6333282222150869
------------------------
Mean IoU 007: 0.6111489676655485
------------------------





In [74]:
# Display the score stored for whatever `drone_dir` / `image_dir` currently
# hold. NOTE(review): both names are loop variables reused by earlier cells,
# so the value shown depends on which cell ran last — confirm before relying
# on this output.
miou[drone_dir][image_dir]

{}