# In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from skimage import io
import os

# In [None]:
# Location of the table that is parsed with pd.read_csv on the next line.
# NOTE(review): the file name starts with "Outout" (possibly a typo for
# "Output") and ends in ".ipynb" rather than ".csv" -- confirm that this
# really points at a CSV-formatted file before running.
final_csv_path = 'Outout Information Extracted from Geometrical Distortion.ipynb'
# Code further down in this file reads the columns 'X Coordinate (Moving)',
# 'Y Coordinate (Moving)' and 'Magnitude' from this frame.
final_csv_df = pd.read_csv(final_csv_path)

def calculate_wcss(data, max_clusters=100):
    """Fit K-Means for k = 1..max_clusters and collect each model's inertia.

    Args:
        data: Samples passed unchanged to ``KMeans.fit``.
        max_clusters: Largest number of clusters to try (inclusive). A value
            below 1 results in no fits at all.

    Returns:
        list: ``kmeans.inertia_`` for every k, in increasing order of k, so
        element 0 belongs to k = 1. Empty when ``max_clusters`` < 1.
    """
    wcss = []
    for n in range(1, max_clusters + 1):
        # Fixed random_state keeps the curve reproducible between runs.
        kmeans = KMeans(n_clusters=n, init='k-means++', max_iter=300, n_init=10, random_state=0)
        kmeans.fit(data)
        wcss.append(kmeans.inertia_)
    return wcss

def calculate_bounding_boxes_with_padding(label_image, n_clusters, padding=5):
    """Compute a padded bounding box, clipped to the image, for each label.

    Args:
        label_image: 2-D array of labels; pixels equal to ``i`` belong to
            cluster ``i``.
        n_clusters: Labels ``0 .. n_clusters - 1`` are examined.
        padding: Number of pixels added on every side of the tight box
            before clipping to the image bounds.

    Returns:
        dict: Maps each label that occurs in ``label_image`` to a
        ``(row_slice, col_slice)`` tuple usable as a 2-D index. Labels with
        no pixels are omitted.
    """
    height, width = label_image.shape[0], label_image.shape[1]
    bounding_boxes = {}
    for label in range(n_clusters):
        rows, cols = np.where(label_image == label)
        if rows.size == 0:
            # Label does not occur in the image; nothing to bound.
            continue

        y_start = max(int(rows.min()) - padding, 0)
        x_start = max(int(cols.min()) - padding, 0)
        # Slice stops are exclusive, so step one past the last (padded)
        # row/column; otherwise the final row and column of the cluster
        # would be cut off and a one-pixel-wide cluster would give an
        # empty patch.
        y_stop = min(int(rows.max()) + padding + 1, height)
        x_stop = min(int(cols.max()) + padding + 1, width)

        bounding_boxes[label] = (slice(y_start, y_stop), slice(x_start, x_stop))
    return bounding_boxes

def extract_patches(image, bounding_boxes):
    """Index ``image`` with every bounding box and collect the results.

    Args:
        image: Object indexed as ``image[bbox]`` for each box.
        bounding_boxes: Mapping of cluster key to the index (for example a
            tuple of slices) selecting that cluster's region.

    Returns:
        dict: Same keys as ``bounding_boxes``, in the same order, each mapped
        to ``image[bbox]``.
    """
    return {key: image[region] for key, region in bounding_boxes.items()}

def process_image(image_path, final_csv_df, n_clusters, image_id, output_dir):
    """Split an image into per-cluster patches and write them to disk.

    K-Means is fit on the 'X Coordinate (Moving)' / 'Y Coordinate (Moving)'
    columns of ``final_csv_df``, weighted by its 'Magnitude' column. Every
    pixel of the image is then assigned to its predicted cluster, a bounding
    box (padding 0) is computed per cluster, and each box is cropped from the
    image and saved as ``patch_img{image_id}_cluster_{cluster}.tif``.

    Args:
        image_path: Path handed to ``io.imread``.
        final_csv_df: DataFrame providing the three columns named above.
        n_clusters: Number of K-Means clusters.
        image_id: Identifier embedded in the patch file names and the final
            status message.
        output_dir: Directory receiving the patches; created if missing.

    Returns:
        None. Patches are written to ``output_dir`` and a status line is
        printed.
    """
    image = io.imread(image_path)

    # Features are ordered (x, y); the prediction grid below must use the
    # same order.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(
        final_csv_df[['X Coordinate (Moving)', 'Y Coordinate (Moving)']].values,
        sample_weight=final_csv_df['Magnitude'].values
    )

    image_shape = image.shape[:2]
    # x runs over columns (shape[1]) and y over rows (shape[0]).
    # NOTE(review): assumes the CSV's X/Y columns are pixel column/row
    # coordinates of this image -- confirm against the producer of the CSV.
    x_coords, y_coords = np.meshgrid(np.arange(image_shape[1]), np.arange(image_shape[0]))
    # Column 0 = x, column 1 = y, matching the fit. Passing (y, x) here
    # would label each pixel using swapped features.
    all_coords = np.c_[x_coords.ravel(), y_coords.ravel()]
    pixels_labels = kmeans.predict(all_coords)
    # meshgrid output ravels row by row, so reshaping restores the image layout.
    label_image = pixels_labels.reshape(image_shape)
    bounding_boxes_with_padding = calculate_bounding_boxes_with_padding(label_image, n_clusters, padding=0)
    patches_with_padding = extract_patches(image, bounding_boxes_with_padding)

    # Ensure the output directory exists (no error if it already does)
    os.makedirs(output_dir, exist_ok=True)

    # Save each patch
    for cluster, patch in patches_with_padding.items():
        patch_filename = f'patch_img{image_id}_cluster_{cluster}.tif'
        patch_path = os.path.join(output_dir, patch_filename)
        io.imsave(patch_path, patch)

    print(f"Finished processing image {image_id}. Patches saved to {output_dir}.")

# Build the WCSS curve for k = 1..10 from the moving-image coordinates
coordinate_columns = ['X Coordinate (Moving)', 'Y Coordinate (Moving)']
wcss = calculate_wcss(final_csv_df[coordinate_columns].values, max_clusters=10)
# Pick the "elbow": the largest second difference of the curve. Entry i of
# the second difference is centred on wcss[i + 1], i.e. on k = i + 2.
n_clusters = np.argmax(np.diff(wcss, n=2)) + 2

print(f"Optimal number of clusters: {n_clusters}")

# Your paths to images
fixed_image_paths = ['Fixed Image']
moving_image_paths = ['Registered Image']
output_dir = './'

# Process each image: fixed images first, then moving ones, numbered from 1
image_id = 1
for path in [*fixed_image_paths, *moving_image_paths]:
    process_image(path, final_csv_df, n_clusters, image_id, output_dir)
    image_id += 1