In [9]:
import numpy as np
import os
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from osgeo import gdal
from sklearn.cluster import MiniBatchKMeans


In [14]:
def load_and_preprocess_image(image_path):
    """Read band 1 of a raster and return it as a flat, NaN-free array.

    Parameters
    ----------
    image_path : str
        Path to a raster file that GDAL can open.

    Returns
    -------
    numpy.ndarray
        1-D array holding the band-1 values (flattened in row-major order)
        with every NaN replaced by 0.01.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``image_path``.
    """
    dataset = gdal.Open(image_path)
    # When GDAL exceptions are not enabled, gdal.Open reports failure by
    # returning None; fail here with a clear message instead of an opaque
    # AttributeError on the next line.
    if dataset is None:
        raise RuntimeError(f"GDAL could not open raster: {image_path}")
    band_values = dataset.GetRasterBand(1).ReadAsArray()
    return np.nan_to_num(band_values, nan=0.01).flatten()

def load_images_from_folder(folder):
    """Load band-1 pixels from every ``.tif`` file in ``folder``.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive). Only names ending in ``.tif``
        are loaded.

    Returns
    -------
    tuple
        ``(pixels, image_shapes)`` where ``pixels`` is one 1-D array made by
        concatenating the flattened pixels of each image, and
        ``image_shapes`` is a list of ``(rows, cols)`` tuples in the same
        order, so the flat array can later be cut back into images.

    Raises
    ------
    ValueError
        If the folder contains no ``.tif`` file.
    """
    all_pixels = []
    image_shapes = []
    # os.listdir returns names in arbitrary order; sorting keeps the pixel
    # order (and therefore the image order of the result) reproducible.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".tif"):
            continue
        img_path = os.path.join(folder, filename)
        all_pixels.append(load_and_preprocess_image(img_path))
        # Take the 2-D band shape from the raster metadata rather than
        # reading the whole dataset a second time. ReadAsArray() on the
        # dataset yields (bands, rows, cols) for multi-band files, which
        # does not match the band-1 pixels collected above.
        dataset = gdal.Open(img_path)
        image_shapes.append((dataset.RasterYSize, dataset.RasterXSize))
    if not all_pixels:
        raise ValueError(f"No .tif images found in folder: {folder}")
    return np.concatenate(all_pixels), image_shapes


def cluster_global_data(pixels, optimal_k, batch_size=10000, random_state=0):
    """Cluster pixel values with MiniBatchKMeans.

    Parameters
    ----------
    pixels : numpy.ndarray
        Pixel values; reshaped to a single-feature column before fitting.
    optimal_k : int
        Number of clusters to fit.
    batch_size : int, optional
        Mini-batch size passed to MiniBatchKMeans (default 10000).
    random_state : int, optional
        Seed passed to MiniBatchKMeans (default 0).

    Returns
    -------
    tuple
        ``(kmeans, labels)``: the fitted estimator and its ``labels_``
        array, one label per input pixel.
    """
    kmeans = MiniBatchKMeans(
        n_clusters=optimal_k,
        random_state=random_state,
        batch_size=batch_size,
        # Pin n_init explicitly: leaving it unset emits a warning about the
        # changing default and lets results drift between library versions.
        # 3 is the default the warning reports as currently in effect.
        n_init=3,
    )
    kmeans.fit(pixels.reshape(-1, 1))
    return kmeans, kmeans.labels_


def assign_clusters_to_images(global_labels, image_shapes):
    """Cut a flat label array back into one 2-D label map per image.

    Parameters
    ----------
    global_labels : numpy.ndarray
        1-D array of labels for all images laid end to end.
    image_shapes : iterable of tuple
        Shape of each image, in the same order as the labels; the first two
        entries of each shape give the number of labels the image consumes.

    Returns
    -------
    list of numpy.ndarray
        One array per entry of ``image_shapes``, reshaped to that shape.
    """
    per_image = []
    cursor = 0
    for dims in image_shapes:
        stop = cursor + dims[0] * dims[1]
        # Consecutive, non-overlapping windows of the flat label array.
        per_image.append(global_labels[cursor:stop].reshape(dims))
        cursor = stop
    return per_image

def calculate_cluster_percentages(labels):
    """Return the share of elements carrying each distinct label.

    Parameters
    ----------
    labels : numpy.ndarray
        Array of cluster labels.

    Returns
    -------
    dict
        Maps each distinct label to its percentage (0-100) of ``labels.size``.
    """
    cluster_ids, occurrences = np.unique(labels, return_counts=True)
    # Same arithmetic as a per-label (count / total) * 100, done on the array.
    shares = (occurrences / labels.size) * 100
    return dict(zip(cluster_ids, shares))

def print_cluster_value_ranges(kmeans, pixels):
    """Print the minimum and maximum pixel value of every cluster.

    Parameters
    ----------
    kmeans : object
        Fitted clustering model exposing ``cluster_centers_`` (one entry per
        cluster) and ``labels_`` (one label per row of ``pixels``).
    pixels : numpy.ndarray
        Pixel values that ``kmeans.labels_`` refers to; it is indexed along
        its first axis with a boolean mask built from the labels.

    Returns
    -------
    None
        Output goes to stdout, one line per cluster.
    """
    for i in range(len(kmeans.cluster_centers_)):
        cluster_pixels = pixels[kmeans.labels_ == i]
        # A centre can end up with no pixels assigned; min()/max() on an
        # empty array would raise ValueError, so report it instead.
        if cluster_pixels.size == 0:
            print(f'Cluster {i}: no pixels assigned')
            continue
        print(f'Cluster {i}: range ({cluster_pixels.min():.3f} - {cluster_pixels.max():.3f})')


In [26]:
# Folder containing the .tif images to cluster.
# Set the NDVI_IMAGE_FOLDER environment variable to point at the data on
# another machine; the original author's local path is kept as the fallback.
folder_path = os.environ.get(
    'NDVI_IMAGE_FOLDER',
    'C:\\Users\\Musae\\Documents\\GitHub-REPOs\\Senior-project-main\\Docs\\sub from all',
)


In [27]:
# Driver cell: load every image, cluster all pixels together, report the
# cluster statistics, then split the labels back into per-image arrays.

# Load and concatenate pixels from all images; image_shapes records each
# image's shape so the flat label array can be cut back up at the end.
all_pixels, image_shapes = load_images_from_folder(folder_path)

# Number of clusters to fit (hard-coded here; edit to try a different k)
optimal_k = 5

# Perform global clustering: a single model fitted on the pixels of all images
kmeans, global_labels = cluster_global_data(all_pixels, optimal_k)

# Calculate and print the percentage of all pixels that falls in each cluster
percentages = calculate_cluster_percentages(global_labels)
for cluster, percentage in percentages.items():
    print(f"Cluster {cluster}: {percentage:.2f}%")

# Print cluster value ranges; the pixels are passed as a single column
print_cluster_value_ranges(kmeans, all_pixels.reshape(-1, 1))

# Assign clusters back to original images (one label array per image)
clustered_images = assign_clusters_to_images(global_labels, image_shapes)

  super()._check_params_vs_input(X, default_n_init=3)


Cluster 0: 13.95%
Cluster 1: 1.22%
Cluster 2: 73.33%
Cluster 3: 5.30%
Cluster 4: 6.20%
Cluster 0: range (0.029 - 0.049)
Cluster 1: range (0.295 - 0.986)
Cluster 2: range (0.049 - 0.112)
Cluster 3: range (0.112 - 0.295)
Cluster 4: range (-0.995 - 0.029)


Results for the first 30 images (4 clusters)

Cluster 0: 1.04%
Cluster 1: 68.01%
Cluster 2: 27.39%
Cluster 3: 3.55%

Cluster 0: range (0.321 - 0.986)
Cluster 1: range (0.054 - 0.133)
Cluster 2: range (-0.995 - 0.054)
Cluster 3: range (0.133 - 0.321)

Results for the last 30 images (4 clusters)

Cluster 0: 48.41%
Cluster 1: 2.95%
Cluster 2: 47.74%
Cluster 3: 0.90%
Cluster 0: range (-0.590 - 0.055)
Cluster 1: range (0.135 - 0.308)
Cluster 2: range (0.055 - 0.135)
Cluster 3: range (0.308 - 0.885)

normal - 2 images

Cluster 0: 61.14%
Cluster 1: 1.24%
Cluster 2: 4.86%
Cluster 3: 32.75%

Cluster 0: range (0.064 - 0.143)
Cluster 1: range (0.332 - 0.919)
Cluster 2: range (0.143 - 0.332)
Cluster 3: range (-0.860 - 0.064)

MiniBatchKMeans - 2 images

Cluster 0: 55.57%
Cluster 1: 1.14%
Cluster 2: 39.27%
Cluster 3: 4.02%
Cluster 0: range (0.068 - 0.154)
Cluster 1: range (0.345 - 0.919)
Cluster 2: range (-0.860 - 0.068)
Cluster 3: range (0.154 - 0.345)



Cluster 0: 35.76%
Cluster 1: 4.54%
Cluster 2: 58.49%
Cluster 3: 1.20%

Cluster 0: range (-0.860 - 0.063)
Cluster 1: range (0.143 - 0.330)
Cluster 2: range (0.063 - 0.143)
Cluster 3: range (0.330 - 0.919)