In [None]:
import os

# Folder that is scanned for input rasters.
tif_directory = "C:/Users/KRIDAY PARMAR/Downloads/prospace_assignment/prospace_assignment"

# Keep only the directory entries whose name ends with the '.tif' suffix
# (bare file names, not full paths).
tif_files = list(filter(lambda name: name.endswith('.tif'), os.listdir(tif_directory)))

# Report how many matching files were found.
num_tif_images = len(tif_files)
print("Number of TIF images:", num_tif_images)


In [None]:
import os
import numpy as np
import cv2
import rasterio
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models


def read_tif_image(file_path):
    """Read every band of a raster file.

    Args:
        file_path: Path handed to ``rasterio.open``.

    Returns:
        A list with one entry per band, in band order (band 1 first). Each
        entry is whatever ``src.read(band_index)`` returns for that band.
    """
    band_arrays = []
    with rasterio.open(file_path) as dataset:
        # Bands are requested with 1-based indexes, from 1 up to dataset.count.
        for band_number in range(1, dataset.count + 1):
            band_arrays.append(dataset.read(band_number))
    return band_arrays

def preprocess_image(image, target_size):
    """Resize a single band and rescale its values.

    Args:
        image: Band array passed straight to ``cv2.resize``.
        target_size: Size tuple forwarded to ``cv2.resize`` as its second
            argument.

    Returns:
        The resized band divided by 255.0.
    """
    # NOTE(review): dividing by 255.0 presumably assumes 8-bit pixel values;
    # confirm against the actual dtype/range of the input rasters.
    return cv2.resize(image, target_size) / 255.0


# Directory that holds the input .tif files.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
tif_directory = "C:/Users/KRIDAY PARMAR/Downloads/prospace_assignment/prospace_assignment"

# os.listdir() returns entries in arbitrary order. Sorting gives a stable
# file -> row ordering, so downstream results do not depend on the order the
# filesystem happens to report.
tif_files = sorted(
    os.path.join(tif_directory, file)
    for file in os.listdir(tif_directory)
    if file.endswith('.tif')
)

# Fail early with a clear message instead of an obscure shape error later on.
if not tif_files:
    raise FileNotFoundError(f"No .tif files found in {tif_directory!r}")

# Each entry of `images` is the list of bands returned by read_tif_image().
images = [read_tif_image(file) for file in tif_files]
target_size = (20, 20)  # Target size for resizing

# Resize every band, then stack the bands of one image along the last axis.
# Converting the list to an array yields one row per input file, in the same
# order as `tif_files`.
resized_images = np.array([
    np.stack([preprocess_image(band, target_size) for band in image], axis=-1)
    for image in images
])

# Define the CNN model
def create_cnn_model(input_shape):
    """Build the sequential CNN used as a feature extractor.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.

    Returns:
        A ``models.Sequential`` made of three 3x3 convolutions (32, 64 and 64
        filters, ReLU) with 2x2 max-pooling after the first two, followed by
        Flatten and a 64-unit ReLU Dense layer. The model is returned
        uncompiled.
    """
    # Convolution / pooling part.
    conv_stack = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
    ]
    # Flatten the feature maps and project them to a 64-value vector.
    head = [
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
    ]
    return models.Sequential(conv_stack + head)

# Take the input shape from the data itself (everything after the batch axis)
# instead of rebuilding it from `target_size`, so the model always matches the
# actual array layout even if `target_size` is ever made non-square.
input_shape = resized_images.shape[1:]

# No compile()/fit() call follows, so the features come from the network's
# initial random weights. Seeding makes those weights, and therefore the
# features and clusters, repeatable between runs.
tf.keras.utils.set_random_seed(42)

# Create the CNN model
model = create_cnn_model(input_shape)
model.summary()

# Extract features for all images in a single batched predict() call rather
# than one call per image, which is slow for many small inputs.
feature_vectors = model.predict(resized_images)

# One flat feature row per image.
feature_vectors = feature_vectors.reshape(len(resized_images), -1)

print("Shape of feature vectors:", feature_vectors.shape)


In [None]:
from sklearn.cluster import KMeans


# Number of groups the feature vectors are partitioned into.
num_clusters = 3

# Fit k-means with a fixed random_state, then read the cluster index assigned
# to each feature vector from the fitted estimator.
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(feature_vectors)
cluster_labels = kmeans.labels_

In [None]:
# Pair each file path with the cluster label at the same position; this relies
# on `tif_files` and `cluster_labels` being in the same order.
file_cluster_mapping = {path: label for path, label in zip(tif_files, cluster_labels)}

# Print one line per file with its assigned cluster.
for tif_file in file_cluster_mapping:
    cluster_label = file_cluster_mapping[tif_file]
    print(f"TIF File: {tif_file} | Cluster Label: {cluster_label}")

In [None]:
from collections import Counter
# Tally how many samples were assigned to each cluster label.
cluster_counts = Counter(cluster_labels)

# Report the size of every cluster, in the order the labels were first seen.
for cluster_label in cluster_counts:
    count = cluster_counts[cluster_label]
    print(f"Cluster {cluster_label}: {count} points")


In [None]:
import random
import matplotlib.pyplot as plt

# Show up to five randomly chosen images. Capping the sample size avoids the
# ValueError random.sample() raises when fewer than five files are available.
num_samples = min(5, len(tif_files))
random_indices = random.sample(range(len(tif_files)), num_samples)
random_images = [resized_images[i] for i in random_indices]
random_cluster_labels = [cluster_labels[i] for i in random_indices]

# squeeze=False makes subplots() always return a 2-D array of axes, even for a
# single column; taking row 0 gives a 1-D sequence that is safe to index.
fig, axes = plt.subplots(
    nrows=1, ncols=len(random_images), figsize=(15, 5), squeeze=False
)
axes = axes[0]

for i, (image, cluster_label) in enumerate(zip(random_images, random_cluster_labels)):
    # NOTE(review): imshow() expects a 2-D, RGB or RGBA array; rasters with a
    # different band count would need a band selection first. Confirm the
    # band count of the input data.
    axes[i].imshow(image)
    axes[i].set_title(f'Cluster {cluster_label}')

plt.tight_layout()
plt.show()


In [None]:
# Persist the CNN feature extractor to "prospace.h5". The path is relative, so
# the file is written to the current working directory.
# NOTE(review): no compile()/fit() call is visible in this notebook, so the
# saved weights appear to be the initial ones; confirm that is intended.
model.save("prospace.h5")

In [None]:
import pickle
# Serialize the per-image cluster labels to "cluster.pkl" in the current
# working directory. Only the labels are stored; the file names they belong to
# are not part of the pickle.
with open("cluster.pkl", mode="wb") as f:
    pickle.dump(cluster_labels, f)