In [1]:
# Importing the necessary libraries
from osgeo import gdal
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import sklearn
import scipy as sp
from sklearn.neighbors import NearestNeighbors
from kneed import KneeLocator

In [2]:
# Calculating Cumulative Count Cut
def scaleCCC(x):
  return((x - np.nanpercentile(x, 2))/(np.nanpercentile(x, 98) - np.nanpercentile(x,2)))

def readAsArray(img):
  # Extracting individual bands(4,8,12) from the uploaded images
  band4 = img.GetRasterBand(1).ReadAsArray()
  band8 = img.GetRasterBand(2).ReadAsArray()
  band12 = img.GetRasterBand(3).ReadAsArray()
  # Creating a new image by combining the three bands
  return(np.dstack((band4, band8, band12)))

def applyCCC(img):
  # Extracting individual bands(4,8,12) from the uploaded images
  band4 = img.GetRasterBand(1).ReadAsArray()
  band8 = img.GetRasterBand(2).ReadAsArray()
  band12 = img.GetRasterBand(3).ReadAsArray()
  ccc4 = scaleCCC(band4)
  ccc8 = scaleCCC(band8)
  ccc12 = scaleCCC(band12)
  # Creating a new image by combining the three bands
  return(np.dstack((ccc4,ccc8,ccc12)))

In [4]:
# Reshaping the images
def reshape_images():
  reshaped_images=[]
  for i in range(len(images)):
    new_shape = (images[i].shape[0] * images[i].shape[1], images[i].shape[2])
    new_image = images[i][:, :, :3].reshape(new_shape)
    reshaped_images.append(new_image)

  # Concatenating all the arrays
  combined_image = reshaped_images[0]
  for i in range(1, len(reshaped_images)):
    combined_image = np.concatenate((combined_image, reshaped_images[i]), axis=0)

  # Removing nan values
  train_image = np.nan_to_num(combined_image)
  return train_image

In [5]:
def plot_image(plt_img):
  plt.figure()
  plt.imshow(plt_img)
  plt.show()

In [6]:
def dbscan(model, trn_img):
	return model.fit(trn_img)

In [7]:
for i in range(2,3):
  images_list=[str(i)+ "_P.tif", str(i) +"_A.tif"]
  images = []
  for img_name in images_list:
    img = gdal.Open('./ALL_FLOOD/'+ img_name)
    modified_img = readAsArray(img)
    images.append(modified_img)
  train_image = reshape_images()

In [9]:
import csv
for k in range (20,101,10):
    nearest_neighbors = NearestNeighbors(n_neighbors=k)
    neighbors = nearest_neighbors.fit(train_image)
    distances, indices = neighbors.kneighbors(train_image)
    distances = np.sort(distances[:,k-1], axis=0)
    i = np.arange(len(distances))
    knee = KneeLocator(i, distances, S=1, curve='convex', direction='increasing', interp_method='polynomial')
    epsilon = distances[knee.knee]
    epsilon -=0.005
    max_epsilon = epsilon+0.01
    while (epsilon<=max_epsilon):
        dbscan_model = DBSCAN(eps=epsilon, min_samples = k)
        dbscan_model = dbscan(dbscan_model, train_image)
        core_samples_mask = np.zeros_like(dbscan_model.labels_, dtype=bool)
        core_samples_mask[dbscan_model.core_sample_indices_] = True
        labels = dbscan_model.labels_

        # Number of clusters in labels, ignoring noise if present.
        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        n_noise = list(labels).count(-1)
        with open('myfile.csv', 'a', newline='') as f:
            writer = csv.writer(f)
            print([k,epsilon,n_clusters,n_noise])
            writer.writerow([k,epsilon, n_clusters,n_noise])
        epsilon+=0.001    

[20, 0.00016139202213211797, 1, 334422]
[20, 0.001161392022132118, 145, 285482]
[20, 0.002161392022132118, 206, 133168]
[20, 0.003161392022132118, 78, 69252]
[20, 0.004161392022132118, 67, 42692]
[20, 0.005161392022132118, 29, 26620]
[20, 0.006161392022132118, 24, 18512]
[20, 0.007161392022132118, 11, 13898]
[20, 0.008161392022132119, 9, 11246]
[20, 0.00916139202213212, 12, 9307]
[30, 0.0009413770525725125, 7, 303891]
[30, 0.0019413770525725125, 149, 204204]
[30, 0.0029413770525725125, 57, 98979]
[30, 0.0039413770525725126, 24, 60100]
[30, 0.004941377052572513, 25, 39787]
[30, 0.005941377052572513, 18, 25877]
[30, 0.006941377052572513, 11, 18932]
[30, 0.007941377052572512, 9, 14478]
[30, 0.008941377052572513, 4, 11749]
[30, 0.009941377052572514, 8, 10081]
[40, 0.0015543833021657797, 46, 282218]
[40, 0.0025543833021657797, 57, 153654]
[40, 0.0035543833021657797, 22, 84099]
[40, 0.00455438330216578, 17, 54957]
[40, 0.00555438330216578, 14, 38096]
[40, 0.00655438330216578, 12, 25545]
[40,

In [20]:
core_samples_mask = np.zeros_like(dbscan_model.labels_, dtype=bool)
core_samples_mask[dbscan_model.core_sample_indices_] = True
labels = dbscan_model.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

Estimated number of clusters: 1
Estimated number of noise points: 13049


In [None]:
# Calculating water percentage for before image

arr_before = np.unique(dbscan_before_labels, return_counts=True)
key_before, val_before = arr_before
water_before = val_before[2]/(val_before[0] + val_before[1] + val_before[2] + val_before[3] + val_before[4]) * 100
print("%.2f" %water_before, "%")

8.99 %


In [None]:
# Calculating water percentage for after image
arr_after = np.unique(dbscan_after_labels, return_counts=True)
key_after, val_after = arr_after
water_after = val_after[2]/(val_after[0] + val_after[1] + val_after[2] + val_after[3]) * 100
print("%.2f" %water_after, "%")

45.51 %


In [None]:
# Calculating Flood Increase

flood_increase = water_after - water_before
print("%.2f" %flood_increase, "%")

36.52 %


In [24]:
# Calculating silhouette score for comparision

sklearn.metrics.silhouette_score(X_after, dbscan_after_labels)

0.36859936