In [11]:
# Importing the necessary libraries
from osgeo import gdal
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import sklearn
import scipy as sp
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from kneed import KneeLocator

In [12]:
def readAsArray(img):
    """Return the contents of raster band 1 of ``img``.

    ``img`` must expose ``GetRasterBand(index)``, and the band object it
    returns must expose ``ReadAsArray()``. Whatever that call returns is
    handed back unchanged.
    """
    first_band = img.GetRasterBand(1)
    return first_band.ReadAsArray()

In [13]:
def process_image(img):
    """Flatten a raster into a standardised ``(pixels, bands)`` matrix.

    Parameters
    ----------
    img : object exposing ``ReadAsArray()``
        The returned array is interpreted as band-first
        ``(bands, rows, cols)``. A 2-D array is interpreted as a single band
        of shape ``(rows, cols)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(rows * cols, bands)``: the output of
        ``scaler.fit_transform`` passed through ``np.nan_to_num`` (NaN
        becomes 0.0, infinities become large finite values).

    Raises
    ------
    ValueError
        If the raster array is neither 2-D nor 3-D.

    Notes
    -----
    Uses the module-level ``scaler``, which must exist before the first
    call. ``fit_transform`` is invoked on every call, so the scaler is
    re-fitted on each image rather than sharing statistics across images.
    """
    data = img.ReadAsArray()
    if data.ndim == 2:
        # Single-band raster: there is no band axis to move, so append one.
        # Moving axis 0 of a 2-D array would transpose the image instead.
        data = data[..., np.newaxis]
    elif data.ndim == 3:
        # Band-first -> band-last so each pixel's bands are contiguous.
        data = np.moveaxis(data, 0, -1)
    else:
        raise ValueError(
            "expected a 2-D or 3-D raster array, got %d dimensions" % data.ndim
        )

    rows, cols, bands = data.shape
    data_2d = data.reshape(rows * cols, bands)
    data_2d_scaled = scaler.fit_transform(data_2d)
    return np.nan_to_num(data_2d_scaled)

In [14]:
# Defining Plot Function

def plot_image(plt_img):
    """Show ``plt_img`` in a new matplotlib figure.

    Opens a fresh figure, draws ``plt_img`` with ``imshow`` and calls
    ``show``. Returns ``None``.
    """
    plt.figure()
    plt.imshow(plt_img)
    plt.show()

In [None]:
# Running the DBSCAN Algorithm
# Hard-coded hyper-parameters for the model fitted on the training scenes.
dbscan_model = DBSCAN(eps=0.057694701, min_samples=40)
# process_image() looks this scaler up by its module-level name, so it has to
# exist before the first call in the loop below.
scaler = StandardScaler()

all_images = []      # band-1 rasters, each reshaped to (rows, cols, 1)
scaled_blocks = []   # one process_image() result per file, in load order

for i in range(2, 5):
    # Scenes 2-4: one "_P_" and one "_A_" file per scene index.
    images_list = [str(i) + "_P_256.tif", str(i) + "_A_256.tif"]
    images = []
    # Opening the satellite images
    for img_name in images_list:
        img_path = './ALL_FLOOD_256/' + img_name
        img = gdal.Open(img_path)
        if img is None:
            # gdal.Open reports failure by returning None when GDAL
            # exceptions are not enabled; fail here with the offending path
            # instead of an AttributeError further down.
            raise FileNotFoundError("GDAL could not open " + img_path)

        data_2d_scaled = process_image(img)
        scaled_blocks.append(data_2d_scaled)

        modified_img = readAsArray(img)
        modified_img = modified_img.reshape((modified_img.shape[0], modified_img.shape[1], 1))
        images.append(modified_img)

    all_images.extend(images)

# Stack every file's pixels once, most recently loaded file first.
train_image = np.concatenate(scaled_blocks[::-1], axis=0)
combined_data = train_image

# DBSCAN.fit clusters from scratch on each call, so a single fit on the full
# stack is all that is needed.
dbscan_model.fit(combined_data)
print(dbscan_model.labels_.shape)

In [17]:
# Opening the test images
imgP = gdal.Open("./ALL_FLOOD_256/1_P_256.tif")
imgA = gdal.Open("./ALL_FLOOD_256/1_A_256.tif")

# gdal.Open reports failure by returning None when GDAL exceptions are not
# enabled; stop here with the file name rather than failing later on None.
for _label, _dataset in (("1_P_256.tif", imgP), ("1_A_256.tif", imgA)):
    if _dataset is None:
        raise FileNotFoundError("GDAL could not open ./ALL_FLOOD_256/" + _label)

# Band 1 of each test scene as a 2-D array.
before_img = readAsArray(imgP)
after_img = readAsArray(imgA)

# Standardised (pixels, bands) matrices; process_image re-fits the
# module-level scaler on each image.
data_2d_scaled_before = process_image(imgP)
data_2d_scaled_after = process_image(imgA)

# NOTE(review): later cells read X_before / X_after, but no visible cell
# assigns them. They are aliased here to the scaled matrices on the assumption
# that this is what they held - confirm before trusting downstream numbers.
X_before = data_2d_scaled_before
X_after = data_2d_scaled_after

In [10]:
import csv

# Each tested (min_samples, eps) pair appends one row to tuning.csv, which is
# written without a header row. Column order of every row:
#   min_samples, eps, clusters (before), noise (before),
#   clusters (after), noise (after)

EPS_BELOW_KNEE = 0.005   # the sweep starts this far below the knee distance
EPS_SPAN = 0.011         # width of the eps window that is swept
EPS_STEP = 0.001         # spacing between consecutive eps values
# Number of grid points, both ends included. Indexing the grid by an integer
# step avoids the rounding drift of repeatedly adding EPS_STEP to a float.
N_EPS_STEPS = int(round(EPS_SPAN / EPS_STEP)) + 1


def _cluster_stats(labels):
    """Return ``(n_clusters, n_noise)`` for DBSCAN labels, where -1 is noise."""
    labels = np.asarray(labels)
    n_noise = int(np.count_nonzero(labels == -1))
    n_clusters = len(np.unique(labels)) - (1 if n_noise else 0)
    return n_clusters, n_noise


for k in range(50, 301, 10):
    # Sorted distance to the k-th neighbour of every training pixel; the knee
    # of this curve is used as the centre of the eps sweep for min_samples=k.
    nearest_neighbors = NearestNeighbors(n_neighbors=k)
    neighbors = nearest_neighbors.fit(train_image)
    distances, indices = neighbors.kneighbors(train_image)
    distances = np.sort(distances[:, k - 1], axis=0)
    sample_rank = np.arange(len(distances))
    knee = KneeLocator(sample_rank, distances, S=1, curve='convex', direction='increasing', interp_method='polynomial')
    if knee.knee is None:
        # KneeLocator found no knee; indexing with None would silently turn
        # eps into an array, so skip this k instead.
        print("No knee found for k=%d; skipping" % k)
        continue
    eps_start = distances[knee.knee] - EPS_BELOW_KNEE

    for step in range(N_EPS_STEPS):
        epsilon = eps_start + step * EPS_STEP
        if epsilon <= 0:
            # DBSCAN rejects non-positive eps.
            continue

        dbscan_model = DBSCAN(eps=epsilon, min_samples=k)
        # NOTE(review): `dbscan` is not defined in any visible cell, and
        # neither are X_before / X_after below - they must already exist in
        # the kernel. sklearn's DBSCAN.fit_predict clusters the array it is
        # given from scratch, so the labels below come from X_before /
        # X_after alone.
        dbscan_model = dbscan(dbscan_model, train_image)

        dbscan_before_labels = dbscan_model.fit_predict(X_before)
        core_samples_mask_before = np.zeros_like(dbscan_before_labels, dtype=bool)
        core_samples_mask_before[dbscan_model.core_sample_indices_] = True
        labels_before = dbscan_before_labels
        n_clusters_before, n_noise_before = _cluster_stats(labels_before)

        dbscan_after_labels = dbscan_model.fit_predict(X_after)
        core_samples_mask_after = np.zeros_like(dbscan_after_labels, dtype=bool)
        core_samples_mask_after[dbscan_model.core_sample_indices_] = True
        labels_after = dbscan_after_labels
        n_clusters_after, n_noise_after = _cluster_stats(labels_after)

        row = [k, epsilon, n_clusters_before, n_noise_before, n_clusters_after, n_noise_after]
        # Re-open in append mode for every row so that results already
        # computed are on disk if the sweep is interrupted.
        with open('tuning.csv', 'a', newline='') as f:
            writer = csv.writer(f)
            print(row)
            writer.writerow(row)

[220, 0.012935662311220567, 20, 36485, 28, 45821]
[220, 0.013935662311220568, 12, 33830, 17, 40633]
[220, 0.014935662311220569, 13, 30069, 14, 36595]
[220, 0.01593566231122057, 9, 27958, 9, 34711]
[220, 0.01693566231122057, 6, 26583, 10, 32148]
[220, 0.01793566231122057, 7, 24581, 10, 30603]
[220, 0.018935662311220573, 8, 23131, 9, 28839]
[220, 0.019935662311220573, 10, 21932, 6, 28053]
[220, 0.020935662311220574, 8, 20815, 6, 27305]
[220, 0.021935662311220575, 7, 20129, 9, 25549]
[230, 0.013766106763740352, 14, 35935, 19, 44502]
[230, 0.014766106763740353, 12, 32919, 15, 39646]
[230, 0.015766106763740354, 8, 29944, 10, 36681]
[230, 0.016766106763740355, 9, 27902, 8, 34409]
[230, 0.017766106763740356, 7, 26288, 9, 32099]
[230, 0.018766106763740357, 7, 24536, 9, 30603]
[230, 0.019766106763740358, 6, 23677, 8, 29058]
[230, 0.02076610676374036, 9, 22114, 7, 28033]
[230, 0.02176610676374036, 8, 20817, 6, 27290]
[230, 0.02276610676374036, 7, 20085, 7, 26170]
[240, 0.014609910339315028, 14, 

KeyboardInterrupt: 

In [20]:
# Summarise the most recently fitted DBSCAN model.
labels = dbscan_model.labels_

# Boolean mask over samples: True where the sample is a core sample.
core_samples_mask = np.zeros(labels.shape, dtype=bool)
core_samples_mask[dbscan_model.core_sample_indices_] = True

# Label -1 marks noise, so it is counted separately and does not count as a
# cluster.
n_noise_ = int(np.count_nonzero(labels == -1))
n_clusters_ = int(np.unique(labels).size) - (1 if n_noise_ > 0 else 0)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)

Estimated number of clusters: 1
Estimated number of noise points: 13049


In [None]:
# Calculating water percentage for before image

# np.unique returns the sorted distinct labels and how many pixels carry each.
arr_before = np.unique(dbscan_before_labels, return_counts=True)
key_before, val_before = arr_before
# NOTE(review): position 2 of the sorted labels is taken to be the water
# cluster - confirm against key_before, since DBSCAN label numbers change
# with the data and parameters.
# The denominator is the total pixel count, so the percentage stays correct
# however many distinct labels DBSCAN produced.
water_before = val_before[2] / val_before.sum() * 100
print("%.2f" %water_before, "%")

8.99 %


In [None]:
# Calculating water percentage for after image
# np.unique returns the sorted distinct labels and how many pixels carry each.
arr_after = np.unique(dbscan_after_labels, return_counts=True)
key_after, val_after = arr_after
# NOTE(review): position 2 of the sorted labels is taken to be the water
# cluster - confirm against key_after, since DBSCAN label numbers change
# with the data and parameters.
# The denominator is the total pixel count, so the percentage stays correct
# however many distinct labels DBSCAN produced.
water_after = val_after[2] / val_after.sum() * 100
print("%.2f" %water_after, "%")

45.51 %


In [None]:
# Flood increase: the after-image water share minus the before-image water
# share. Both inputs are percentages, so the result is in percentage points.
flood_increase = water_after - water_before

print("%.2f" % flood_increase, "%")

36.52 %


In [24]:
# Silhouette score of the after-image clustering, for comparison.
# NOTE(review): X_after is not assigned in this cell and must already exist in
# the kernel. Pixels labelled -1 (noise) are passed in as well, so they are
# scored as if they formed one more cluster - confirm that is intended.

sklearn.metrics.silhouette_score(X=X_after, labels=dbscan_after_labels)

0.36859936