<h1>Automated Flood Detection based on Satellite Images</h1>
<br>
This project focuses on flood detection using only the Sentinel-2 (SEN2) data from the SEN12-FLOOD dataset. The goal is to process Sentinel-2 satellite images, specifically four spectral bands: Band2 (blue), Band3 (green), Band4 (red), and Band8 (near-infrared). These bands are important for distinguishing between land, water, and other surface features. The process involves organizing the data, checking for empty images, and stacking the bands together for further analysis; the stacked images are then used for the flood detection task.
<br>
<hr>
Dataset:<br>
Clément Rambour, Nicolas Audebert, Elise Koeniguer, Bertrand Le Saux, Michel Crucianu, Mihai Datcu. (2020). SEN12-FLOOD : a SAR and Multispectral Dataset for Flood Detection . IEEE Dataport. https://dx.doi.org/10.21227/w6xz-s898

Download (after free registration), 12.2 GB: https://ieee-dataport.org/open-access/sen12-flood-sar-and-multispectral-dataset-flood-detection


<h2>Directory Setup</h2>

In [3]:
import rasterio
import pandas as pd
import os
import shutil
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

In [14]:
def check_empty_img(url):
    """Tell whether the blue band image (``B02.tif``) inside a folder is all zeros.

    Args:
        url: Path of the folder that is expected to contain ``B02.tif``.

    Returns:
        True only when the image was read successfully and contains no
        non-zero pixel. False when it has at least one non-zero pixel, and
        also when the file is missing or cannot be decoded (emptiness could
        not be established, so the caller should not treat it as empty).
    """
    path = os.path.join(url, 'B02.tif')

    # Load the image in grayscale mode (flag 0).
    # NOTE(review): without an "any depth" flag OpenCV converts the image to
    # 8 bit on load; if the source bands are 16 bit, very dark images may be
    # reported as empty - confirm against the data.
    image = cv2.imread(path, 0)

    # cv2.imread reports failure by returning None instead of raising, and
    # cv2.countNonZero(None) would crash with an opaque OpenCV error.
    if image is None:
        return False

    return cv2.countNonZero(image) == 0
  

In [15]:
def remove_folders(path):
    """Recursively delete the directory at ``path`` together with all of its contents."""
    shutil.rmtree(path)

In [16]:
def create_dir_structure(flist):
    """Regroup the selected Sentinel-2 band files of each folder into per-date folders.

    For every folder ``<parent>/<id>`` in ``flist`` whose ``<id>`` is at most
    4 characters long, each file ending in B02/B03/B04/B08 ``.tif`` is moved to
    ``<parent>/S2_<id>_<date>/<band>.tif``. Afterwards the source folder is
    deleted together with everything left in it (files starting with ``S1``
    and any other file that was not moved).

    The date is taken from characters 3-12 of the file name and the new file
    name from character 14 onwards, i.e. names are assumed to look like
    ``S2_<10-char date>_<band>.tif`` (presumably ``YYYY-MM-DD`` - confirm
    against the dataset).

    Args:
        flist: Iterable of folder paths to reorganise.

    Returns:
        None. The function works through its effect on the file system.
    """
    wanted_suffixes = ('B02.tif', 'B03.tif', 'B04.tif', 'B08.tif')

    for folder in flist:
        # Split on the path separator instead of a fixed character count so the
        # function works for any root directory, not only a 10-character one.
        parent, location_id = os.path.split(os.path.normpath(folder))

        # Ids have at most 4 characters; longer names are folders that were
        # already produced by this function (e.g. 'S2_<id>_<date>'), so skip them.
        if len(location_id) > 4:
            continue

        for file in os.listdir(folder):
            # Files starting with 'S1' are not wanted; they are deleted
            # together with the source folder below.
            if file.startswith('S1'):
                continue

            if file.endswith(wanted_suffixes):
                date = file[3:13]  # date part of the file name
                new_folder = os.path.join(parent, 'S2_' + location_id + '_' + date)
                os.makedirs(new_folder, exist_ok=True)
                # file[14:] drops the 'S2_<date>_' prefix, leaving e.g. 'B02.tif'
                shutil.move(os.path.join(folder, file), os.path.join(new_folder, file[14:]))

        # Remove the original folder (and whatever was not moved out of it)
        shutil.rmtree(folder)


In [17]:
def stack_bands(path):
    """Write the four band files of a folder into one multi-band ``stack.tif``.

    The metadata of ``B02.tif`` is reused for the output, with the band count
    set to 4. Bands are written in the order B02, B03, B04, B08 as output
    bands 1-4.

    If reading or writing fails for any reason, a message is printed and the
    whole folder is deleted via ``remove_folders``.

    Args:
        path: Folder containing ``B02.tif``, ``B03.tif``, ``B04.tif`` and ``B08.tif``.

    Returns:
        None.
    """
    band_list = ['B02.tif', 'B03.tif', 'B04.tif', 'B08.tif']
    try:
        # Read metadata from the first band (B02.tif)
        with rasterio.open(os.path.join(path, band_list[0])) as src0:
            meta = src0.meta

        # Update metadata to reflect the number of bands in the stack
        meta.update(count=len(band_list))

        # Create the stack file and write each band to it (band indices start at 1)
        with rasterio.open(os.path.join(path, 'stack.tif'), 'w', **meta) as dst:
            for band_index, layer in enumerate(band_list, start=1):
                with rasterio.open(os.path.join(path, layer)) as src1:
                    dst.write_band(band_index, src1.read(1))
    except Exception as exc:
        # Catch Exception rather than using a bare except: interrupting the
        # kernel (KeyboardInterrupt) must not end up deleting a data folder.
        print(f"Folder with no Data: {path} ({exc})")
        remove_folders(path)  # Drop the folder that could not be stacked

    

In [None]:
# Collect every sub-folder of the dataset root.
# normpath only normalises the path ('./SEN12FLOOD' -> 'SEN12FLOOD'); it stays relative.
rootdir = os.path.normpath('./SEN12FLOOD')

flist = [
    entry_path
    for entry_path in (os.path.join(rootdir, entry) for entry in os.listdir(rootdir))
    if os.path.isdir(entry_path)  # keep folders only
]

# Print the total number of folders found
print(f"The number of folders are currently = {len(flist)}")

In [19]:
# Regroup the band files of every folder in flist into per-date 'S2_<id>_<date>' folders.
# NOTE: this moves files and deletes the original folders on disk (can run up to 1-2 min).
create_dir_structure(flist)


In [None]:
# Rebuild the folder list, keeping only directories whose name contains 'S2_'
rootdir = os.path.normpath('./SEN12FLOOD')

flist = [
    os.path.join(rootdir, entry)
    for entry in os.listdir(rootdir)
    if 'S2_' in entry and os.path.isdir(os.path.join(rootdir, entry))
]

print(f"The number of folders are currently = {len(flist)}")


In [None]:
# Build a 4-band image for every folder:
# Band2 (blue), Band3 (green), Band4 (red), and Band8 (near-infrared).
# Folders whose B02 image is entirely zero are deleted instead.
# This process can take up to 2-5 minutes to complete.
for folder_path in flist:
    if not check_empty_img(folder_path):
        stack_bands(folder_path)  # valid image: stack the spectral bands
        continue

    print("The images inside the current folder are empty - zero")
    remove_folders(folder_path)  # empty image: drop the whole folder

    
    

<h2>JSON Metadata Processing</h2>

In [None]:
import json
import os

# Load the Sentinel-2 metadata index (S2list.json) into a dict
with open('./SEN12FLOOD/S2list.json', 'r') as json_file:
    data = json.load(json_file)

print(data)



In [None]:
# Write the FLOODING flag of every acquisition into '<rootdir>/S2_<id>_<date>/flooding.txt'
for location_id, location_data in data.items():
    for location_date, date_data in location_data.items():
        # These keys are skipped; presumably they hold per-location metadata
        # rather than an acquisition - confirm against S2list.json.
        if location_date in ('geo', 'count', 'folder'):
            continue

        folder = os.path.join(rootdir, '_'.join(('S2', location_id, date_data['date'])))
        file = os.path.join(folder, 'flooding.txt')
        print(file)

        # Only folders that still exist on disk receive a label file
        if not os.path.isdir(folder):
            continue
        with open(file, 'w') as f:
            f.write(f"{date_data['FLOODING']}\n")


<h2>Data Augmentation</h2>

In [None]:
import os
import numpy as np
import torch
from torchvision import transforms
import tifffile as tiff  # TIFF fájlok betöltéséhez és kezeléséhez

In [None]:
# Augmentation pipeline: random horizontal flip followed by a random rotation
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=15),  # random rotation of up to 15 degrees
    ]
)

In [None]:
def augment_images_in_directory(input_dir):
    """Create an augmented copy of every ``stack.tif`` found below ``input_dir``.

    The directory tree is walked recursively. Each ``stack.tif`` is loaded,
    passed through ``train_transform`` and saved next to the original as
    ``astack.tif`` (cast to uint16). A line is printed for every file written.

    Args:
        input_dir: Root directory to search.

    Returns:
        None.
    """
    for root, _subdirs, files in os.walk(input_dir):
        # Only directories that contain a 'stack.tif' are of interest
        if 'stack.tif' not in files:
            continue

        file_path = os.path.join(root, 'stack.tif')

        # Load the TIFF; the permute below requires a 3-D array
        # (assumed to be height x width x channels - TODO confirm).
        img = tiff.imread(file_path)

        # Channels-first float tensor, the layout the transform is applied to
        channels_first = torch.from_numpy(img).permute(2, 0, 1).float()

        # Apply the augmentation
        augmented = train_transform(channels_first)

        # Back to channels-last numpy, stored as 16-bit unsigned integers
        img_transformed = augmented.permute(1, 2, 0).numpy().astype(np.uint16)

        # Save the augmented image under a new name: 'astack.tif'
        save_path = os.path.join(root, 'astack.tif')
        tiff.imwrite(save_path, img_transformed)

        print(f"Kép augmentálva és mentve: {save_path}")

# Main run
input_directory = './SEN12FLOOD'  # The root folder in which the image folders are located
augment_images_in_directory(input_directory)

<h2>Data loading</h2>

In [28]:
def getLabel(path):
    """Read the class label stored in ``<path>/flooding.txt``.

    Only the first line of the file is considered, with surrounding
    whitespace stripped.

    Args:
        path: Folder that contains ``flooding.txt``.

    Returns:
        0 if the first line is exactly ``"False"``, otherwise 1.
    """
    filepath = os.path.normpath(os.path.join(path, 'flooding.txt'))
    with open(filepath, 'r') as label_file:
        first_line = label_file.readline().strip()
    return 0 if first_line == "False" else 1

In [None]:
# Collect the directories under the dataset root whose name contains 'S2_'
rootdir = os.path.normpath('./SEN12FLOOD')

flist = [
    os.path.join(rootdir, entry)
    for entry in os.listdir(rootdir)
    if 'S2_' in entry and os.path.isdir(os.path.join(rootdir, entry))
]

print(f"The number of folders are currently = {len(flist)}")

In [None]:
# Name of the image file that load_data reads from each folder;
# 'astack.tif' is the augmented stack written by augment_images_in_directory.
fileName = "astack.tif"

In [26]:
def load_data(folders=None, filename=None):
    """Load one image and its flood label from every folder.

    Args:
        folders: Iterable of folder paths to read. Defaults to the
            module-level ``flist``.
        filename: Name of the image file inside each folder. Defaults to the
            module-level ``fileName``.

    Returns:
        Tuple ``(images, labels)``: ``images`` is a float32 array of the
        loaded images and ``labels`` an int32 array with the matching labels
        from ``getLabel`` (same order, same length).

    Folders whose image is missing or cannot be decoded are skipped with a
    printed notice, so images and labels always stay aligned.
    """
    if folders is None:
        folders = flist
    if filename is None:
        filename = fileName

    images = []
    labels = []

    for folder in folders:
        image_path = os.path.join(folder, filename)

        # NOTE(review): with its default flag cv2.imread returns a 3-channel
        # 8-bit image, so a 4-band / 16-bit stack is not loaded as stored -
        # confirm this is intended (the model input shape is 3-channel).
        image = cv2.imread(image_path)

        # cv2.imread returns None instead of raising; a None entry would make
        # the float32 conversion below fail with an unrelated-looking error.
        if image is None:
            print(f"Skipping {image_path}: image is missing or could not be read")
            continue

        # Read the label before appending so both lists grow together
        label = getLabel(folder)
        images.append(image)
        labels.append(label)

    images = np.array(images, dtype='float32')
    labels = np.array(labels, dtype='int32')

    return images, labels
    

In [43]:
# Load all images and their flood labels into memory (uses the flist / fileName defined above)
images, labels = load_data()

In [None]:
# Sanity check: dimensions of the loaded image array
images.shape

In [42]:
# Hold out 20% of the samples as a test set. The split is stratified on the
# labels so both parts keep the same class ratio, and seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)

<h2>Data Analysis</h2>

In [None]:
# Class balance over all loaded labels (0 = no flood, 1 = flood).
# The counts come from `labels`, i.e. the whole dataset, not only the training split.
(unique, counts) = np.unique(labels, return_counts=True)

# Look counts up by class value so a class with no samples yields 0
# instead of an IndexError on counts[1].
class_counts = dict(zip(unique.tolist(), counts.tolist()))
flooded = class_counts.get(1, 0)
clean = class_counts.get(0, 0)
total = flooded + clean

print(f"The number of images in the dataset containing flooded areas is {flooded}\n")
print(f"While the number of images clean from floods is {clean}\n")
percentage = round(flooded / total * 100, 3) if total else 0.0
print(f"That makes the percentage of flooded areas {percentage}%")


<h2>Model Definition</h2>

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
import pydot

In [None]:
# Small CNN: two convolution + max-pooling stages followed by a dense head
# that ends in a 2-way softmax. Input is a 512x512 image with 3 channels.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(512, 512, 3)),
    MaxPool2D(2, 2),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Flatten(),
    Dense(128, activation=tf.nn.relu),
    Dense(2, activation=tf.nn.softmax),
])

In [None]:
# Integer class labels (0/1) with a 2-unit softmax output -> sparse categorical cross-entropy
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

<h2>Training</h2>

In [None]:
# Train for 20 epochs; validation_split=0.2 sets aside 20% of X_train for validation
# (separate from the X_test hold-out).
# NOTE(review): no rescaling of pixel values is visible before this point - confirm intended.
history = model.fit( X_train, y_train, batch_size=28, epochs=20, validation_split = 0.2)

<h2>Evaluation</h2>

In [None]:
def plot_accuracy_loss(history):
    """Plot training vs. validation accuracy and loss, side by side.

    Args:
        history: Object with a ``history`` dict of per-epoch metric lists, as
            returned by ``model.fit``. Accuracy is looked up under
            ``accuracy`` / ``val_accuracy`` (the key used when the model is
            compiled with ``metrics=['accuracy']``) and, failing that, under
            the older ``acc`` / ``val_acc`` names.

    Raises:
        KeyError: If a required metric is missing from ``history.history``.
    """
    records = history.history

    def first_available(*names):
        # Return the series stored under the first of `names` that exists.
        for name in names:
            if name in records:
                return records[name]
        raise KeyError(f"none of {names} found in history; available keys: {sorted(records)}")

    # One row, two columns: accuracy on the left, loss on the right
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

    # Plot accuracy
    ax_acc.plot(first_available('accuracy', 'acc'), 'bo--', label="acc")
    ax_acc.plot(first_available('val_accuracy', 'val_acc'), 'ro--', label="val_acc")
    ax_acc.set_title("train_acc vs val_acc")
    ax_acc.set_ylabel("accuracy")
    ax_acc.set_xlabel("epochs")
    ax_acc.legend()

    # Plot loss function
    ax_loss.plot(first_available('loss'), 'bo--', label="loss")
    ax_loss.plot(first_available('val_loss'), 'ro--', label="val_loss")
    ax_loss.set_title("train_loss vs val_loss")
    ax_loss.set_ylabel("loss")
    ax_loss.set_xlabel("epochs")
    ax_loss.legend()

    fig.tight_layout()
    plt.show()