<a href="https://colab.research.google.com/github/Mh-Osman/flood_predict/blob/main/flood.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import zipfile
import os

# Make Google Drive visible inside the Colab VM.
drive.mount('/content/drive')

# Dataset archive on Drive -- edit this if the ZIP lives somewhere else.
zip_path = "/content/drive/MyDrive/SEN12FLOOD.zip"

# Unpack onto the VM's local disk under /content.
extract_path = "/content/SEN12FLOOD"
os.makedirs(extract_path, exist_ok=True)

# Read mode is ZipFile's default; every archive member is extracted.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

print("Dataset extracted successfully.")


Mounted at /content/drive
Dataset extracted successfully.


In [None]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m59.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3


In [None]:
import os
import cv2
import json
import rasterio
import shutil
import subprocess

# ==== CONFIG ====
ROOT_DIR = '/content/SEN12FLOOD/SEN12FLOOD'
OUTPUT_DIR = '/content/processed_data'
FLOOD_DIR = os.path.join(OUTPUT_DIR, 'flood')
NON_FLOOD_DIR = os.path.join(OUTPUT_DIR, 'non_flood')

# Make sure both class folders exist before anything is written to them.
for class_dir in (FLOOD_DIR, NON_FLOOD_DIR):
    os.makedirs(class_dir, exist_ok=True)

# ==== FUNCTIONS ====

def check_empty_img(folder_path):
    """Report whether a sample folder should be treated as empty or unusable.

    The first directory entry whose name ends in ``B01.tif`` is read as a
    single-channel image. The folder counts as empty (``True``) when no such
    entry exists, when the image cannot be decoded, when every pixel is zero,
    or when anything raises along the way (the error is printed). Only a
    decodable image with at least one non-zero pixel yields ``False``.
    """
    try:
        b01_name = next(
            (entry for entry in os.listdir(folder_path) if entry.endswith('B01.tif')),
            None,
        )
        if b01_name is None:
            # No B01 band at all: nothing to validate against.
            return True

        # Flag 0 requests a grayscale read from OpenCV.
        pixels = cv2.imread(os.path.join(folder_path, b01_name), 0)
        has_data = pixels is not None and cv2.countNonZero(pixels) != 0
        return not has_data
    except Exception as e:
        print(f"Error checking image in {folder_path}: {e}")
        return True

def find_band_file(folder_path, band_suffix):
    """Return the path of a file in ``folder_path`` whose name ends with ``band_suffix``.

    ``os.listdir`` yields entries in arbitrary order, so the listing is sorted
    first: when several files match, the lexicographically smallest name wins,
    and lookups for different suffixes resolve against the same ordering
    instead of whatever order the filesystem happens to return.

    Args:
        folder_path: Directory to search (not recursive).
        band_suffix: Filename ending to look for, e.g. ``'B02.tif'``.

    Returns:
        The joined path of the first match in sorted order, or ``None`` when
        no entry matches.

    Raises:
        OSError: If ``folder_path`` cannot be listed.
    """
    for file in sorted(os.listdir(folder_path)):
        if file.endswith(band_suffix):
            return os.path.join(folder_path, file)
    return None

def stack_bands(folder_path, output_path):
    """Stack the B02, B03, B04 and B08 rasters of a folder into one GeoTIFF.

    One file per band suffix is located with ``find_band_file``. The metadata
    of the first band (B02) is reused for the output with ``count`` raised to
    the number of bands, and band 1 of each input is written to output bands
    1..4 in the order listed above.

    Args:
        folder_path: Directory holding the single-band ``.tif`` files.
        output_path: Destination path of the stacked raster.

    Returns:
        ``True`` on success. ``False`` if a band is missing or any read/write
        step raises; the error is printed rather than propagated.
    """
    band_suffixes = ['B02.tif', 'B03.tif', 'B04.tif', 'B08.tif']
    band_paths = []
    # Tracks whether this call may have created/truncated output_path, so a
    # failure before that point never deletes a file this call did not touch.
    output_opened = False

    try:
        for suffix in band_suffixes:
            path = find_band_file(folder_path, suffix)
            if path is None:
                raise FileNotFoundError(f"Missing band: {suffix}")
            band_paths.append(path)

        with rasterio.open(band_paths[0]) as src0:
            meta = src0.meta
        meta.update(count=len(band_paths))

        output_opened = True
        with rasterio.open(output_path, 'w', **meta) as dst:
            for idx, band_path in enumerate(band_paths, start=1):
                with rasterio.open(band_path) as src:
                    dst.write_band(idx, src.read(1))

    except Exception as e:
        print(f"Error stacking bands in {folder_path}: {e}")
        # A half-written raster left in the output folder would later be
        # listed as a valid sample, so discard it.
        if output_opened and os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                print(f"Could not remove partial output {output_path}: {cleanup_error}")
        return False
    return True

def product_name(folder_path):
    """Return the last component of ``folder_path`` (empty if it ends in a separator)."""
    _, tail = os.path.split(folder_path)
    return tail

def remove_empty_folders(folder_path):
    """Recursively delete ``folder_path`` and everything inside it.

    Despite the name, the folder does not have to be empty on disk: the
    callers use it to discard sample folders judged empty or unusable.
    Deletion is best-effort, like ``rm -rf``: a missing path or a failure
    while deleting does not raise.

    Args:
        folder_path: Directory to remove.
    """
    print(f"Removing folder: {folder_path}")
    # Standard-library removal instead of spawning `rm`: no dependency on an
    # external binary, and a path starting with '-' cannot be taken as an option.
    shutil.rmtree(folder_path, ignore_errors=True)

def label(folder_name, s1_dict, s2_dict):
    """Return 1 if any entry recorded for ``folder_name`` is flagged as flooding, else 0.

    ``s1_dict`` is consulted first, then ``s2_dict``. Within the mapping stored
    under ``folder_name``, only values that are themselves dicts are inspected;
    a value whose ``"FLOODING"`` item equals ``True`` marks the folder as flood.
    Folders missing from both mappings are labelled non-flood.
    """
    for catalogue in (s1_dict, s2_dict):
        if folder_name not in catalogue:
            continue
        entries = catalogue[folder_name].values()
        if any(isinstance(entry, dict) and entry.get("FLOODING") == True for entry in entries):
            return 1  # flood

    return 0  # non-flood

# ==== MAIN PROCESSING ====
if __name__ == "__main__":
    # Flood annotations, looked up by sample-folder name (one JSON per sensor list).
    with open(os.path.join(ROOT_DIR, 'S1list.json')) as f:
        s1_dict = json.load(f)
    with open(os.path.join(ROOT_DIR, 'S2list.json')) as f:
        s2_dict = json.load(f)

    # Every immediate sub-directory of ROOT_DIR is treated as one sample.
    flist = []
    for entry in os.listdir(ROOT_DIR):
        candidate = os.path.join(ROOT_DIR, entry)
        if os.path.isdir(candidate):
            flist.append(candidate)
    print(f"Total folders found: {len(flist)}")

    for folder_path in flist:
        folder_name = os.path.basename(folder_path)

        # Samples without usable B01 data are deleted from the extracted dataset.
        if check_empty_img(folder_path):
            print(f"Empty or invalid folder: {folder_path}")
            remove_empty_folders(folder_path)
            continue

        # The label decides which class folder receives the stacked raster.
        is_flood = label(folder_name, s1_dict, s2_dict)
        if is_flood:
            target_dir = FLOOD_DIR
        else:
            target_dir = NON_FLOOD_DIR
        output_file = os.path.join(target_dir, f"{folder_name}.tif")

        success = stack_bands(folder_path, output_file)
        if not success:
            # A sample that cannot be stacked is removed as well.
            print(f"Failed to process: {folder_path}")
            remove_empty_folders(folder_path)
        else:
            print(f"Saved: {output_file}")

    print("Preprocessing completed.")


Total folders found: 335
Saved: /content/processed_data/non_flood/0105.tif
Saved: /content/processed_data/non_flood/0071.tif
Saved: /content/processed_data/flood/0165.tif
Saved: /content/processed_data/flood/0149.tif
Saved: /content/processed_data/flood/0156.tif
Saved: /content/processed_data/flood/0139.tif
Saved: /content/processed_data/flood/5.tif
Empty or invalid folder: /content/SEN12FLOOD/SEN12FLOOD/0146
Removing folder: /content/SEN12FLOOD/SEN12FLOOD/0146
Saved: /content/processed_data/flood/0200.tif
Empty or invalid folder: /content/SEN12FLOOD/SEN12FLOOD/60
Removing folder: /content/SEN12FLOOD/SEN12FLOOD/60
Saved: /content/processed_data/flood/0223.tif
Saved: /content/processed_data/non_flood/0086.tif
Saved: /content/processed_data/flood/18.tif
Saved: /content/processed_data/non_flood/0122.tif
Saved: /content/processed_data/flood/34.tif
Empty or invalid folder: /content/SEN12FLOOD/SEN12FLOOD/0005
Removing folder: /content/SEN12FLOOD/SEN12FLOOD/0005
Saved: /content/processed_data

Total files: 93


In [None]:
pip install rasterio tensorflow



In [None]:
import os
import rasterio
import numpy as np
import tensorflow as tf

def load_tif(path):
    """Read every band of a raster file into a float32 array laid out as (H, W, C).

    Args:
        path: File path as ``str`` or as UTF-8 encoded ``bytes``. The bytes
            form is what ``tf.numpy_function`` passes for string tensors;
            accepting ``str`` as well lets the function be called directly.

    Returns:
        ``numpy.ndarray`` of dtype float32 with the band axis moved last and
        all values divided by 10000.0.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    with rasterio.open(path) as src:
        # NOTE(review): 10000.0 is presumably the Sentinel-2 reflectance
        # scale factor -- confirm against the dataset documentation.
        image = src.read().astype(np.float32) / 10000.0
        image = np.transpose(image, (1, 2, 0))  # (C, H, W) -> (H, W, C)
    return image


def get_image_paths_labels(base_dir):
    """Collect the ``.tif`` files under ``base_dir/flood`` and ``base_dir/non_flood``.

    Filenames are sorted within each class folder. ``os.listdir`` returns
    entries in arbitrary order, and a seeded downstream split is only
    reproducible when its input order is stable.

    Args:
        base_dir: Directory containing the ``flood`` and ``non_flood`` folders.

    Returns:
        ``(image_paths, labels)``: two parallel lists, with all flood samples
        (label 1) first, followed by all non-flood samples (label 0).

    Raises:
        OSError: If either class folder cannot be listed.
    """
    image_paths = []
    labels = []

    for class_folder, class_label in (('flood', 1), ('non_flood', 0)):
        class_dir = os.path.join(base_dir, class_folder)
        for filename in sorted(os.listdir(class_dir)):
            if filename.endswith('.tif'):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(class_label)

    return image_paths, labels

In [None]:
def preprocess(path, label):
    """Turn a (path, label) pair into a (128x128x4 float image, int32 label) pair.

    The raster is loaded through ``load_tif`` wrapped in ``tf.numpy_function``,
    given a static channel dimension, and resized to a fixed spatial size.
    """
    raw = tf.numpy_function(func=load_tif, inp=[path], Tout=tf.float32)
    # numpy_function drops static shape information; restore the channel count
    # (the model input declared in this notebook expects 4 channels).
    raw.set_shape([None, None, 4])
    resized = tf.image.resize(raw, size=[128, 128])
    int_label = tf.cast(label, tf.int32)
    return resized, int_label

def create_tf_dataset(image_paths, labels, batch_size=16, shuffle=True):
    """Build a batched, prefetched ``tf.data`` pipeline from paths and labels.

    Args:
        image_paths: Sequence of raster file paths.
        labels: Sequence of integer labels, parallel to ``image_paths``.
        batch_size: Number of samples per batch.
        shuffle: When true (the default, matching the previous behaviour of
            always shuffling), sample order is randomised on each iteration.
            Pass ``False`` for validation/test data that needs a stable order.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, label_batch)`` pairs
        produced by ``preprocess``.
    """
    ds = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    if shuffle:
        # Shuffle the lightweight (path, label) pairs before decoding, with a
        # buffer covering the whole dataset: a full shuffle without holding
        # decoded images in the shuffle buffer.
        ds = ds.shuffle(buffer_size=max(len(image_paths), 1))
    ds = ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds


In [None]:
base_dir = '/content/processed_data'
image_paths, labels = get_image_paths_labels(base_dir)

# Split 60/20/20 into train / validation / test.
from sklearn.model_selection import train_test_split
# First hold out 40% of the samples (60% train, 40% temp). Stratifying keeps
# the flood / non-flood ratio the same in every subset.
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
    image_paths, labels, test_size=0.4, random_state=42, stratify=labels)

# Halve the held-out 40% into validation and test (20% of the total each).
val_paths, test_paths, val_labels, test_labels = train_test_split(
    temp_paths, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels)

# Create datasets
train_ds = create_tf_dataset(train_paths, train_labels)
val_ds   = create_tf_dataset(val_paths, val_labels)
test_ds  = create_tf_dataset(test_paths, test_labels)

In [None]:
from tensorflow.keras import layers, models

# Small CNN for binary classification of 128x128 four-channel images:
# three convolution stages, global average pooling, then a dense head that
# ends in a single sigmoid unit.
cnn_layers = [
    layers.Input(shape=(128, 128, 4)),

    layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(),

    layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    layers.MaxPooling2D(),

    layers.Conv2D(filters=128, kernel_size=3, activation='relu'),
    layers.GlobalAveragePooling2D(),

    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=1, activation='sigmoid'),
]
model = models.Sequential(cnn_layers)

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Keep the History object so per-epoch loss/accuracy can be inspected or
# plotted afterwards; assigning it also stops the cell from echoing its repr.
history = model.fit(train_ds, validation_data=val_ds, epochs=10)


Epoch 1/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 304ms/step - accuracy: 0.7022 - loss: 0.6741 - val_accuracy: 0.6250 - val_loss: 0.6566
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 101ms/step - accuracy: 0.6607 - loss: 0.6279 - val_accuracy: 0.6250 - val_loss: 0.6534
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 146ms/step - accuracy: 0.6532 - loss: 0.6025 - val_accuracy: 0.6250 - val_loss: 0.6647
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 127ms/step - accuracy: 0.6910 - loss: 0.5522 - val_accuracy: 0.6250 - val_loss: 0.6755
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 110ms/step - accuracy: 0.6549 - loss: 0.5389 - val_accuracy: 0.6250 - val_loss: 0.7033
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 105ms/step - accuracy: 0.7470 - loss: 0.4948 - val_accuracy: 0.6786 - val_loss: 0.6903
Epoch 7/10
[1m11/11[0m [3

<keras.src.callbacks.history.History at 0x7fc61691a790>

In [None]:
# Persist the trained model under /content.
# NOTE(review): the ".h5" extension selects Keras' legacy HDF5 format; recent
# Keras releases recommend the native ".keras" format -- confirm which format
# downstream consumers of this file expect before changing it.
model.save("/content/flood_model.h5")




In [None]:
# Score the trained model on the held-out test dataset. The result is unpacked
# as the loss followed by the single metric the model was compiled with
# (accuracy).
loss, accuracy = model.evaluate(test_ds)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8036 - loss: 0.3441  
Test Accuracy: 82.14%


In [None]:
# Compare predictions with the ground truth for the first test batch.
# The loop variables are named *_batch so they do not rebind `label`, the
# labelling helper defined earlier in this notebook.
for image_batch, label_batch in test_ds.take(1):
    prediction = model.predict(image_batch)
    # Drop only the trailing unit axis so a batch of one stays 1-D instead of
    # collapsing to a scalar; rounding thresholds the sigmoid output at 0.5.
    print("Predicted:", prediction.squeeze(axis=-1).round())
    print("Actual:   ", label_batch.numpy())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Predicted: [1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.]
Actual:    [0 0 1 0 1 1 0 1 1 1 0 0 1 1 1 0]
