In [None]:
# Check if the code is running inside Google Colab.
# The google.colab package is imported only to probe for its presence.
try:
    import google.colab  # noqa: F401
    IN_COLAB = True
except ImportError:
    # Only a failed import means "not on Colab"; any other error is left to surface
    # instead of being silently swallowed by a bare `except:`.
    IN_COLAB = False

# Install gdown for downloading files from Google Drive
!pip install -q gdown
import os

if IN_COLAB:

    # Define the path where the repo should be cloned
    repo_path = "/content/TP_wildfire_segmentation_ETU"

    # Clone the GitHub repository if it hasn't been cloned yet
    if not os.path.exists(repo_path):
        !git clone https://github.com/ThomasLOUIS1/TP_wildfire_segmentation_ETU.git {repo_path}

    %cd /content/TP_wildfire_segmentation_ETU/

# Path of the dataset archive, relative to the current working directory
dataset_zip_path = "data/dataset.zip"

# Download the dataset from Google Drive if the archive is not already present
if not os.path.exists(dataset_zip_path):
    import gdown

    # Make sure the destination folder exists so the download has somewhere to write
    os.makedirs(os.path.dirname(dataset_zip_path), exist_ok=True)

    gdown.download(
        "https://drive.google.com/uc?id=1hDrmwxIVmBtMij2h5AL9mV_v1Hs_vZYk",  # Google Drive link of the dataset archive
        dataset_zip_path,
        quiet=False
    )

# Folder whose presence is used as the sign that the archive was already unpacked
dataset_check_path = "data/RGB"

# Unpack the archive into "data" only when that folder is missing
if not os.path.exists(dataset_check_path):
    import zipfile
    with zipfile.ZipFile(dataset_zip_path, mode='r') as dataset_archive:
        dataset_archive.extractall(path="data")



if IN_COLAB:
    %cd /content/TP_wildfire_segmentation_ETU/
    # Define the path to the extracted data directory
    data_dir = "/content/TP_wildfire_segmentation_ETU/data"
else:
    # Define the path to the extracted data directory
    data_dir = "data"



# Wildfire Segmentation with Multi-spectral Images - Bonus

### Try finding the best combination of loss/weight/epochs/architecture to get a test f1_score higher than 98%.

# 0.1 Get dataset files paths

Loading a segmentation dataset differs from loading a classification dataset with a TensorFlow or Keras function. When using Keras for MNIST, for example, images are stored in folders corresponding to a class: folder 0 contains all the images of the digit 0.

Here, we will not load data by class, since the images do not belong to any class. The segmentation task generates a mask (with the same size as the input image), and each pixel of the mask has the value 0 or 1, indicating whether that pixel belongs to the fire or not-fire class.

In [None]:
# Imports
import os
import PIL
from PIL import Image
import tensorflow as tf
import numpy as np


######################################
# Complete the following code replacing "______" :
######################################
# Dataset folder paths, all built by appending a sub-folder name to data_dir
RGB_dir, triband_dir, mask_dir = (
    data_dir + subfolder
    for subfolder in (
        "/RGB",       # Only for display purposes
        "/tribands",  # Data directory
        "/masks",     # Labels directory
    )
)

# Image size, used as the two spatial dimensions of the arrays holding the data
img_size = (256, 256)

######################################
######################################

######################################
# What's the purpose of the following function ?
# Answer : Get all tif files in a directory and return their paths as a sorted list.

def sort_tif_paths_from_folder(dir):
    """
    Collect the paths of the ".tif" files found in a directory, in sorted order.

    Parameters:
    dir (str): directory path containing the tif files

    Returns:
    list: sorted list of tif file paths (each one is `dir` joined with the file name)
    """
    tif_paths = []
    for entry_name in os.listdir(dir):
        # Keep only the entries whose name ends with the ".tif" extension
        if not entry_name.endswith(".tif"):
            continue
        tif_paths.append(os.path.join(dir, entry_name))

    # os.listdir gives no ordering guarantee, so sort explicitly
    tif_paths.sort()
    return tif_paths
######################################
######################################

# Sorted list of tif files for the RGB images
RGB_img_paths = sort_tif_paths_from_folder(RGB_dir)

######################################
# Complete the following code replacing "______" :
######################################
# Sorted lists of tif files for the triband training, validation and test images
triband_img_paths_train, triband_img_paths_val, triband_img_paths_test = (
    sort_tif_paths_from_folder(triband_dir + split_folder)
    for split_folder in ("/train", "/val", "/test")
)

# Sorted lists of tif files for the mask training, validation and test images
mask_img_paths_train, mask_img_paths_val, mask_img_paths_test = (
    sort_tif_paths_from_folder(mask_dir + split_folder)
    for split_folder in ("/train", "/val", "/test")
)
######################################
######################################

######################################
# Complete the following code replacing "______" :
# The idea is to compute the number of samples we have.
# Tip : sum the lengths of the triband_img_paths_train, triband_img_paths_val and triband_img_paths_test lists
######################################
triband_sample_count = sum(
    len(split_paths)
    for split_paths in (triband_img_paths_train, triband_img_paths_val, triband_img_paths_test)
)
print("Number of samples from {} : {}".format(triband_dir, triband_sample_count))

mask_sample_count = sum(
    len(split_paths)
    for split_paths in (mask_img_paths_train, mask_img_paths_val, mask_img_paths_test)
)
print("Number of samples from {} : {}".format(mask_dir, mask_sample_count))
######################################
######################################

# Print the first 6 paths from the RGB, triband and mask path lists, side by side
# Note (from the original author): this only works for the first 15 samples because there are few RGB images
first_rgb_paths = RGB_img_paths[:6]
first_triband_paths = triband_img_paths_train[:6]
first_mask_paths = mask_img_paths_train[:6]
for RGB_path, triband_path, mask_path in zip(first_rgb_paths, first_triband_paths, first_mask_paths):
    print(RGB_path, "|", triband_path, "|", mask_path)


# 0.2 Load dataset

In [None]:
batch_size = 16
######################################
# Complete the following code replacing "______" :
######################################
def load_triband_and_mask_from_paths(paths = None):
    """
    Load the tri-bands and mask images from the given paths into a batched tensorflow dataset.

    The spatial size of the arrays comes from the module-level `img_size`, and the batch
    size from the module-level `batch_size`; both are read when the function is called.

    Parameters:
    paths (List): List of (tri-bands image path, mask image path) tuples

    Returns:
    data (tf.data.Dataset): A batched and prefetched dataset yielding
        (tri-bands images, masks) pairs stored as float32.

    Raises:
    TypeError: if `paths` is None (the default), since there is nothing to load.
    """
    if paths is None:
        # Fail with an explicit message instead of the opaque error raised by len(None)
        raise TypeError("paths must be a list of (tri-bands path, mask path) tuples, not None")

    # Pre-allocate the arrays that will hold the tri-bands images and the masks
    x = np.zeros((len(paths),) + img_size + (3,), dtype="float32")
    y = np.zeros((len(paths),) + img_size + (1,), dtype="float32")

    # Loop through the list of tri-bands and mask image paths
    for i, (triband_path, mask_path) in enumerate(paths):

        # Read the tri-bands image and scale its values by 1/255.
        # The context manager closes the file as soon as the pixels have been copied.
        with Image.open(triband_path) as triband_image:
            x[i] = np.array(triband_image) / 255.0

        # Read the mask image the same way
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)

        # Add a trailing channel axis so the mask fits the (height, width, 1) slot of y
        # (assumes the mask file decodes to a 2-D array -- TODO confirm)
        y[i] = np.expand_dims(mask, 2)

    # Create a tensorflow dataset from the tri-bands and mask arrays
    data = tf.data.Dataset.from_tensor_slices((x, y))

    # Batch the dataset and prefetch upcoming batches
    data = data.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return data
######################################
######################################

In [None]:
# Pair every tri-bands image path with the mask path at the same position,
# separately for the train, val and test splits
train_paths, val_paths, test_paths = (
    list(zip(triband_split_paths, mask_split_paths))
    for triband_split_paths, mask_split_paths in (
        (triband_img_paths_train, mask_img_paths_train),
        (triband_img_paths_val, mask_img_paths_val),
        (triband_img_paths_test, mask_img_paths_test),
    )
)

######################################
# Complete the following code replacing "______" :
######################################
# Load the train, val and test datasets from the lists of (tri-bands path, mask path) tuples
train_ds, val_ds, test_ds = (
    load_triband_and_mask_from_paths(split_paths)
    for split_paths in (train_paths, val_paths, test_paths)
)
######################################
######################################

# 0.4 Declaration of all loss, metrics etc...

In [None]:
from metrics_and_losses import recall_m, precision_m, f1_m

from utils import predict, print_score, display_confusion_matrix, display_sample_prediction, load_masks

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, Dropout, Flatten, Dense, UpSampling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K


def weighted_binary_crossentropy(y_true, y_pred):
    """
    Binary cross-entropy in which the two terms of the log loss carry separate weights.

    The weights are the module-level variables `ones_weight` (applied to the
    `y_true * log(y_pred)` term) and `zeros_weight` (applied to the
    `(1 - y_true) * log(1 - y_pred)` term). They are looked up when the loss is
    evaluated, so both must be defined before the model is trained.

    Parameters:
    y_true: ground-truth tensor
    y_pred: predicted tensor

    Returns:
    The weighted log loss averaged over the last axis.
    """
    eps = K.epsilon()
    # Both tensors are clipped to [eps, 1 - eps]; for y_pred this keeps the logs finite
    y_true = K.clip(y_true, eps, 1 - eps)
    y_pred = K.clip(y_pred, eps, 1 - eps)

    positive_term = y_true * K.log(y_pred) * ones_weight
    negative_term = (1 - y_true) * K.log(1 - y_pred) * zeros_weight
    logloss = -(positive_term + negative_term)
    return K.mean(logloss, axis=-1)

# A Dice loss function that can be passed to model.compile
def dice_loss(y_true, y_pred):
    """
    Dice loss computed over all elements of the two tensors at once.

    Both tensors are flattened, then the loss is
    1 - (2 * sum(y_true * y_pred) + 1) / (sum(y_true) + sum(y_pred) + 1).
    The constant 1 added to the numerator and the denominator keeps the ratio
    defined when both sums are zero.

    Parameters:
    y_true: ground-truth tensor
    y_pred: predicted tensor

    Returns:
    A scalar loss value.
    """
    flat_true = K.flatten(y_true)
    flat_pred = K.flatten(y_pred)
    overlap = K.sum(flat_true * flat_pred)

    numerator = 2. * overlap + 1.
    denominator = K.sum(flat_true) + K.sum(flat_pred) + 1.
    return 1 - numerator / denominator

def weighted_binary_crossentropy_and_dice(y_true, y_pred):
    """
    Sum of weighted_binary_crossentropy and dice_loss for the same pair of tensors.

    Parameters:
    y_true: ground-truth tensor
    y_pred: predicted tensor

    Returns:
    The weighted binary cross-entropy with the Dice loss added to it.
    """
    crossentropy_term = weighted_binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return crossentropy_term + dice_term


# 1.1 Build the model

Here you will find a very basic CNN.

In [None]:
# Drop the reference to any previously built model before creating a new one
model = None

# Two stacked convolutions: 4 ReLU filters, then 1 sigmoid filter producing the output map
model = Sequential([
    Conv2D(4, (3, 3), activation='relu', padding='same', input_shape=(256, 256, 3)),
    Conv2D(1, (3, 3), activation='sigmoid', padding='same'),
])

model.summary()

# 1.2 Train the model

Choose your hyperparameters

In [None]:
####
# Weights read by weighted_binary_crossentropy (only relevant when that loss is used)
ones_weight, zeros_weight = 26.0, 1.0
####

# Loss function handed to model.compile
loss = weighted_binary_crossentropy

# Optimisation settings
learning_rate = 0.01
# NOTE(review): the datasets are batched inside load_triband_and_mask_from_paths,
# using the batch_size value in effect when that function is called
batch_size = 16
epochs = 4

In [None]:
# Metrics reported during training, validation and evaluation
metrics = [tf.keras.metrics.BinaryAccuracy(), recall_m, precision_m, f1_m]

from tensorflow.keras.callbacks import EarlyStopping

# EarlyStopping looks the monitored quantity up by name in the epoch logs, so `monitor`
# must be a string: passing the f1_m function itself never matches a log entry and the
# callback never stops training. 'val_f1_m' is the validation value of the f1_m metric
# (assumes f1_m is a plain function, which Keras logs under its own name --
# check the keys of history.history to confirm).
callback = EarlyStopping(monitor='val_f1_m', mode = 'max', patience=5)

model.compile(optimizer = Adam(learning_rate=learning_rate), metrics = metrics, loss = loss)

# train_ds and val_ds are already batched tf.data datasets, so batch_size is not passed
# to fit: the Keras documentation says not to specify it for dataset inputs.
history = model.fit(train_ds, epochs = epochs, validation_data = val_ds, callbacks = [callback])

# 1.3 Evaluate the model 

In [None]:
# Evaluate the trained model on the test dataset and keep the returned scores
model_score = model.evaluate(test_ds)
# Show the scores with the print_score helper imported from utils
print_score(model_score)

# 1.4 Display predictions

In [None]:
# Display the tri-bands image, the mask and the model's prediction for test samples
# (display_sample_prediction is a helper imported from utils)
display_sample_prediction(model, triband_img_paths_test, mask_img_paths_test)

We can see it with a Confusion Matrix

In [None]:
# Load the ground-truth masks of the test split with the load_masks helper from utils
test_gt_masks = load_masks(mask_img_paths_test, img_size=img_size)

# Pass the model's predictions on the test dataset, together with the ground-truth
# masks, to the confusion-matrix helper from utils
display_confusion_matrix(predict(model, test_ds), test_gt_masks)

Print scores again ...

In [None]:
# Show once more the scores stored in model_score by the evaluation cell
print_score(model_score)

# 1.5 Track your tests if you want !  

| test n°       |example| 1     | 2     | 3     | 4     | 5     | 6     | 7     | 8     | 9     | 10    | ...   | 
| ---           | ---   | ---   | ---   | ---   | ---   | ---   | ---   | ---   | ---   | ---   | ---   | ---   |
| f1_score      | 0.83  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| recall        | 0.97  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| precision     | 0.72  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| epochs        | 4     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| batch_size    | 16    | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| learning rate | 0.01  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| loss          | Dice  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| class weight  | None  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| n conv2d      | 3     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| n conv2dT     | 2     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |
| n params      | 1033  | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     | ?     |