Name : Lassalle Baptiste


E-mail : baptiste.lassalle@live.fr


# Useful Jupyter Notebook Shortcuts

Here are some helpful keyboard shortcuts for Jupyter Notebook:

- **M**: Switch to Markdown mode
- **Y**: Switch to Code mode
- **A**: Insert cell above
- **B**: Insert cell below
- **D, D**: (Press D twice) Delete selected cell
- **Shift + Enter**: Run the current cell and move to the next
- **Ctrl + Enter**: Run the current cell and stay on it
- **Shift + Tab**: Show function/method documentation
- **Ctrl + Shift + -**: Split cell at cursor
- **Esc**: Enter command mode (blue border)
- **Enter**: Enter edit mode (green border)


## default imports

In [1]:
# import for internal use
from urllib.error import URLError
from tensorflow.data import Dataset
import os
from pathlib import Path
import tensorflow as tf
from keras.utils import image_dataset_from_directory, get_file

In [2]:
# general imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# data analysis import
from skimage import io, img_as_float
from scipy import stats
from skimage.metrics import peak_signal_noise_ratio as psnr
from skimage.metrics import structural_similarity as ssim

In [4]:
# machine learning imports
from sklearn.model_selection import train_test_split
from keras.layers import Input, Dense, Conv1D, Conv2D, UpSampling2D, MaxPooling2D, AveragePooling2D, Rescaling, Activation, Add, Flatten, Reshape
from keras.models import Sequential, Model
from keras import optimizers, regularizers
from sklearn.metrics import r2_score, mean_absolute_percentage_error



## internal functions

In [5]:
from typing import Union
import tensorflow as tf

def transpose_image_grayscale(image: tf.Tensor) -> tf.Tensor:
    """
    Transpose a grayscale image to portrait orientation if necessary.

    The function compares the first two dimensions of the input and swaps
    them when the height (first dimension) is smaller than the width
    (second dimension). A landscape image therefore becomes portrait, while
    an image whose height is already greater than or equal to its width is
    returned unchanged.

    Parameters
    ----------
    image : tf.Tensor
        A 3D tensor representing a grayscale image with shape (height, width, 1).
        The last dimension represents the single color channel.

    Returns
    -------
    tf.Tensor
        Either the input itself (height >= width) or the input with its first
        two axes swapped (height < width). The channel axis stays last in
        both cases.

    Notes
    -----
    No explicit validation of the input is performed. The transposition uses
    a three-axis permutation (``perm=[1, 0, 2]``), so the input is expected
    to have exactly three dimensions (height, width, channel).

    Examples
    --------
    >>> import tensorflow as tf
    >>> # Create a portrait grayscale image (5x3x1): returned unchanged
    >>> portrait_image = tf.random.normal([5, 3, 1])
    >>> result = transpose_image_grayscale(portrait_image)
    >>> print(tf.shape(result).numpy())
    [5 3 1]

    >>> # Create a landscape grayscale image (3x5x1): transposed to portrait
    >>> landscape_image = tf.random.normal([3, 5, 1])
    >>> portrait_result = transpose_image_grayscale(landscape_image)
    >>> print(tf.shape(portrait_result).numpy())
    [5 3 1]

    See Also
    --------
    tf.transpose : The underlying TensorFlow operation used for transposition.
    """
    # height < width means the image is in landscape mode
    if tf.shape(image)[0] < tf.shape(image)[1]:
        # Swap height and width; the channel axis (index 2) stays in place
        return tf.transpose(image, perm=[1, 0, 2])  # perm keeps the channel axis last
    else:
        # Already portrait (or square): return the image as is
        return image

## Public Functions

In [7]:
def load_and_create_datasets(
    url: str,
    fname: str,
    batch_size: int = 16,
    image_size: tuple[int, int] = (480, 320)
) -> tuple[Dataset, Dataset]:
    """
    Download the denoising dataset and build paired train/test datasets.

    The archive is fetched (or read from a local path), extracted, and four
    image datasets are created: clean and noisy images, for training and for
    testing. Each one goes through the same preprocessing (orientation fix
    and rescaling) before noisy and clean images are zipped into pairs.

    Parameters
    ----------
    url : str
        URL of the dataset zip file. If fetching it fails, the same value is
        retried as a path to a local zip file.
    fname : str
        Filename to save the downloaded dataset. Its stem is also used as the
        name of the extracted top-level folder.
    batch_size : int, optional
        Batch size for the datasets, by default 16.
    image_size : tuple of int, optional
        Target size for the images (height, width), by default (480, 320).
        Only the first two entries are used, so a full
        (height, width, channels) shape is accepted as well.

    Returns
    -------
    Tuple[Dataset, Dataset]
        A tuple containing:
        - dataset_train: A TensorFlow Dataset containing pairs of (noisy, original) images for training
        - dataset_test: A TensorFlow Dataset containing pairs of (noisy, original) images for testing

    Raises
    ------
    Exception
        If both the URL download and the local-file fallback fail; the error
        raised by the second attempt is propagated.

    Notes
    -----
    The function performs several steps:
    1. Downloads and extracts the dataset (verified against a fixed file hash)
    2. Creates separate datasets for original and noisy images, both for training and testing
    3. Applies transpose_image_grayscale to every image, which swaps the first
       two axes when the first is smaller than the second
    4. Normalizes pixel values from [0, 255] to [0, 1]

    All images are loaded in grayscale mode and without shuffling, so the
    noisy and original datasets are read in the same order before being
    zipped (this presumably relies on both folders containing matching file
    names - verify against the archive).

    The expected dataset structure after extraction is:
    - BSD400/original/ (training original images)
    - BSD400/noise_gaussian_15/ (training noisy images)
    - BSD68/original/ (testing original images)
    - BSD68/noise_gaussian_15/ (testing noisy images)

    Examples
    --------
    >>> url = "https://example.com/dataset.zip"
    >>> fname = "denoising_dataset.zip"
    >>> train_ds, test_ds = load_and_create_datasets(url, fname)
    >>> for noisy_batch, clean_batch in train_ds.take(1):
    ...     print(f"Batch shape: {noisy_batch.shape}")

    See Also
    --------
    transpose_image_grayscale : Function applied to every image to fix its orientation
    tensorflow.keras.utils.get_file : Used for downloading and extracting the dataset
    """
    # Callers may pass a full (height, width, channels) shape; keep the spatial part only.
    image_size = tuple(image_size[0:2])
    dset_name = Path(fname).stem

    file_hash = r"d68e70fa5e9ba1ae4d36dd40a0095f1e1bb49d6574c0372eb5079c636fce0651"

    def _fetch(origin: str) -> str:
        """Download (or reuse the cached copy of) the archive at `origin` and extract it."""
        return get_file(
            fname=fname,
            origin=origin,
            extract=True,
            file_hash=file_hash,
            archive_format="zip",
            force_download=False,
        )

    try:
        dset_download = _fetch(url)
    except Exception:
        # Broad on purpose: whatever made the remote fetch fail, the local-file
        # fallback should still be attempted. A failure of the fallback propagates.
        print("URL download failed, try using DATASET_URL as a local filename")
        # Build a well-formed file:// URI (absolute path, forward slashes,
        # percent-encoding) instead of concatenating "file:\" with the raw path.
        dset_download = _fetch(Path(url).expanduser().resolve().as_uri())

    # Step 2: Get the path of the extracted directory
    dataset_root = Path(os.path.dirname(dset_download)).joinpath(dset_name)
    if not dataset_root.is_dir():
        # NOTE(review): some Keras releases appear to return the extraction
        # directory rather than the archive path - confirm against the installed
        # version. These candidates are only probed when the usual layout is absent.
        for candidate in (Path(dset_download, dset_name), Path(dset_download)):
            if candidate.joinpath("BSD400").is_dir():
                dataset_root = candidate
                break

    image_dataset_from_directory_common_args = {
        "label_mode": None,
        "class_names": None,
        "color_mode": "grayscale",
        "batch_size": batch_size,
        "image_size": image_size,
        # No shuffling: noisy and original images must come out in the same
        # order so that zipping the two datasets yields matching pairs.
        "shuffle": False,
        "seed": 42,
        "validation_split": None,
        "subset": None,
        "interpolation": "bilinear",
        "follow_links": False,
        "crop_to_aspect_ratio": False,
        "pad_to_aspect_ratio": False,
        "data_format": None,
        "verbose": False,
    }

    # One Rescaling layer shared by all four datasets (0-255 -> 0-1).
    rescale = Rescaling(1.0 / 255)

    def _load_images(split: str, variant: str) -> Dataset:
        """Load one image folder, fix image orientation, then rescale pixel values."""
        images = image_dataset_from_directory(
            dataset_root.joinpath(split, variant),
            **image_dataset_from_directory_common_args
        )
        # Each dataset element is a batch, so the per-image function is mapped over it.
        images = images.map(lambda batch: tf.map_fn(transpose_image_grayscale, batch))
        return images.map(lambda batch: rescale(batch))

    train_images_original = _load_images("BSD400", "original")
    train_images_noisy = _load_images("BSD400", "noise_gaussian_15")
    test_images_original = _load_images("BSD68", "original")
    test_images_noisy = _load_images("BSD68", "noise_gaussian_15")

    dataset_train = Dataset.zip((train_images_noisy, train_images_original))
    dataset_test = Dataset.zip((test_images_noisy, test_images_original))
    print("Datasets for train and test, created. Please note that pixels values have been rescale from 0->255 to 0->1")
    return dataset_train, dataset_test

In [8]:
def dataset_to_list(dataset):
    """
    Unbatch a paired dataset into two tuples of per-image arrays.

    Parameters
    ----------
    dataset : iterable
        Yields ``(noisy_batch, original_batch)`` pairs. Each batch must expose
        a ``.numpy()`` method returning an array whose first axis indexes the
        images of the batch.

    Returns
    -------
    tuple
        ``(noisy_images, original_images)``: two tuples holding one array per
        image, in iteration order. Every length-1 axis of an image is removed
        (``squeeze`` is called without an axis argument).
    """
    noisy_images, reference_images = [], []

    for noisy_batch, reference_batch in dataset:
        # Iterating an array walks its first axis, i.e. one image at a time.
        noisy_images.extend(picture.squeeze() for picture in noisy_batch.numpy())
        reference_images.extend(picture.squeeze() for picture in reference_batch.numpy())

    return tuple(noisy_images), tuple(reference_images)

In [9]:
   def fourier_denoise(image, threshold=0.1):
       """
       Denoise an image by discarding weak Fourier coefficients.

       Parameters
       ----------
       image : array-like
           Image to filter. The 2D FFT is taken with ``np.fft.fft2``; a 2D
           grayscale array is presumably what callers pass - verify.
       threshold : float, optional
           Fraction of the largest spectral magnitude. Only coefficients whose
           magnitude is strictly greater than ``threshold * max`` are kept,
           by default 0.1.

       Returns
       -------
       tuple
           ``(denoised, spectrum, filtered_spectrum)`` where ``denoised`` is the
           real part of the inverse transform clipped to [0, 1], ``spectrum`` is
           the centered (fftshift-ed) FFT of the input, and
           ``filtered_spectrum`` is that spectrum after masking.
       """
       # Centered spectrum: zero frequency moved to the middle of the array.
       spectrum = np.fft.fftshift(np.fft.fft2(image))

       # Magnitudes are needed for both the mask and its reference maximum.
       magnitude = np.abs(spectrum)
       keep = magnitude > threshold * np.max(magnitude)

       filtered_spectrum = spectrum * keep

       # Undo the centering before inverting, then drop the imaginary residue.
       restored = np.fft.ifft2(np.fft.ifftshift(filtered_spectrum)).real

       return np.clip(restored, 0., 1.), spectrum, filtered_spectrum

## global variables

In [10]:
# Remote location of the dataset archive and the filename it is saved under
DATASET_URL = "https://amubox.univ-amu.fr/s/TP3mLFKikYdxt7o/download/dataset_bsd400_68.zip"
DATASET_FNAME = "dataset_bsd400_68.zip"
# Images are downsampled to reduce memory usage during training
IMAGE_SHAPE = (240, 160, 1) # the last value is the number of channels: 1 for grayscale, 3 for RGB

# Dataset Loading

In [11]:
# IMAGE_SHAPE includes the channel count; the loader keeps only its first two entries as the resize target
dataset_train, dataset_test = load_and_create_datasets(DATASET_URL, DATASET_FNAME, image_size=IMAGE_SHAPE)

Downloading data from https://amubox.univ-amu.fr/s/TP3mLFKikYdxt7o/download/dataset_bsd400_68.zip
[1m182766331/182766331[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 0us/step
Datasets for train and test, created. Please note that pixels values have been rescale from 0->255 to 0->1


In [12]:
# Unbatch both datasets into tuples of per-image NumPy arrays: (noisy images, reference images)
images_noise_test, images_ref_test = dataset_to_list(dataset_test)
images_noise_train, images_ref_train = dataset_to_list(dataset_train)

## Dataset Analysis

## Perceptron Autoencoder

## CNN Autoencoder

## Conclusions