This notebook normalizes the raster image data to the 0-1 range.  

***Import necessary libraries***

In [1]:
import os
import numpy as np
import rasterio
import rasterio.features

Let's create the file variables.

In [2]:
# Root directory of the input rasters; every path used below is built from it.
folder = "H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb"

# Band identifiers. Each one is spliced into the file name pattern
# "s2_2020_B<id>.tif" when the bands are processed further down.
list_bands = "02 03 04 05 06 07 08 8A 11 12".split()

Let's create the function that normalizes the data based on this:  
```txt
For each band separately, normalize data to the interval [0, 1]:
```
```python
norm_data = np.clip( (data - min_val) / (max_val - min_val), 0, 1)
```
```txt
where min_val and max_val correspond respectively to the 2nd and 98th percentiles of the data. Any values outside the range [min_val, max_val] (possible outliers) are clipped to 0 or 1.
```

In [3]:
def normalize_and_save_band_acquisition(band, output_folder):
    """Normalize each acquisition of a raster to [0, 1] and save it as float32.

    Every acquisition (raster band index 1..count) is stretched independently
    between its own 2nd and 98th percentiles; values outside that interval are
    clipped to 0 or 1.

    Args:
        band: Path of the raster to read (passed to ``rasterio.open``).
        output_folder: Directory receiving the output file. It is not created
            here. The file is named ``normalized_<basename of band>.tif``; the
            input extension is kept, so ``x.tif`` becomes
            ``normalized_x.tif.tif``.
    """
    with rasterio.open(band) as src:
        normalized_acquisitions = []
        for acquisition in range(1, src.count + 1):
            print(f"Acquisition {acquisition}/{src.count}")
            data_array = src.read(acquisition)
            min_val = np.percentile(data_array, 2)
            max_val = np.percentile(data_array, 98)

            value_range = max_val - min_val
            if value_range == 0:
                # Both percentiles coincide: dividing would turn every pixel
                # equal to that value into 0/0 = NaN. Keep the clipping
                # semantics instead (above the bound -> 1, everything else -> 0).
                norm_band = (data_array > max_val).astype('float32')
            else:
                norm_band = np.clip((data_array - min_val) / value_range, 0, 1).astype('float32')
            print(f"Min: {min_val}, Max: {max_val}, Mean: {np.mean(norm_band)}")

            # Append the normalized acquisition to the list
            normalized_acquisitions.append(norm_band)

        # Stack the acquisitions along a new leading dimension (time)
        normalized_band = np.stack(normalized_acquisitions, axis=0)

        # Define the output file path for the normalized band
        output_path = os.path.join(output_folder, f'normalized_{os.path.basename(band)}.tif')

        # Reuse the source profile, switching only the band count and dtype
        profile = src.profile.copy()
        profile.update(count=src.count, dtype='float32')  # Set the count to the number of acquisitions
        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(normalized_band)

In [4]:
def normalize_and_save_band_acquisition_uint16(band, output_folder):
    """Normalize each acquisition of a raster and save it rescaled to uint16.

    Every acquisition (raster band index 1..count) is stretched independently
    between its own 2nd and 98th percentiles, clipped to [0, 1], multiplied by
    65535 and cast to uint16 (the cast truncates the fractional part).

    Args:
        band: Path of the raster to read (passed to ``rasterio.open``).
        output_folder: Directory receiving the output file. It is not created
            here. The file is named ``normalized_<basename of band>.tif``; the
            input extension is kept, so ``x.tif`` becomes
            ``normalized_x.tif.tif``.
    """
    with rasterio.open(band) as src:
        normalized_acquisitions = []
        for acquisition in range(1, src.count + 1):
            print(f"Acquisition {acquisition}/{src.count}")
            data_array = src.read(acquisition)
            min_val = np.percentile(data_array, 2)
            max_val = np.percentile(data_array, 98)

            value_range = max_val - min_val
            if value_range == 0:
                # Both percentiles coincide: dividing would produce 0/0 = NaN,
                # and casting NaN to uint16 is undefined. Keep the clipping
                # semantics instead (above the bound -> 1, everything else -> 0).
                unit_band = (data_array > max_val).astype('float64')
            else:
                unit_band = np.clip((data_array - min_val) / value_range, 0, 1)

            # Scale the [0, 1] values back to the full uint16 range
            norm_band = (unit_band * 65535).astype('uint16')

            print(f"Min: {min_val}, Max: {max_val}, Mean: {np.mean(norm_band)}")

            # Append the normalized acquisition to the list
            normalized_acquisitions.append(norm_band)

        # Stack the acquisitions along a new leading dimension (time)
        normalized_band = np.stack(normalized_acquisitions, axis=0)

        # Define the output file path for the normalized band
        output_path = os.path.join(output_folder, f'normalized_{os.path.basename(band)}.tif')

        # Reuse the source profile, switching only the band count and dtype
        profile = src.profile.copy()
        profile.update(count=src.count, dtype='uint16')  # Set the count to the number of acquisitions
        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(normalized_band)


In [5]:
def normalize_and_save__all_band(band, output_folder):
    """Normalize a whole raster to [0, 1] with one shared stretch and save it.

    All acquisitions are read at once and the 2nd and 98th percentiles are
    computed over the entire stack, so every acquisition is scaled with the
    same bounds. Values outside the bounds are clipped to 0 or 1 and the
    result is written as float32.

    Args:
        band: Path of the raster to read (passed to ``rasterio.open``).
        output_folder: Directory receiving the output file. It is not created
            here. The file is named ``normalized_<basename of band>.tif``; the
            input extension is kept, so ``x.tif`` becomes
            ``normalized_x.tif.tif``.
    """
    with rasterio.open(band) as src:
        # Read all acquisitions into a 3D NumPy array
        data_array = src.read()

        # Compute the min and max values over all acquisitions
        min_val = np.percentile(data_array, 2)
        max_val = np.percentile(data_array, 98)

        # Normalize the entire stack
        value_range = max_val - min_val
        if value_range == 0:
            # Both percentiles coincide: dividing would turn every pixel equal
            # to that value into 0/0 = NaN. Keep the clipping semantics
            # instead (above the bound -> 1, everything else -> 0).
            norm_band = (data_array > max_val).astype('float32')
        else:
            norm_band = np.clip((data_array - min_val) / value_range, 0, 1).astype('float32')
        print(f"Min: {min_val}, Max: {max_val}, Mean: {np.mean(norm_band)}")

        # Define the output file path for the normalized band
        output_path = os.path.join(output_folder, f'normalized_{os.path.basename(band)}.tif')

        # Reuse the source profile. The normalized array still holds one layer
        # per acquisition, so the output must declare src.count bands; a count
        # of 1 would not match the 3D array handed to dst.write().
        profile = src.profile.copy()
        profile.update(count=src.count, dtype='float32')
        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(norm_band)


In [3]:
def normalize_and_save_band(band, output_folder):
    """Normalize a raster with one shared stretch and save it rescaled to uint16.

    The 2nd and 98th percentiles are computed once over all acquisitions.
    Each acquisition is then re-read on its own, stretched with those shared
    bounds, clipped to [0, 1], multiplied by 65535 and cast to uint16 (the
    cast truncates the fractional part).

    Args:
        band: Path of the raster to read (passed to ``rasterio.open``).
        output_folder: Directory receiving the output file. It is not created
            here. The file is named ``normalized_<basename of band>.tif``; the
            input extension is kept, so ``x.tif`` becomes
            ``normalized_x.tif.tif``.
    """
    with rasterio.open(band) as src:
        print("Start reading band")
        # Read all acquisitions into a 3D NumPy array
        data_array = src.read()

        # Compute the min and max values over all acquisitions
        min_val = np.percentile(data_array, 2)
        max_val = np.percentile(data_array, 98)
        print("Percentile calculated")
        # Drop the full stack before the per-acquisition pass to free up memory
        del data_array

        value_range = max_val - min_val

        normalized_acquisitions = []
        for acquisition in range(1, src.count + 1):
            print(f"Bande {band} - Acquisition {acquisition}/{src.count}")
            data_array = src.read(acquisition)

            if value_range == 0:
                # Both percentiles coincide: dividing would produce 0/0 = NaN,
                # and casting NaN to uint16 is undefined. Keep the clipping
                # semantics instead (above the bound -> 1, everything else -> 0).
                unit_band = (data_array > max_val).astype('float64')
            else:
                unit_band = np.clip((data_array - min_val) / value_range, 0, 1)

            # Scale the [0, 1] values back to the full uint16 range
            norm_band = (unit_band * 65535).astype('uint16')

            print(f"Min: {min_val}, Max: {max_val}, Mean: {np.mean(norm_band)}")

            # Append the normalized acquisition to the list
            normalized_acquisitions.append(norm_band)

        # Stack the acquisitions along a new leading dimension (time)
        normalized_band = np.stack(normalized_acquisitions, axis=0)

        # Define the output file path for the normalized band
        output_path = os.path.join(output_folder, f'normalized_{os.path.basename(band)}.tif')

        # Reuse the source profile, switching only the band count and dtype
        profile = src.profile.copy()
        profile.update(count=src.count, dtype='uint16')  # Set the count to the number of acquisitions
        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(normalized_band)

In [4]:
def get_min_max_band(band, output_folder):
    """Compute the percentile bounds of a raster and record them in a text file.

    All acquisitions are read at once; the 2nd and 98th percentiles and the
    mean are computed over the whole stack, printed, and written to
    ``min_max_<basename of band>.txt`` inside ``output_folder``.

    Args:
        band: Path of the raster to read (passed to ``rasterio.open``).
        output_folder: Directory receiving the text file; created if missing.

    Returns:
        Tuple ``(min_val, max_val)`` holding the 2nd and 98th percentiles.
    """
    with rasterio.open(band) as src:
        print(f"Start reading band {band}")

        # Read all acquisitions into a 3D NumPy array
        data_array = src.read()

        # Compute the min and max values over all acquisitions
        min_val = np.percentile(data_array, 2)
        max_val = np.percentile(data_array, 98)

        print("Percentile calculated")

        # Computed once and reused: the mean scans the whole stack
        mean_val = np.mean(data_array)
        summary = f"Min: {min_val}, Max: {max_val}, Mean: {mean_val}"
        print(summary)

        # open() does not create missing directories, so make sure the
        # destination exists before writing the statistics file
        os.makedirs(output_folder, exist_ok=True)
        output_path = os.path.join(output_folder, f'min_max_{os.path.basename(band)}.txt')
        with open(output_path, 'w') as f:
            f.write(summary)

        return min_val, max_val

In [9]:
output_band_folder = os.path.join(folder, 'min_max_output')
# The statistics files are written with plain open(), which does not create
# missing directories, so make sure the target folder exists first.
os.makedirs(output_band_folder, exist_ok=True)

# Record the 2nd/98th percentile bounds of every band listed in list_bands.
for band in list_bands:
    print(f"Band {band}")
    band_file = os.path.join(folder, 's2_2020_B' + band + '.tif')
    get_min_max_band(band_file, output_band_folder)

Band 02
Start reading band H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif
Percentile calculated
Min: 329.0, Max: 1625.0, Mean: 815.658307728609
Band 03
Start reading band H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B03.tif
Percentile calculated
Min: 593.0, Max: 2002.0, Mean: 1131.2171614467027
Band 04
Start reading band H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B04.tif
Percentile calculated
Min: 413.0, Max: 2524.0, Mean: 1320.227935609443
Band 05
Start reading band H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B05.tif
Percentile calculated
Min: 999.0, Max: 2908.0, Mean: 1798.481426406503
Band 06
Start reading band H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B06.tif
Percentile calculated
Min: 1624.0, Max: 3705.0, Mean: 2567.803318466189
Band 07
Start reading band H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B07.tif
Pe

Create the output folder.

In [9]:
# Directory that receives the normalized rasters. Tolerating an existing
# directory keeps this cell safe to re-run.
output_band_folder = os.path.join(folder, "normalized_band_final_output")
os.makedirs(name=output_band_folder, exist_ok=True)

Now we normalize the data and save it in the output folder.  

In [11]:
# Normalize every band listed in list_bands; the outputs land in
# output_band_folder.
for band in list_bands:
    print("Band " + band)
    band_file = os.path.join(folder, f"s2_2020_B{band}.tif")
    normalize_and_save_band(band=band_file, output_folder=output_band_folder)

Band 02
Start reading band
Percentile calculated
Bande H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif - Acquisition 1/73
Min: 329.0, Max: 1625.0, Mean: 24674.99314553335
Bande H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif - Acquisition 2/73
Min: 329.0, Max: 1625.0, Mean: 26838.710990553198
Bande H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif - Acquisition 3/73
Min: 329.0, Max: 1625.0, Mean: 26337.626332982185
Bande H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif - Acquisition 4/73
Min: 329.0, Max: 1625.0, Mean: 35316.613932336615
Bande H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif - Acquisition 5/73
Min: 329.0, Max: 1625.0, Mean: 31242.48091352189
Bande H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb\s2_2020_B02.tif - Acquisition 6/73
Min: 329.0, Max: 1625.0, Mean: 24329.924259366224
Bande H:/General/ExaplA

RasterioIOError: Read or write failed. H:/General/ExaplAInability_Data/transfer_6060512_files_e989f8bb/s2_2020_B02.tif, band 30: IReadBlock failed at X offset 0, Y offset 632: TIFFReadEncodedStrip() failed.

Once done, we have our normalized data in the output folder.