<img src="https://radar.community.uaf.edu/wp-content/uploads/sites/667/2021/03/HydroSARbanner.jpg" width="100%" />

<br>
<font size="6"> <b>FIER Daily Flood Forecasting Code</b><img style="padding: 7px" src="https://radar.community.uaf.edu/wp-content/uploads/sites/667/2021/03/UAFLogo_A_647.png" width="170" align="right"/></font>

<br>
<font size="4"> <b> Franz J Meyer, University of Alaska Fairbanks</b> <br>
</font>

This notebook develops an algorithm to generate daily flood inundation predictions using time series of Sentinel-1 RTC data and GEOGloWS river runoff forecasts. 
    
The workflow utilizes information available in the <a href="https://github.com/SERVIR/fierpy">fierpy</a> GitHub repository.
<hr>

# Load Python Libraries

In [None]:
from ipyfilechooser import FileChooser
import rioxarray as rxr
import xarray as xr
import fierpy
import glob
import pandas as pd
from pathlib import Path
from ipyfilechooser import FileChooser
import re
from fier_local import reof as freof
from fier_local import sel_best_fit
import opensarlab_lib as asfn
from osgeo import gdal, osr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.dates as mdates
import ipywidgets as widgets
from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import KFold
import sklearn.metrics as metrics
import warnings
import sys
import os

In [None]:
def get_dates(dir_path, prefix):
    """Collect the 8-digit date stamps found in the paths of matching GeoTIFFs.

    Parameters
    ----------
    dir_path : pathlib.Path
        Directory to search; only its ``glob`` method is used.
    prefix : str
        Glob prefix; every path matching ``f'{prefix}.tif*'`` is inspected.

    Returns
    -------
    list of str
        The first run of eight consecutive digits found in each matching path
        (the whole path string is searched, not only the file name), in the
        order ``glob`` yields the paths. Paths without such a run are skipped.
    """
    # Raw string: '\d' inside a plain string literal is an invalid escape sequence.
    date_regex = re.compile(r'\d{8}')
    found = (date_regex.search(str(p)) for p in dir_path.glob(f'{prefix}.tif*'))
    return [match.group(0) for match in found if match]

def normalize(arr):
    """Min-max scale ``arr`` so its smallest value maps to 0 and its largest to 1.

    The extrema are taken over the whole input with ``np.min`` / ``np.max``.
    A constant input makes the denominator zero; that case is not handled here.
    """
    lowest = np.min(arr)
    value_range = np.max(arr) - lowest
    return (arr - lowest) / value_range


def append_all(dic, name, to_append):
    """Append ``to_append`` to the ``name`` list of every second-level entry of ``dic``.

    ``dic`` is walked two levels deep (``dic[k1][k2]``); each second-level value
    must map ``name`` to an object with an ``append`` method. The lists are
    modified in place and nothing is returned.
    """
    for first_level in dic.values():
        for second_level in first_level.values():
            second_level[f'{name}'].append(to_append)


**Function to grab the centerpoint coordinates of the AOI**

In [None]:
def get_centerpoint_coordinates(tif_file):
    """Return the centre of a raster, transformed to EPSG:4326.

    Parameters
    ----------
    tif_file : str or pathlib.Path
        Raster readable by GDAL.

    Returns
    -------
    tuple
        The first two components of ``TransformPoint``'s result, in swapped
        order: ``(second, first)``.
        NOTE(review): which component is latitude depends on the axis-mapping
        strategy of the installed GDAL. Under authority-compliant order
        (presumably the GDAL >= 3 default for EPSG:4326) ``TransformPoint``
        yields (lat, lon), so this function would effectively return
        (lon, lat). The return order is left untouched because callers unpack
        it positionally — confirm against the deployed GDAL version.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``tif_file``.
    """
    dataset = gdal.Open(str(tif_file))
    if dataset is None:
        raise RuntimeError(f"GDAL could not open raster: {tif_file}")

    # Affine geotransform: x = gt[0] + col*gt[1] + row*gt[2],
    #                      y = gt[3] + col*gt[4] + row*gt[5]
    geotransform = dataset.GetGeoTransform()

    # The centre pixel sits at (width / 2, height / 2) in (col, row) space.
    # The previous formula multiplied the y pixel size by 1 instead of by the
    # raster height, which placed the "centre" on the top edge of the image.
    center_col = dataset.RasterXSize / 2
    center_row = dataset.RasterYSize / 2
    center_x = geotransform[0] + geotransform[1] * center_col + geotransform[2] * center_row
    center_y = geotransform[3] + geotransform[4] * center_col + geotransform[5] * center_row

    # Target reference system: EPSG:4326
    src_srs = osr.SpatialReference()
    src_srs.ImportFromEPSG(4326)

    # Reference system the raster is stored in
    dataset_srs = osr.SpatialReference()
    dataset_srs.ImportFromWkt(dataset.GetProjection())

    # Transform the centre point from the raster's system to EPSG:4326
    coord_transform = osr.CoordinateTransformation(dataset_srs, src_srs)
    lon, lat, _ = coord_transform.TransformPoint(center_x, center_y)

    return lat, lon


**Function to calculate the fits between the data, discharge and precipitation**

In [None]:
def find_best_fit_nn(var, reof_ds, dataset, models_neural):
    """Fit one small dense network per temporal mode and score the reconstruction.

    For every mode in ``reof_ds.mode`` a Keras regressor (two 64-unit ReLU
    layers and one linear output) is trained to map the min-max normalized
    ``var`` onto the min-max normalized temporal mode. Three contiguous
    cross-validation folds are evaluated; the candidate with the lowest
    validation RMSE is refit on the whole series and kept. Its predictions,
    multiplied by the matching spatial mode, are summed into a reconstruction
    of ``dataset``.

    Parameters
    ----------
    var : xarray.DataArray
        Predictor time series (its ``values`` are overwritten on copies only).
    reof_ds : xarray.Dataset
        Must expose ``mode``, ``temporal_modes`` and ``spatial_modes``; modes
        are indexed with ``mode - 1``.
    dataset : xarray.DataArray
        Image stack the reconstruction is compared against.
    models_neural : any
        Unused; kept so existing callers keep working.

    Returns
    -------
    rmse_dataset
        RMSE between ``normalize(reconstruction)`` and ``normalize(dataset)``.
    models : list
        One fitted model per mode, in the order of ``reof_ds.mode``.

    Notes
    -----
    The networks are trained on *normalized* inputs and targets; anything
    that calls ``predict`` on them later has to feed values on the same scale.
    """
    import contextlib

    # Silence the TensorFlow hint about using tf.data datasets
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    # Define the neural network architecture
    def create_model():
        model = keras.Sequential([
            keras.layers.Dense(64, activation='relu', input_shape=(1,)),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(1)
        ])
        model.compile(optimizer='adam', loss='mean_squared_error')
        return model

    # Best model of every mode
    models = []

    # Normalized predictor
    hindcast_var = (var - var.min()) / (var.max() - var.min())

    # Reconstructed dataset
    recon_da = np.zeros((dataset.shape))

    # K-fold cross-validation set-up (no shuffling, so it can be shared by all modes)
    k = 3
    kf = KFold(n_splits=k)

    # Mute stdout while Keras runs. redirect_stdout restores whatever stream was
    # active before (inside a notebook that is NOT sys.__stdout__), does so even
    # when an exception is raised, and the devnull handle is closed on exit.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):

        # Loop through temporal modes to determine the regression between them and variable
        for mode in reof_ds.mode.values:

            temporal_mode = reof_ds.temporal_modes[:, mode-1]
            hindcast_rtcp = (temporal_mode - temporal_mode.min()) / (temporal_mode.max() - temporal_mode.min())

            # Evaluate one freshly initialised model per fold and keep the best one
            best_model = None
            best_score = np.inf

            for train_index, val_index in kf.split(hindcast_var):
                X_train, X_test = hindcast_var[train_index], hindcast_var[val_index]
                y_train, y_test = hindcast_rtcp[train_index], hindcast_rtcp[val_index]

                model = create_model()
                model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
                y_test_pred = model.predict(X_test)

                # sqrt(MSE) instead of the `squared=False` keyword, which newer
                # scikit-learn releases no longer accept
                rmse = np.sqrt(metrics.mean_squared_error(y_test, y_test_pred))

                if rmse < best_score:
                    best_score = rmse
                    best_model = model

            # Continue training the best model on the entire dataset
            best_model.fit(hindcast_var, hindcast_rtcp, epochs=10, batch_size=32, verbose=0)

            # Append the model in the list
            models.append(best_model)

            # Make predictions using the best model
            hindcast_rtcp_pred = best_model.predict(hindcast_var)

            # Convert to dataarray
            hindcast_rtcp_pred_dataarray = var.copy()
            hindcast_rtcp_pred_dataarray.values = hindcast_rtcp_pred[:, 0]

            # Add to the reconstructed dataset
            recon_da += hindcast_rtcp_pred_dataarray * reof_ds.spatial_modes[:, :, mode-1]

        # Calculate distance between dataset and modeled dataset
        rmse_dataset = np.sqrt(np.mean((normalize(recon_da) - normalize(dataset)) ** 2)).values

    return rmse_dataset, models


In [None]:
def synthesize_neural(var, model, reof_ds, dataset):
    """Rebuild an image stack from per-mode model predictions.

    Parameters
    ----------
    var : xarray.DataArray
        Predictor series with a ``time`` coordinate; ``var.values`` is passed
        to ``predict`` unchanged.
        NOTE(review): if the models were fitted on normalized inputs, ``var``
        presumably has to be normalized the same way — confirm with the caller.
    model : sequence
        One fitted model per mode; entry ``m`` is paired with
        ``reof_ds.spatial_modes[:, :, m]``.
    reof_ds : xarray.Dataset
        Provides ``spatial_modes`` and the ``lat`` / ``lon`` sizes.
    dataset : xarray.DataArray
        Template that is copied to hold the result; its shape must equal
        ``(len(var.time), lat, lon)``.

    Returns
    -------
    xarray.DataArray
        Copy of ``dataset`` whose values are the sum over modes of
        prediction x spatial mode.
    """
    # Start from a zero-filled copy of the template
    da_slice = dataset.copy()
    # `sizes` is the mapping-style lookup; indexing `Dataset.dims` by name is deprecated
    da_slice.values = np.zeros((var.time.shape[0], reof_ds.sizes['lat'], reof_ds.sizes['lon']))

    # One contribution per available model instead of a hard-coded count of four
    for m, mode_model in enumerate(model):

        # Predict the temporal mode amplitude
        forecast_rtcp = mode_model.predict(var.values)

        # Convert to dataarray
        forecast_rtcp_dataarray = var.copy()
        forecast_rtcp_dataarray.values = forecast_rtcp[:, 0]

        # Add to the reconstructed dataset
        da_slice.values += forecast_rtcp_dataarray * reof_ds.spatial_modes[:, :, m]

    return da_slice

In [None]:
def looper(mm, type_file, pola, start_ind, metricranking, vv, fitting):
    """Load one image stack, compute its REOF and optionally fit it to the variables.

    Parameters
    ----------
    mm : dict
        ``mm[var]['Selected']`` is the predictor series for ``var`` in
        ``('Q', 'ERA')``.
    type_file : str
        ``'SAR'`` reads from ``RTC_GAMMA/``; anything else reads from
        ``Water_Masks/``.
    pola : str
        ``'VH'``, ``'VV'`` or ``'VH and VV'``; selects the file-name pattern.
    start_ind : int
        Only ``da[:start_ind]`` is used for the REOF and the fits.
    metricranking : tuple
        ``(metric, ranking)`` forwarded to ``sel_best_fit``.
    vv : dict
        Results container; the fit results are appended to its lists in place.
    fitting : bool
        When false, only the stack and its REOF are computed.

    Returns
    -------
    reof_ds, da, vv
        The REOF dataset (NaNs replaced by 0), the full image stack and the
        (mutated) results container.

    Raises
    ------
    FileNotFoundError
        If no dated GeoTIFF matches the requested pattern.

    Notes
    -----
    Reads the selected working folder from the module-level ``fc`` widget.
    """
    if type_file == 'SAR':
        folder = 'RTC_GAMMA/'
        prefix = '*' if pola == 'VH and VV' else f'*{pola}'
    else:
        folder = 'Water_Masks/'
        prefix = '*combined' if pola == 'VH and VV' else f'*{pola}*'

    tiff_dir = Path(fc.selected)/folder

    # Pair every raster with the date found in its path and sort the PAIRS.
    # Sorting only the dates while keeping the files in arbitrary glob order
    # would label images with the wrong acquisition time.
    date_regex = re.compile(r'\d{8}')
    dated_tiffs = []
    for tiff in tiff_dir.glob(f'{prefix}.tif*'):
        found = date_regex.search(str(tiff))
        if found:
            dated_tiffs.append((found.group(0), tiff))
    if not dated_tiffs:
        raise FileNotFoundError(f"No dated GeoTIFF matching '{prefix}.tif*' in {tiff_dir}")
    dated_tiffs.sort(key=lambda pair: pair[0])

    times = pd.DatetimeIndex([date for date, _ in dated_tiffs])
    times.name = "time"

    da = xr.concat([rxr.open_rasterio(tiff) for _, tiff in dated_tiffs], dim=times)

    # delete the extra data variable 'band'
    da = da.sel(band=1, drop=True)
    # rename autogenerated x,y as lon,lat
    da = da.rename({'x': 'lon', 'y': 'lat'}).fillna(0)

    # Calculate the REOF of the training part of the dataset
    reof_ds = (freof(da[:start_ind], n_modes = 4)).fillna(0)

    if fitting:
        # The r2 score is stored under the "fit" key family, every other metric under "pred"
        suffix = 'fit' if metricranking[0] == 'r2' else 'pred'

        for var in ['Q','ERA']:

            #### ---- Polynomial ---- ####
            # Calculate the fits of different polynomials concerning each main mode
            fits = fierpy.find_fits(reof_ds,mm[var]['Selected'],da[:start_ind])
            # Grab the best fitting mode and coefficients
            name,mode,coeffs = sel_best_fit(fits, metricranking[0], metricranking[1])

            # Record score, coefficients and mode of this combination
            vv[var]['Polynomial']['Score'].append(fits[f"{'_'.join(name.split('_')[:2])}_{suffix}_{metricranking[0]}"])
            vv[var]['Polynomial']['Coeffs'].append(coeffs)
            vv[var]['Polynomial']['Modes'].append(mode)

            #### ---- Neural Network Regression ---- ####
            rmse, models = find_best_fit_nn(mm[var]['Selected'], reof_ds, da[:start_ind], vv[var]['Neural']['Models'])
            vv[var]['Neural']['RMSE'].append(rmse)
            vv[var]['Neural']['Models'].append(models)

    return reof_ds, da, vv

**Function to create z-score flood maps from forecast and RTCs, and calculate their correspondence according to DeVries (2020)**

In [None]:
def z_score(dataset, variable, forecast_slice, hindcast_slice, nb_dry):
    """Compare forecast and hindcast flood maps derived from dry-season z-scores.

    The ``nb_dry`` months with the lowest mean ``variable`` are selected for
    each year that has at least ``nb_dry`` months of data. The images of
    ``dataset`` acquired in those (year, month) pairs form the baseline whose
    mean and standard deviation (in dB) are used to z-score both slices.
    Pixels with a z-score below -3 are classified as flooded.

    Parameters
    ----------
    dataset : xarray.DataArray
        Training image stack with a ``time`` dimension; zeros are replaced by
        1 before the logarithm.
    variable : array-like
        Series aligned with ``times[:start_ind]``.
    forecast_slice, hindcast_slice : array-like
        Images to classify.
        NOTE(review): unlike ``dataset`` these are not protected against
        zero or negative values before the logarithm — confirm the inputs.
    nb_dry : int
        Number of driest months kept per year.

    Returns
    -------
    overall_accuracy : float
        Percentage of pixels on which both maps agree, (TP + TN) / total.
    CSI : float
        Critical success index in percent, TP / (TP + FP + FN); ``nan`` when
        neither map contains a flooded pixel.

    Notes
    -----
    Reads the module-level ``times`` and ``start_ind``.
    NOTE(review): the baseline mean/std are taken over every pixel and date of
    the dry-season images (one scalar each), not per pixel — confirm this is
    the intended baseline.
    """
    # Create dataframe
    df = pd.DataFrame({'time': times[:start_ind], 'var': variable})

    # Extract the year and month from the time column
    df['year'] = df['time'].dt.year
    df['month'] = df['time'].dt.month

    # Monthly average of the variable
    monthly_avg = df.groupby(['year', 'month'])['var'].mean().reset_index()

    # Keep only the years covering at least nb_dry distinct months
    month_counts = monthly_avg.groupby('year')['month'].nunique()
    valid_years = month_counts[month_counts >= nb_dry].index
    filtered_df = df[df['year'].isin(valid_years)]

    # Monthly average restricted to the valid years
    monthly_avg_filtered = filtered_df.groupby(['year', 'month'])['var'].mean().reset_index()

    # The nb_dry driest months of each year, kept as (year, month) pairs.
    # The previous code compared month numbers against row indices and ignored
    # the year, so the wrong acquisitions ended up in the baseline.
    driest = monthly_avg_filtered.sort_values('var', kind='stable').groupby('year').head(nb_dry)
    dry_pairs = pd.MultiIndex.from_frame(driest[['year', 'month']])
    in_dry_season = pd.MultiIndex.from_frame(filtered_df[['year', 'month']]).isin(dry_pairs)
    filtered_df = filtered_df[in_dry_season]

    # Baseline statistics in dB, following DeVries (2020). dB uses log10; the
    # z-scores themselves are unaffected by the base because the constant
    # factor cancels between numerator and denominator.
    dataset2 = 10*np.log10(dataset.where(dataset!=0, 1))
    baseline = dataset2.isel(time=filtered_df.index)
    average = baseline.mean()
    standard_deviation = baseline.std()

    # Calculate the z_score
    z_score_forecast = (10*np.log10(forecast_slice) - average) / standard_deviation
    z_score_hindcast = (10*np.log10(hindcast_slice) - average) / standard_deviation

    # Flood matrices: 0 is no flood, 1 is flood. Threshold at -3, following DeVries (2020)
    flood_forecast = np.zeros((forecast_slice.shape))
    flood_hindcast = np.zeros((hindcast_slice.shape))
    flood_forecast[z_score_forecast < -3] = 1
    flood_hindcast[z_score_hindcast < -3] = 1

    # Contingency table: a = true positive, b = false positive, c = false negative, d = true negative
    a = np.sum(np.logical_and(flood_forecast == 1, flood_hindcast == 1))
    b = np.sum(np.logical_and(flood_forecast == 1, flood_hindcast == 0))
    c = np.sum(np.logical_and(flood_forecast == 0, flood_hindcast == 1))
    d = np.sum(np.logical_and(flood_forecast == 0, flood_hindcast == 0))

    # Skills. Accuracy counts the agreeing pixels (a + d), not the predicted floods (a + b).
    total = a + b + c + d
    overall_accuracy = ((a + d) / total) * 100 if total else np.nan
    flooded_any = a + b + c
    CSI = (a / flooded_any) * 100 if flooded_any else np.nan

    return overall_accuracy, CSI

<hr>

# Choose the folder of the area you want to work with (subfolder of "Dataset")

In [None]:
# Folder picker rooted at the current working directory; the other cells read
# the chosen folder from `fc.selected`.
start_dir = Path.cwd()
fc = FileChooser(start_dir)
display(fc)

#### **Instructions**

**1 - Generate the flood percentage figure**  

**2 - Select a date range (format 'YYYY-MM-DD')**

**3 - Choose the criterion on which you want to base the best fit selection (rmse, r2, r).**

- Choose between "rmse", "r", "r2"
- If you choose "r2", you have to write "max"
- "min" otherwise


## **Variables Description**

We can divide the variables of this notebook in multiple categories: 

**Level 1:**
- Variables to fit: Discharge and Precipitation
- Variables Generated

**Level 2:**
Only for the variables generated.
- Polynomial fit
- Neural Network fit

**Level 3:**
- Reof
- Training Data (stack of SAR or Water Mask images on which the fit is calculated, polarization VV, VH or both)
- Filetype (SAR or Water Mask)
- Polarization (VV, VH, VV & VH)
- Coefficients (for polynomial fit only, coefficients of the best fitting polynomial)
- Modes (for polynomial fit only, mode with the best fit to the variables to fit)
- Models (for neural network only, model with the best variables fit)
- Score (for polynomial only, to help decide which fit is the best)
- RMSE (score for neural network to see which model is the best)
- Reof_Forecast (reof of the images we forecasted)
- Reof_Hindcast (reof of the images we hindcast to compare with forecast)
- Hindcast (hindcast stack of images)
- Forecast (forecast stack of images)
- Best_Index (index of the best score)
- Difference (difference of Reofs of forecast and hindcast)

In [None]:
# Dataset families and polarizations that are evaluated
list_types = ['SAR', 'Water Mask']
list_polarization = ['VH', 'VH and VV', 'VV']

# Figures folder inside the selected area; it also holds the .npy inputs loaded below
pathfig = Path(fc.selected) / 'Figures'
pathfig.mkdir(exist_ok=True)

# Results container: vz[variable][regression][field] starts as an empty list
result_fields = [
    'Reof', 'Training_Data', 'Filetype', 'Polarization',
    'Coeffs', 'Modes', 'Models', 'Score', 'RMSE', 'Reof_forecast',
    'Reof_hindcast', 'Hindcast', 'Forecast', 'Best_Index', 'Difference'
]
vz = {
    variable: {
        regression: {field: [] for field in result_fields}
        for regression in ['Polynomial', 'Neural']
    }
    for variable in ['Q', 'ERA']
}

# Time template taken from the VV rasters (every file combination shares it)
tiff_dir = Path(fc.selected) / 'RTC_GAMMA'
tiffs = list(tiff_dir.glob('*VV.tif*'))

times = pd.DatetimeIndex(sorted(get_dates(tiff_dir, '*VV')))
times.name = "time"

# Precomputed water coverage and its time axis
floodpercent = np.load(pathfig/"flood_percentage.npy")
time_index = pd.DatetimeIndex(np.load(pathfig/"time_index.npy"))

plt.rcParams.update({'font.size': 12})
figname = ('ThresholdAndAreaTS.png')

fig, ax = plt.subplots(figsize=(9,5))

# Water coverage time series with its mean as a dashed reference line
ax.plot(np.unique(time_index), floodpercent, color='b', marker='o', markersize=3, label='Area Covered in Water [%]')
ax.axhline(y=np.mean(floodpercent), color='k', linestyle='--', label='Average Water Coverage [%]')

# The same margin (10 % of the minimum) is applied below the minimum and above the maximum
lowest_cover = np.min(floodpercent)
ax.set_ylim([lowest_cover-lowest_cover*0.1, np.max(floodpercent)+lowest_cover*0.1])

ax.set_xlabel('Date')
ax.set_ylabel('Image Area Covered in Water [%]')
ax.grid()
ax.legend(loc='lower right')
plt.title(f"Maximum water coverage on {time_index[np.argmax(floodpercent)].strftime('%Y-%m-%d')}, index: {np.argmax(floodpercent)}")


**Choose the time window for the forecast**
Choose either a date range, or the index of the event you want to test.

In [None]:
# Forecast window: either dates ('YYYY-MM-DD') or integer positions in `times`
sdate = '2019-11-01'
edate = '2019-12-15'
# Index-based alternative:
#sdate = 128
#edate = 130

# (metric, ranking) used to pick the best polynomial fit
metricranking = ('r2','max')


def _as_time_index(when):
    """Return `when` if it already is an integer index, else the position in `times` closest to that date."""
    # isinstance also accepts numpy integers, which `type(x) is int` rejected
    # (they were then silently parsed as timestamps).
    if isinstance(when, (int, np.integer)):
        return when
    return np.argmin(np.abs(times-pd.to_datetime(when)))


# Each bound is resolved on its own, so an index can be mixed with a date
start_ind = _as_time_index(sdate)
stop_ind = _as_time_index(edate)

# An empty or reversed window cannot be forecast
if start_ind >= stop_ind:
    print('Please select other dates')

**Run the fit calculation for every available polarization and dataset type. This helps choose which combination of dataset type (water masks or SAR) and polarization (VV, VH, or VV & VH) gives the best fit.**

In [None]:
from pyproj import Transformer

# Convert 'times' to a dataarray to use with match_dates()
time_dataarray = xr.DataArray(np.array(times), dims='time', coords={'time': np.array(times)})

# mz[variable] gathers the discharge ('Q') and precipitation ('ERA') series:
# the full record, the part matched to the training images, and the forecast
mz = {
    name: {'Total': None, 'Selected': None, 'Forecast': None}
    for name in ('Q', 'ERA')
}


# Load in memory the discharge dataset (will be the same everytime)
# NOTE(review): get_centerpoint_coordinates() names its return pair `lat, lon`
# while it is unpacked here as `lon, lat`. Whether this is right depends on the
# axis order GDAL uses for EPSG:4326 — confirm before touching either side.
lon,lat = get_centerpoint_coordinates(tiffs[0])
mz['Q']['Total'] = fierpy.get_streamflow(lat,lon)[0]
mz['Q']['Selected'] = fierpy.match_dates(mz['Q']['Total'], time_dataarray[:start_ind])


# Get the projection of the AOI: the code of the last ID[...] entry of the WKT
info = gdal.Info(str(tiffs[0]), format='json')
info = info['coordinateSystem']['wkt']
utm = info.split('ID')[-1].split(',')[1][0:-2]

# Get the bounds of the AOI
dataset = gdal.Open(str(tiffs[0]))
if dataset is None:
    # Fail here rather than with a NameError on xmin/ymin further down
    raise RuntimeError(f"GDAL could not open raster: {tiffs[0]}")

# Corner coordinates from the geotransform
transform = dataset.GetGeoTransform()
xmin = transform[0]
ymax = transform[3]
xmax = xmin + transform[1] * dataset.RasterXSize
ymin = ymax + transform[5] * dataset.RasterYSize

# Without always_xy, pyproj follows each CRS's own axis order: the projected
# source expects (easting, northing) = (x, y) and EPSG:4326 returns (lat, lon).
# The points were previously passed as (y, x).
points = [(xmin, ymin), (xmax, ymax)]
transformer = Transformer.from_crs(int(utm), 4326)
boundaries = np.array(list(transformer.itransform(points)))

# Open Dataarray
era_ds = xr.open_dataarray(f"{Path(fc.selected).parent}/ERA5/era5_data.nc")

# Get the closest lats and lons from the AOI's boundaries, otherwise slicing the dataset can return empty slices
latmin = era_ds['latitude'].values[np.argmin(np.abs(era_ds['latitude'].values - boundaries[0][0]))]
latmax = era_ds['latitude'].values[np.argmin(np.abs(era_ds['latitude'].values - boundaries[1][0]))]
lonmin = era_ds['longitude'].values[np.argmin(np.abs(era_ds['longitude'].values - boundaries[0][1]))]
lonmax = era_ds['longitude'].values[np.argmin(np.abs(era_ds['longitude'].values - boundaries[1][1]))]

# Load in memory the precipitation dataset, fitted to the AOI, as a dataarray so it can use the match_dates function
era_ds = era_ds.sel(longitude = slice(lonmin, lonmax), latitude = slice(latmax, latmin))
# Sum over the two spatial axes to get one value per time step
era_ds = xr.DataArray(np.sum(np.sum(era_ds.values,axis=1),axis=1), dims='time', coords={'time': era_ds.time.values})
# Select the data corresponding to time
era_sel = fierpy.match_dates(era_ds, time_dataarray[:start_ind]).fillna(0)

mz['ERA']['Total'] = era_ds
mz['ERA']['Selected'] = era_sel


# Iterate through every dataset type and polarization to compute the fits with the discharge
n_combinations = len(list_types) * len(list_polarization)
c = 1
for filetype in list_types:
    for polarization in list_polarization:
        print(f"Starting {filetype} {polarization}")
        # There is no VH&VV polarization for SAR so we skip the fitting and
        # store placeholders, keeping every result list aligned by combination
        if filetype == 'SAR' and polarization == 'VH and VV':
            append_all(vz, 'Coeffs', np.array([0,0,0]))
            append_all(vz, 'Modes', 0)
            # Without this placeholder the 'Models' list is one entry short and
            # Best_Index (derived from 'RMSE') points at the wrong model later
            append_all(vz, 'Models', None)
            # The fake score must lose under either ranking; r2 can be
            # negative, so a small positive number is not guaranteed to
            if metricranking[1] == 'min':
                append_all(vz, 'Score', np.inf)
            else:
                append_all(vz, 'Score', -np.inf)

            # Do the same for the neural score (lower is better)
            append_all(vz, 'RMSE', np.inf)

        else:
            reof_ds, da, vv = looper(mz, filetype,
                                            polarization,
                                            start_ind,
                                            metricranking,
                                            vz,
                                            fitting=True
                                            )
        print(f"{c}/{n_combinations} | Finished computing {filetype} {polarization}")
        c += 1

**Select the best mode and associated coefficients based on the ranking score**

In [None]:
# Determine, for each variable and regression type, which filetype/polarization
# combination scored best, then keep only that combination's results.
# The score lists are ordered filetype-major: index = filetype_idx * n_pol + polarization_idx.
# NOTE: this cell replaces the 'Coeffs', 'Modes' and 'Models' lists by their best
# entry, so it cannot be re-run without re-running the fitting cell first.
n_pol = len(list_polarization)

for var in ['Q','ERA']:
    poly_fit = vz[var]['Polynomial']
    nn_fit = vz[var]['Neural']

    # Polynomial fits are ranked by the chosen metric, neural fits always by lowest RMSE
    pick_best = min if metricranking[1] == 'min' else max
    poly_fit['Best_Index'] = poly_fit['Score'].index(pick_best(poly_fit['Score']))
    nn_fit['Best_Index'] = nn_fit['RMSE'].index(min(nn_fit['RMSE']))

    # Translate the flat index back into filetype and polarization
    for fit in (poly_fit, nn_fit):
        filetype_idx, polarization_idx = divmod(fit['Best_Index'], n_pol)
        fit['Filetype'] = 'SAR' if filetype_idx == 0 else 'Water_Mask'
        fit['Polarization'] = list_polarization[polarization_idx]

    # Replace the coeffs, modes and models by their best iteration
    poly_fit['Coeffs'] = np.array(poly_fit['Coeffs'][poly_fit['Best_Index']])
    poly_fit['Modes'] = poly_fit['Modes'][poly_fit['Best_Index']]
    nn_fit['Models'] = nn_fit['Models'][nn_fit['Best_Index']]

    # Recompute the REOF of the best combination (no fitting) and keep the
    # images of the forecast window as the hindcast reference
    poly_fit['Reof'], poly_fit['Training_Data'], *_ = looper(mz, poly_fit['Filetype'], poly_fit['Polarization'], start_ind, metricranking, vz, fitting = False)
    poly_fit['Hindcast'] = poly_fit['Training_Data'][start_ind:stop_ind+1]
    nn_fit['Reof'], nn_fit['Training_Data'], *_ = looper(mz, nn_fit['Filetype'], nn_fit['Polarization'], start_ind, metricranking, vz, fitting = False)
    nn_fit['Hindcast'] = nn_fit['Training_Data'][start_ind:stop_ind+1]


**Prepare the datasets for the fit**

In [None]:
# We fill in the NaNs to avoid breaking the fit.
# fillna() returns a new object, so the result has to be stored back, and every
# variable/regression combination is covered (zip() only paired Q with
# Polynomial and ERA with Neural).
for var in ['Q','ERA']:
    for reg in ['Polynomial','Neural']:
        vz[var][reg]['Reof'] = vz[var][reg]['Reof'].fillna(0)

In [None]:
# Scatter of the best temporal mode against each variable, with the selected
# polynomial drawn on top (one panel per variable)
fig, ax = plt.subplots(figsize=(20,5), ncols=2)
# Both units are wrapped in $...$ so matplotlib renders the exponents
xlabels = ['Discharge [$m^{3}.s^{-1}$]', 'Precipitations [$m.h^{-1}$]']
for i, var in enumerate(['Q','ERA']):
    vals = vz[var]['Polynomial']
    x = np.linspace(0, mz[var]['Selected'].max(),100)
    f = np.poly1d(np.squeeze(vals['Coeffs']))
    ax[i].plot(mz[var]['Selected'],vals['Reof'].temporal_modes[:,vals['Modes']-1], 'o', label=f"{metricranking[0]} = {np.round(vals['Score'][vals['Best_Index']],5)}")
    ax[i].legend()
    ax[i].set_xlabel(xlabels[i])
    ax[i].set_ylabel('Time series amplitude')
    ax[i].plot(x,f(x))
    ax[i].set_title(f"Fit for mode {vals['Modes']}, poly degree {len(vals['Coeffs'])-1}, {vals['Filetype']}_{vals['Polarization']}")
# Double-quoted f-string: reusing the same quote inside the braces is a
# SyntaxError before Python 3.12
fig.savefig(pathfig/f"Polynomial_Fit_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")

In [None]:
# Temporal modes against each variable, with the prediction of the selected
# network of every mode (one row per variable, one column per mode)
fig = plt.figure(constrained_layout=True, figsize=(20, 10))

row_titles = [f"Discharge | {vz['Q']['Neural']['Filetype']}_{vz['Q']['Neural']['Polarization']}, RMSE: {min(vz['Q']['Neural']['RMSE']):.3f}",
              f"Precipitation | {vz['ERA']['Neural']['Filetype']}_{vz['ERA']['Neural']['Polarization']}, RMSE: {min(vz['ERA']['Neural']['RMSE']):.3f}"]

# Create 2x1 subfigs, one per variable
subfigs = fig.subfigures(nrows=2, ncols=1)

for i, var in enumerate(['Q','ERA']):
    subfigs[i].suptitle(f'{row_titles[i]}')

    nn_models = vz[var]['Neural']['Models']

    # One subplot per fitted mode
    axs = subfigs[i].subplots(nrows=1, ncols=len(nn_models))
    # Shared y label on the leftmost panel of the row (the row index `i` was
    # previously used as a column index here)
    axs[0].set_ylabel('Temporal Mode Amplitude', color='black')
    axs[0].yaxis.set_label_coords(-0.2, 0.5)
    # NOTE(review): predict() receives the raw variable here; if the networks
    # were trained on normalized inputs the curve is on a different scale — confirm.
    for col, ax in enumerate(axs):
        ax.scatter(mz[var]['Selected'], vz[var]['Neural']['Reof'].temporal_modes[:, col], label='Hindcast')
        ax.plot(mz[var]['Selected'], nn_models[col].predict(mz[var]['Selected'].values), label='Model', color='orange')
        ax.set_title(f'Temporal Mode {col+1}')
        ax.set_xlabel(xlabels[i])
        ax.legend()
fig.savefig(pathfig/f"Neural_fit_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")


In [None]:
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Normalized temporal modes of the polynomial-fit REOF plotted over time
# together with each normalized variable, with their Pearson correlation
width_per_subplot = 5  # Desired width (in inches) for each subplot
# Taken from an explicit variable instead of whatever `var` an earlier loop left behind
num_columns = len(vz['Q']['Polynomial']['Reof'].mode.values)
fig_width = num_columns * width_per_subplot

fig = plt.figure(constrained_layout=True, figsize=(fig_width, 10))

labels = ['Discharge','Precipitation']
# Create 2x1 subfigs, one per variable
subfigs = fig.subfigures(nrows=2, ncols=1)

# NOTE(review): `row_titles` comes from the neural-network cell and quotes the
# neural filetype/RMSE, while this figure shows the polynomial REOF — confirm.
for i, var in enumerate(['Q','ERA']):
    subfigs[i].suptitle(f'{row_titles[i]}')

    reof = vz[var]['Polynomial']['Reof']

    # One subplot per mode
    axs = subfigs[i].subplots(nrows=1, ncols=len(reof.mode.values))

    # normalize() already maps the series onto [0, 1]
    vari = normalize(mz[var]['Selected'])

    for col, ax in enumerate(axs):
        reof_norm = normalize(reof.temporal_modes[:, col])
        ax.plot(times[:start_ind], reof_norm,
                        label='RTPC', linestyle='-', linewidth=2, color='black')  # RTPC - black solid line
        ax.plot(times[:start_ind], vari,
                        label=labels[i], linestyle='--', linewidth=2, dashes=(5, 2), color='blue')  # variable - blue dashed line
        ax.set_xlabel('Times')  # X-axis label
        ax.set_ylabel('Normalized Values')  # Y-axis label
        corr, _ = pearsonr(reof_norm, vari)
        ax.set_title(f'Mode {col+1} (Correlation: {corr:.3f})', fontsize=20)  # Subplot title with correlation
        ax.legend(fontsize=17)  # Show legend
        ax.tick_params(axis='x', rotation=45)  # Rotate x-axis tick labels by 45 degrees


# Save BEFORE showing: once plt.show() has run, the current figure is gone and
# plt.savefig() would write an empty image
fig.savefig(pathfig/f"Variables_Modes_Correlations_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")
plt.show()  # Display the plot

**Plot the spatiotemporal modes for discharge and precipitation**

In [None]:
# Plot the spatial modes (top row) and temporal modes (bottom row) of the REOF
# for every regression type and driver. The driver series is overlaid on the
# temporal modes as bars on a secondary axis.

fp = []       # dataset identifier (file type + polarization) of every entry visited
plot = True   # whether the current entry gets a figure

for p, reg in enumerate(['Polynomial', 'Neural']):
    for j, var in enumerate(['Q', 'ERA']):
        dataset_id = f"{vz[var][reg]['Filetype']}{vz[var][reg]['Polarization']}"
        fp.append(dataset_id)

        # Skip a non-polynomial entry built on the same file type/polarization
        # as the polynomial entry of the same driver (presumably its figure
        # would duplicate the polynomial one -- confirm). The flag is
        # recomputed on every iteration; it used to stay False forever after
        # the first skip, and the first Neural entry was compared with itself.
        poly_id = f"{vz[var]['Polynomial']['Filetype']}{vz[var]['Polynomial']['Polarization']}"
        plot = not (p >= 1 and dataset_id == poly_id)
        if not plot:
            continue

        reof = vz[var][reg]['Reof']
        num_columns = len(reof.mode.values)  # Number of columns in your subplots
        width_per_subplot = 5  # Desired width (in inches) for each subplot
        fig_width = num_columns * width_per_subplot

        fig, axes = plt.subplots(nrows=2, ncols=num_columns, figsize=(fig_width, 10))
        fig.suptitle(f"{reg} - {var} | Modes from {times[0].strftime('%Y-%m-%d')} to {times[start_ind].strftime('%Y-%m-%d')}, {vz[var][reg]['Filetype']} {vz[var][reg]['Polarization']}", fontsize=22)

        # This part plots the spatial modes
        for i, ax in enumerate(axes[0]):
            spatial_mode = reof.spatial_modes.values[:, :, i]
            centre = np.nanmean(spatial_mode)
            half_range = np.nanstd(spatial_mode) / 2

            # Colour limits: mean +/- half a standard deviation
            mesh = ax.imshow(spatial_mode,
                             cmap='icefire',
                             vmin=centre - half_range,
                             vmax=centre + half_range)

            # 1-based numbering, as in the other mode figures
            ax.set_title(f"Spatial Mode {i+1}")

            # Add a colorbar scaled with the image aspect ratio
            imratio = 0.047 * (spatial_mode.shape[0] / spatial_mode.shape[1])
            cbar0 = fig.colorbar(mesh, ax=ax, fraction=imratio)
            cbar0.set_label('Spatial Mode Value')

        # This part plots the temporal modes with the driver superimposed
        for i, ax in enumerate(axes[1]):
            ax.scatter(times[:start_ind], reof.temporal_modes[:, i], color='red', label='Temporal Modes', s=150)
            # Format x-tick labels as dates
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax.xaxis.set_minor_locator(mdates.AutoDateLocator())
            # Rotate x-tick labels
            plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
            # Set plot title and labels
            ax.set_title(f"Temporal Mode {i+1}")
            ax.set_xlabel('Time')
            ax.set_ylabel('Amplitude')

            # Driver series as bars on a secondary y-axis on the right
            ax_secondary = ax.twinx()
            ax_secondary.bar(times[:start_ind], mz[var]['Selected'].values, color='blue', alpha=0.5, label=labels[j])
            ax_secondary.set_ylabel(xlabels[j])

        fig.tight_layout()
        # Save before showing: once plt.show() has displayed the figure,
        # plt.savefig() may write an empty canvas.
        fig.savefig(pathfig/f"{reg[0]}_Reof_{var}_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png", dpi=400)
        plt.show()
            

<hr>

#### **Calculate the forecast of flooding based on the relationship between discharge and the main modes of our dataset**

**Calculate the number of days between the forecast start date (`times[start_ind]`) and end date (`times[stop_ind]`)**

In [None]:
# Forecast horizon: whole days elapsed between times[start_ind] and times[stop_ind]
forecast_span = times[stop_ind] - times[start_ind]
nb_days_forecast = forecast_span.days

**Generate the forecast based on the best fit coefficients**

In [None]:
# Build the daily forecast dates: nb_days_forecast consecutive days following
# the last time stamp of da[:start_ind], stored as a dataset.
last_training_day = da[:start_ind].time.max().values
forecast_dates = xr.Dataset.from_dataframe(
    pd.DataFrame(
        {'time': last_training_day + np.arange(1, nb_days_forecast+1, dtype='timedelta64[D]')}
    )
)

for var in ['Q','ERA']:
    # Driver values restricted to the forecast dates
    mz[var]['Forecast'] = fierpy.match_dates(mz[var]['Total'], forecast_dates)

    # Use the previously found relationships and functions to generate the
    # RTC/mask forecast. Each synthesis is computed once per driver: the former
    # inner loop over the regression types never used its loop variable and
    # simply repeated both computations.
    vz[var]['Polynomial']['Forecast'] = fierpy.synthesize(vz[var]['Polynomial']['Reof'],
                                                          mz[var]['Forecast'],
                                                          np.poly1d(vz[var]['Polynomial']['Coeffs']),
                                                          vz[var]['Polynomial']['Modes'])

    # The neural synthesis receives the polynomial forecast as its last
    # argument, so it has to run after the polynomial synthesis.
    vz[var]['Neural']['Forecast'] = synthesize_neural(mz[var]['Forecast'],
                                                      vz[var]['Neural']['Models'],
                                                      vz[var]['Neural']['Reof'],
                                                      vz[var]['Polynomial']['Forecast'])


**Grab overlapping dates between forecast and dataset to compare the forecast quality**

In [None]:
# Get the matching indices.
# The reference entry is named explicitly: the lookup used to depend on the
# loop variable `var` left over from the forecast cell ('ERA' when the notebook
# is run top to bottom), which silently changes with the execution order.
# NOTE(review): the same indices are applied to every driver/regression pair,
# which assumes they all share the same time axes -- confirm.
reference = vz['ERA']['Polynomial']
hindcast_times = reference['Hindcast']['time'].values
forecast_times = reference['Forecast']['time'].values
ind_hindcast = np.where(np.isin(hindcast_times, forecast_times))[0]
ind_forecast = np.where(np.isin(forecast_times, hindcast_times))[0]

# Grab the slices of the datasets corresponding to the matching indices
for var in ['Q','ERA']:
    for reg in ['Polynomial','Neural']:
        entry = vz[var][reg]
        entry['Forecast'] = entry['Forecast'].isel(time=ind_forecast)
        entry['Hindcast'] = entry['Hindcast'].isel(time=ind_hindcast)
        # Differences between the forecast and the hindcast on the common dates
        entry['Difference'] = entry['Forecast'] - entry['Hindcast']

### **Z-Score for the forecasts**

**Calculate the forecasts' scores compared to the hindcasts**

In [None]:
# Score every forecast against its hindcast and display the results as a
# 2x2 matrix (rows: drivers, columns: regression types).

# Define the row and column labels
row_labels = ['Discharge', 'Precipitation']
column_labels = ['Polynomial', 'Neural']
n_rows, n_cols = len(row_labels), len(column_labels)

# Prepare the arrays to host the scores results (flat, row-major order)
CSI = np.zeros(n_rows * n_cols)
Overall = np.zeros(n_rows * n_cols)

# Calculate the scores
for i, var in enumerate(['Q','ERA']):
    for j, reg in enumerate(['Polynomial','Neural']):
        k = i * n_cols + j  # flat index of cell (row i, column j)
        CSI[k], Overall[k] = z_score(vz[var][reg]['Training_Data'], mz[var]['Selected'], vz[var][reg]['Forecast'], vz[var][reg]['Hindcast'], 3)

# Create a figure and axis
fig, ax = plt.subplots()

# Plot the matrix using imshow: each cell is the mean of the two scores
im = ax.imshow(np.vstack((CSI, Overall)).mean(axis=0).reshape((n_rows, n_cols)), cmap='Blues')

# Add ticks and labels to the x-axis and y-axis
ax.set_xticks(np.arange(n_cols))
ax.set_yticks(np.arange(n_rows))
ax.set_xticklabels(column_labels)
ax.set_yticklabels(row_labels)

# Loop over the data to add text annotations in each cell.
# The flat index must be row-major (i * n_cols + j) to match the reshape above;
# the former i + j showed the same score in two different cells and never
# displayed the last one.
for i in range(n_rows):
    for j in range(n_cols):
        k = i * n_cols + j
        ax.text(j, i, f"CSI: {CSI[k]:.2f}\nTot: {Overall[k]:.2f}", ha='center', va='center', color='red')

# Set the title
ax.set_title("Forecast Scores")

# Add a colorbar
cbar = ax.figure.colorbar(im, label='Average score [%]')

# Save before showing: once plt.show() has displayed the figure, plt.savefig()
# may write an empty canvas.
fig.savefig(pathfig/f"Scores_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")

# Show the plot
plt.show()

### **Plot the forecasts and their associated real data to compare spatial correspondence**
*The colorbar is adjusted to the values of each normalized dataset. We use as colorbar boundaries:
$\mu(\text{dataset})\pm\sigma(\text{dataset})$*

In [None]:
# Side-by-side maps of the normalized hindcast and forecast for every common
# date: one figure per regression type, rows 0-1 for discharge and rows 2-3
# for precipitation.

names = ['Discharge','','Precipitation']

nrows = 4
height_ratios = [1] * nrows

for reg in ['Polynomial', 'Neural']:

    num_columns = len(vz['Q'][reg]['Forecast'].time)  # Number of columns in your subplots
    if num_columns <= 1:
        # Always create two columns so that `ax` stays 2-D; the unused column
        # is removed further down.
        ncols = 2
        max_iter = 1
    else:
        ncols = num_columns
        max_iter = ncols

    # Equal width for subplots, and additional space for the colorbar in the
    # last column. Built here because it depends on `ncols`, which is only
    # known once the number of dates has been read.
    width_ratios = [1] * ncols
    width_ratios[-1] += 0.075

    width_per_subplot = 5  # Desired width (in inches) for each subplot
    fig_width = num_columns * width_per_subplot

    # Start Figure
    fig, ax = plt.subplots(figsize=(fig_width, 15), nrows = nrows, ncols = ncols, gridspec_kw={'width_ratios': width_ratios, 'height_ratios': height_ratios})

    # Create title. Each half reads its own driver entry; both halves used to
    # read whatever `var` was left over from an earlier loop.
    fig.suptitle(f"Q: {vz['Q'][reg]['Filetype']} {vz['Q'][reg]['Polarization']}, mode {vz['Q'][reg]['Modes']}, poly deg {len(vz['Q'][reg]['Coeffs'])-1} | Prec: {vz['ERA'][reg]['Filetype']} {vz['ERA'][reg]['Polarization']}, mode {vz['ERA'][reg]['Modes']}, poly deg {len(vz['ERA'][reg]['Coeffs'])-1}", fontsize = 20)

    for var, j in zip(['Q', 'ERA'], [0,2]):
        forecast = vz[var][reg]['Forecast'].fillna(0)
        hindcast = vz[var][reg]['Hindcast'].fillna(0)

        # Row labels only depend on the driver: set them once per pair of rows
        ax[j,0].set_ylabel(f"Hindcast {vz[var][reg]['Filetype']} {vz[var][reg]['Polarization']}", fontsize = 15)
        ax[j+1,0].set_ylabel(f"Forecast {vz[var][reg]['Filetype']} {vz[var][reg]['Polarization']}", fontsize = 15)

        # Loop to plot the data. We use the forecast and their associated real data, normalized
        # The colorbar is varying from -std+mean to std+mean
        for i in range(max_iter):
            norm_hind = normalize(hindcast[i])
            norm_fore = normalize(forecast[i])

            im0 = ax[j,i].imshow(norm_hind, vmin = -norm_hind.std() + norm_hind.mean(), vmax = norm_hind.std() + norm_hind.mean())
            # Date of the i-th time step, read from the time slice itself
            # (the title used to index a row of the already-sliced 2-D image).
            ax[0,i].set_title(f"{str(hindcast[i].time.values)[0:10]}, f{ind_forecast[i]}", fontsize = 17)
            im1 = ax[j+1,i].imshow(norm_fore, vmin = -norm_fore.std() + norm_fore.mean(), vmax = norm_fore.std() + norm_fore.mean())

            for axis in (ax[j,i], ax[j+1,i]):
                axis.set_xticks([])
                axis.set_yticks([])

            # Colorbars only next to the last plotted column
            if i == max_iter-1:
                imratio=0.047*(norm_hind.shape[0]/norm_hind.shape[1])
                cbar0 = fig.colorbar(im0, ax=ax[j,i], fraction=imratio)
                cbar1 = fig.colorbar(im1, ax=ax[j+1,i], fraction=imratio)
                cbar0.set_label('Normalized Frobenius Norm')
                cbar1.set_label('Normalized Frobenius Norm')

    # Remove the second column of ax if it is empty
    if num_columns <= 1:
        for row_axes in ax:
            fig.delaxes(row_axes[1])

    # rearange the axes for no overlap
    fig.tight_layout()

    # Plot line in the middle
    line = plt.Line2D([0,1],[0.48,0.48], transform=fig.transFigure, color="red")
    # Create the first title for the first two rows
    title1 = fig.text(0, 0.75, 'Discharge', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)

    # Create the second title for the last two rows
    title2 = fig.text(0, 0.25, 'Precipitations', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)

    fig.add_artist(line)

    # `j` is 2 here (last value of the driver loop), so the file name carries names[2]
    plt.savefig(pathfig/f"{reg[0]}_Forecat_Hindcast_Comparison_{names[j]}_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")

### Pattern analysis

**Calculate the principal modes (REOFs) of the forecast and real data to compare them**




In [None]:
# REOF decomposition of every hindcast and forecast stack, requesting as many
# modes as the stack's first dimension is long. Results are stored next to
# their input under 'Reof_Hindcast' / 'Reof_Forecast'.

for var in ['Q','ERA']:
    for reg in ['Polynomial','Neural']:
        entry = vz[var][reg]
        for kind in ('Hindcast', 'Forecast'):
            stack = entry[kind]
            entry[f'Reof_{kind}'] = freof(stack, n_modes = stack.shape[0])

**Normalize the REOFs for comparison**

In [None]:
# Store, for every driver/regression entry, the forecast-minus-hindcast
# difference of the normalized REOF modes (spatial and temporal).
for var, regressions in vz.items():
    for reg, entry in regressions.items():
        reof_fore = entry['Reof_Forecast']
        reof_hind = entry['Reof_Hindcast']

        # Difference of normalized REOF spatial modes, forecast - hindcast
        spatial_fore = xr.apply_ufunc(normalize, reof_fore.spatial_modes)
        spatial_hind = xr.apply_ufunc(normalize, reof_hind.spatial_modes)
        entry['Reof_Diff_Spatial'] = spatial_fore - spatial_hind

        # Difference of normalized REOF temporal modes, forecast - hindcast
        temporal_fore = xr.apply_ufunc(normalize, reof_fore.temporal_modes)
        temporal_hind = xr.apply_ufunc(normalize, reof_hind.temporal_modes)
        entry['Reof_Diff_Temporal'] = temporal_fore - temporal_hind




**Plot the differences between the REOFs of forecasts and real data**

We use as colorbar boundaries:
$\mu(\text{dataset})\pm\sigma(\text{dataset})$

In [None]:

# Plot the forecast-minus-hindcast differences of the normalized REOF modes:
# one figure per regression type; rows 0-1 show the spatial and temporal
# differences for discharge, rows 2-3 the same for precipitation.
for reg in ['Polynomial', 'Neural']:

    num_columns = len(vz['Q'][reg]['Forecast'].time)  # Number of columns in your subplots
    if num_columns <= 1:
        # Always create two columns so that `ax` stays 2-D; the unused column
        # is removed further down.
        ncols = 2
        max_iter = 1
    else:
        ncols = num_columns
        max_iter = ncols

    width_per_subplot = 5  # Desired width (in inches) for each subplot
    fig_width = num_columns * width_per_subplot

    # Start Figure (plt.subplots already creates it; no separate plt.figure())
    fig, ax = plt.subplots(figsize=(fig_width, 15), nrows = 4, ncols = ncols)

    # Create title. Each half reads its own driver entry; both halves used to
    # read whatever `var` was left over from an earlier loop.
    fig.suptitle(f"Difference REOFs Forecast-Hindcast | Q: {vz['Q'][reg]['Filetype']} {vz['Q'][reg]['Polarization']}, mode {vz['Q'][reg]['Modes']}, poly deg {len(vz['Q'][reg]['Coeffs'])-1} | Prec: {vz['ERA'][reg]['Filetype']} {vz['ERA'][reg]['Polarization']}, mode {vz['ERA'][reg]['Modes']}, poly deg {len(vz['ERA'][reg]['Coeffs'])-1}", fontsize = 20)

    for var, j in zip(['Q', 'ERA'], [0,2]):

        # Load the differences of the normalized spatiotemporal modes
        spatial = vz[var][reg]['Reof_Diff_Spatial']
        temporal = vz[var][reg]['Reof_Diff_Temporal']

        # Dates of the forecast time steps. `ind_forecast` holds positions in
        # the forecast time axis, so it must not be used to index `times`.
        # NOTE(review): assumes the REOFs were computed on this sliced forecast
        # so that lengths match -- confirm.
        forecast_times = vz[var][reg]['Forecast'].time.values

        # The colorbar is varying from -std+mean to std+mean
        for i in range(max_iter):
            mode_diff = spatial.values[:,:,i]
            centre = np.nanmean(mode_diff)
            spread = np.nanstd(mode_diff)

            # Plot the spatial modes and keep the image handle for the colorbar
            im = ax[j,i].imshow(mode_diff,
                                cmap = 'icefire',
                                vmin = centre - spread,
                                vmax = centre + spread)

            # Set plot title and labels
            ax[j,i].set_title(f"Spatial Mode {i+1}")

            # Add colorbar for this image (it used to reuse `mesh`, an image
            # created in a different cell)
            cbar = fig.colorbar(im, ax=ax[j,i])

            # Scatter plot of the temporal mode difference
            ax[j+1,i].scatter(forecast_times, temporal[:, i], color = 'red', label='Temporal Modes', s = 150)
            # Format x-tick labels as dates
            ax[j+1,i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax[j+1,i].xaxis.set_minor_locator(mdates.AutoDateLocator())
            # Rotate x-tick labels
            plt.setp(ax[j+1,i].xaxis.get_majorticklabels(), rotation=45)
            # Set plot title and labels
            ax[j+1,i].set_title(f"Temporal Mode {i + 1}")
            ax[j+1,i].set_xlabel('Time')
            ax[j+1,i].set_ylabel('Amplitude')

    # Remove the second column of ax if it is empty
    if num_columns <= 1:
        for row_axes in ax:
            fig.delaxes(row_axes[1])

    # rearange the axes for no overlap
    fig.tight_layout()

    # Plot line in the middle
    line = plt.Line2D([0,1],[0.47,0.47], transform=fig.transFigure, color="red")

    # Create the first title for the first two rows
    title1 = fig.text(0.01, 0.75, 'Discharge', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)

    # Create the second title for the last two rows
    title2 = fig.text(0.01, 0.25, 'Precipitations', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)
    fig.add_artist(line)

    # `j` is 2 here (last value of the driver loop), so the file name carries names[2]
    plt.savefig(pathfig/f"{reg[0]}_Difference_REOFs_Forecast_Hindcast_{names[j]}_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")






### **Frobenius norm of the spatial REOFs differences**

In [None]:
# Map, for every regression type and driver, the norm taken along the mode
# axis (axis 2) of the REOF spatial-mode differences. Rows 0-1: polynomial,
# rows 2-3: neural.
width_per_subplot = 5  # Desired width (in inches) for each subplot
fig_width = 2 * width_per_subplot

# Start Figure
fig, ax = plt.subplots(figsize=(fig_width, 15), nrows = 4, ncols = 1)

# Title for the entire figure. It was set twice before (once here with a large
# font, then again inside the loop); only the last text was ever displayed, so
# that text is kept and set once.
fig.suptitle('Frobenius norm of spatial modes difference between forecast and real data')

panels = [('Q', 'Discharge difference'), ('ERA', 'Precipitation difference')]

for reg, j in zip(['Polynomial', 'Neural'], [0,2]):
    for offset, (var, panel_title) in enumerate(panels):
        # Norm of the differences of the normalized spatial modes, per pixel
        norm_map = np.linalg.norm(vz[var][reg]['Reof_Diff_Spatial'].values, axis=2)

        # The colorbar is varying from -std+mean to std+mean. NaN-aware
        # statistics keep the limits finite when some pixels are NaN, as in the
        # other spatial-mode plots of this notebook.
        centre = np.nanmean(norm_map)
        spread = np.nanstd(norm_map)

        panel = ax[j + offset]
        im = panel.imshow(norm_map, vmin=centre - spread, vmax=centre + spread)
        cbar = fig.colorbar(im, ax=panel, shrink=1.0)
        cbar.set_label('Frobenius norm value')
        panel.set_title(panel_title)

# rearange the axes for no overlap
fig.tight_layout()

# Plot line in the middle
line = plt.Line2D([0,1],[0.49,0.49], transform=fig.transFigure, color="red")

# Create the first title for the first two rows
title1 = fig.text(0.001, 0.75, 'Polynomial', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)

# Create the second title for the last two rows
title2 = fig.text(0.001, 0.25, 'Neural', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)
fig.add_artist(line)

plt.savefig(pathfig/f"Frobenius_Difference_Forecast_Hindcast_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png", dpi = 400)


### **Gradient and Fourier Transform for spatial comparison**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import convolve
from scipy.fft import fft2, fftshift

# For every regression type, build two figures comparing hindcast and forecast
# on each common date: fig1 shows the normalized gradient magnitude, fig2 the
# normalized log-magnitude of the 2-D Fourier spectrum. Rows 0-1: discharge,
# rows 2-3: precipitation.

nrows = 4
height_ratios = [1] * nrows
width_per_subplot = 5  # Desired width (in inches) for each subplot

for reg in ['Polynomial', 'Neural']:
    # Derive the grid from the data of this regression type instead of reusing
    # ncols / max_iter / num_columns / fig_width left over from other cells.
    num_columns = len(vz['Q'][reg]['Forecast'].time)
    if num_columns <= 1:
        # Always create two columns so that the axes arrays stay 2-D; the
        # unused column is removed further down.
        ncols = 2
        max_iter = 1
    else:
        ncols = num_columns
        max_iter = ncols

    # Equal width for subplots, and additional space for colorbar
    width_ratios = [1] * ncols
    width_ratios[-1] += 0.075
    grid_width = num_columns * width_per_subplot

    # Start Figures
    fig1, ax1 = plt.subplots(figsize=(grid_width, 15), nrows = nrows, ncols = ncols, gridspec_kw={'width_ratios': width_ratios, 'height_ratios': height_ratios})
    fig2, ax2 = plt.subplots(figsize=(grid_width, 15), nrows = nrows, ncols = ncols, gridspec_kw={'width_ratios': width_ratios, 'height_ratios': height_ratios})

    # Create title
    title = f"{reg} | Q: {vz['Q'][reg]['Filetype']} {vz['Q'][reg]['Polarization']}, mode {vz['Q'][reg]['Modes']}, poly deg {len(vz['Q'][reg]['Coeffs'])-1} | Prec: {vz['ERA'][reg]['Filetype']} {vz['ERA'][reg]['Polarization']}, mode {vz['ERA'][reg]['Modes']}, poly deg{len(vz['ERA'][reg]['Coeffs'])-1}"
    fig1.suptitle(title, fontsize = 20)
    fig2.suptitle(title, fontsize = 20)

    for var, j in zip(['Q', 'ERA'], [0,2]):
        # Row labels for this driver, on both figures. They used to be set
        # after the loop, so only the last driver's rows were labelled.
        hind_label = f"Hindcast {vz[var][reg]['Filetype']} {vz[var][reg]['Polarization']}"
        fore_label = f"Forecast {vz[var][reg]['Filetype']} {vz[var][reg]['Polarization']}"
        for axes in (ax1, ax2):
            axes[j,0].set_ylabel(hind_label, fontsize = 15)
            axes[j+1,0].set_ylabel(fore_label, fontsize = 15)

        for i in range(max_iter):
            # Replace NaNs by 0 before any processing: a single NaN makes the
            # whole FFT output NaN and also breaks the min/max normalization.
            hind_img = vz[var][reg]['Hindcast'][i].fillna(0).values
            fore_img = vz[var][reg]['Forecast'][i].fillna(0).values

            # Compute the gradients using finite differences
            dhind = np.gradient(hind_img)
            dfore = np.gradient(fore_img)

            # Compute the magnitude of gradients and normalize it
            dmag_hind = normalize(np.sqrt(dhind[0] ** 2 + dhind[1] ** 2))
            dmag_fore = normalize(np.sqrt(dfore[0] ** 2 + dfore[1] ** 2))

            # Perform Fourier transform, zero frequency shifted to the centre
            ffthind = fftshift(fft2(hind_img))
            fftfore = fftshift(fft2(fore_img))

            # Normalized log-magnitude spectra
            ffthind_norm = normalize(np.log(np.abs(ffthind)))
            fftfore_norm = normalize(np.log(np.abs(fftfore)))

            date_title = f"{str(vz[var][reg]['Hindcast'][i].time.values)[0:10]}, f{ind_forecast[i]}"

            # Gradient figure: colour limits are mean +/- one standard deviation
            im0 = ax1[j,i].imshow(dmag_hind, vmin = dmag_hind.mean() - dmag_hind.std(), vmax = dmag_hind.mean() + dmag_hind.std(), cmap='hot')
            ax1[j,i].set_title(date_title, fontsize = 17)
            im1 = ax1[j+1,i].imshow(dmag_fore, vmin = dmag_fore.mean() - dmag_fore.std(), vmax = dmag_fore.mean() + dmag_fore.std(), cmap='hot')

            # Spectrum figure
            im2 = ax2[j,i].imshow(ffthind_norm, vmin = 0, vmax = 1, cmap='icefire')
            ax2[j,i].set_title(date_title, fontsize = 17)
            im3 = ax2[j+1,i].imshow(fftfore_norm, vmin = 0, vmax = 1, cmap='icefire')

            for axis in (ax1[j,i], ax1[j+1,i], ax2[j,i], ax2[j+1,i]):
                axis.set_xticks([])
                axis.set_yticks([])

            # Colorbars only next to the last plotted column. Each colorbar is
            # created on the figure owning its axes (a figure from another cell
            # used to be referenced here).
            if i == max_iter-1:
                imratio=0.047*(dmag_hind.shape[0]/dmag_hind.shape[1])
                cbar0 = fig1.colorbar(im0, ax=ax1[j,i], fraction=imratio)
                cbar1 = fig1.colorbar(im1, ax=ax1[j+1,i], fraction=imratio)
                cbar0.set_label('Normalized Gradient')
                cbar1.set_label('Normalized Gradient')

                imratio=0.047*(ffthind_norm.shape[0]/ffthind_norm.shape[1])
                cbar2 = fig2.colorbar(im2, ax=ax2[j,i], fraction=imratio)
                cbar3 = fig2.colorbar(im3, ax=ax2[j+1,i], fraction=imratio)
                cbar2.set_label('Normalized Log-Magnitude Spectrum')
                cbar3.set_label('Normalized Log-Magnitude Spectrum')

    # Remove the second column of ax if it is empty
    if num_columns <= 1:
        for row in range(len(ax1)):
            fig1.delaxes(ax1[row][1])
            fig2.delaxes(ax2[row][1])

    # rearange the axes for no overlap
    fig1.tight_layout()
    fig2.tight_layout()

    # Create the first title for the first two rows
    title1 = fig1.text(0, 0.75, 'Discharge', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)
    title21 = fig2.text(0, 0.75, 'Discharge', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)

    # Create the second title for the last two rows
    title12 = fig1.text(0, 0.25, 'Precipitations', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)
    title22 = fig2.text(0, 0.25, 'Precipitations', va='center', ha='center', rotation='vertical', color='red', fontsize = 20)

    # Add the red line in the middle
    line1 = plt.Line2D([0, 1], [0.485, 0.485], transform=fig1.transFigure, color='red', linewidth=2)
    line2 = plt.Line2D([0, 1], [0.485, 0.485], transform=fig2.transFigure, color='red', linewidth=2)
    fig1.add_artist(line1)
    fig2.add_artist(line2)

    # Save the figures separately. The paths are now real f-strings (the dates
    # were written literally, braces included) and carry the regression
    # initial so that the second regression no longer overwrites the first.
    fig1.savefig(pathfig/f"{reg[0]}_Gradients_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")
    fig2.savefig(pathfig/f"{reg[0]}_FFTs_{times[0].strftime('%Y-%m-%d')}_{times[start_ind].strftime('%Y-%m-%d')}.png")

**Histogram of the REOFs**

In [None]:
# Histograms of the REOF spatial-mode differences: one panel per driver (row)
# and regression type (column).
fig, ax = plt.subplots(figsize=(fig_width, 15), nrows = 2, ncols = 2)

for j, var in enumerate(['Q', 'ERA']):
    for i, reg in enumerate(['Polynomial', 'Neural']):
        panel = ax[j, i]
        diff = vz[var][reg]['Reof_Diff_Spatial']

        # Average the differences over the first axis before binning
        averaged = np.mean(diff.values, axis = 0)
        panel.hist(averaged, bins=10)  # Adjust the number of bins as needed

        panel.set_xlabel('Score')
        panel.set_ylabel('Frequency')
        panel.set_title(f"{var} {reg}: Diff Normalized Forecast modes - Data modes")
        # One legend entry per index along the third axis of the differences
        mode_names = [f"Mode {m+1}" for m in range(diff.shape[2])]
        panel.legend(mode_names)

# The file name carries the first and last date selected by ind_hindcast
overlap_dates = times[start_ind:stop_ind][ind_hindcast]
first_day = overlap_dates[0].strftime('%Y-%m-%d')
last_day = overlap_dates[-1].strftime('%Y-%m-%d')
plt.savefig(pathfig/f"Histogram_difference_Discharge_{first_day}_{last_day}.png", dpi = 400)
