In [1]:
### Author: Md Shadman Sakib 4/1/2024
### Input: .csv files of each observation station having observation data and simulation output
### Output: Time-series subplots, one per observation station.
### The optimum simulation is selected by its column index (`optimal_col_index`); that column is passed to the evaluate function to calculate the metrics (MAE, RMSE, NSE, KGE).
### The start date and end date define the interval over which the metrics are calculated.
### They also dictate the time span shown in each plot.
### There are missing values in the observed dataset, so rows with NaN observations are dropped from the dataframe before the data is passed to the evaluate function.
### Ensemble plot
### Output: 

import glob
import os

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.io.formats.style import Styler
from sklearn.metrics import r2_score
%matplotlib qt

def evaluate(obs_wl, sim_wl):
    """Score simulated values against observations.

    The two inputs are paired by position (not by pandas index label), so a
    Series whose index was filtered or reset is handled the same way as a
    plain list or ndarray.

    Parameters
    ----------
    obs_wl : array-like
        Observed values (pandas Series, ndarray, list, ...).
    sim_wl : array-like
        Simulated values, same length as ``obs_wl``.

    Returns
    -------
    numpy.ndarray
        ``[MAE, RMSE, NSE, KGE]`` rounded to two decimals, where
        MAE is the mean absolute error, RMSE the root-mean-square error,
        NSE the Nash-Sutcliffe efficiency and KGE the Kling-Gupta efficiency.

    Raises
    ------
    ValueError
        If the inputs are empty, differ in length, or contain NaN/inf.
    """
    obs = np.asarray(obs_wl, dtype=float).ravel()
    sim = np.asarray(sim_wl, dtype=float).ravel()

    # Explicit validation; without it NaNs would silently propagate into every metric.
    if obs.size == 0 or sim.size == 0:
        raise ValueError("evaluate() needs at least one observation/simulation pair")
    if obs.size != sim.size:
        raise ValueError(
            f"obs_wl and sim_wl differ in length ({obs.size} vs {sim.size})"
        )
    if not (np.isfinite(obs).all() and np.isfinite(sim).all()):
        raise ValueError("obs_wl and sim_wl must not contain NaN or infinite values")

    # Residuals (simulated minus observed) drive MAE, RMSE and NSE.
    res = sim - obs
    MAE = np.mean(np.abs(res))
    res_pow = np.square(res)
    RMSE = np.sqrt(np.mean(res_pow))

    # Kling-Gupta Efficiency: combines linear correlation (r), the ratio of
    # standard deviations (alpha) and the ratio of means (beta).
    r = np.corrcoef(sim, obs)[0, 1]
    alpha = np.std(sim) / np.std(obs)
    beta = np.mean(sim) / np.mean(obs)
    KGE = 1 - np.sqrt(np.square(r - 1) + np.square(alpha - 1) + np.square(beta - 1))

    # Nash-Sutcliffe Efficiency: 1 - SS_res / SS_tot. This is the same quantity
    # as the coefficient of determination of simulated vs. observed values.
    # NOTE: a constant observation series makes SS_tot zero, so NSE (and alpha)
    # are not finite in that case.
    NSE = 1 - res_pow.sum() / np.square(obs - np.mean(obs)).sum()

    return np.round(np.array([MAE, RMSE, NSE, KGE]), 2)


# Lookup table: station code (the stem of each station's CSV file name) ->
# full station name shown in the subplot annotation.
station_dict = dict(
    SP="Sewells Point, VA",
    MP="Money Point, VA",
    KK="Kiptopeke, VA",
    WAC="Wachapreague, VA",
    YOR="Yorktown USCG, VA",
    WP="Windmill Point, VA",
    OC="Ocean City Inlet, MD",
    LEW="Lewisetta, VA",
    SI="Solomons Island, MD",
    DC="Washington, DC",
    BH="Bishops Head, MD",
    CC="Chesapeake City, MD",
    BAL="Baltimore, MD",
    TB="Tolchester Beach, MD",
    ANN="Annapolis, MD",
    CAM="Cambridge, MD",
    DAH="Dahlgren, VA",
    CBBT="CBBT, Chesapeake Channel, VA",
)

#-------------------------------INPUT START---------------------------------------------------------------#

# Folder holding one CSV per observation station (observations plus simulation output).
local_path_set=r"E:\VirginiaTech-Research\ChesapeakeModel\CS_Tester\ExtremeEvent\SandyCS_0.021_Ocean_0.20\Results"
# os.path.join avoids the invalid "\*" escape sequence and the hard-coded separator;
# sorted() makes the panel order reproducible, since glob returns matches in arbitrary order.
data_paths=sorted(glob.glob(os.path.join(local_path_set, "*.csv")))
optimal_col_index=2  # column index of the simulation the metrics are calculated for

# Time window (both ends inclusive) used for the metrics and for the plotted span
start_date = '2012-10-27 00:00:00'
end_date = '2012-11-02 00:00:00'

plt.rcParams.update({'font.size': 10.5})
# Positions in the flattened axes array of the panels that keep their date tick labels
label_subplots=[-2,-3]
#-------------------------------INPUT END-----------------------------------------------------------------#
#---------------------------------------------------------------------------------------------------------#
#-------------------------------DATA PROCESSING START-----------------------------------------------------#


# One panel is drawn per station file.
station_count=len(data_paths)

# Two panels per row; the row count follows from the number of stations
# (ceiling division, at least one row) instead of being fixed at 6, so any
# number of station files fits on the grid.
num_cols = 2
num_rows = max(1, (station_count + num_cols - 1) // num_cols)


# squeeze=False keeps `axs` two-dimensional even for a single row, so the
# axs[row, col] indexing used by the plotting loop always works.
fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 11.25), squeeze=False)
fig.subplots_adjust(top=0.9)

# Initialize subplot indices (panels are filled row by row)
row_index = 0
col_index = 0

### flattening all axes so panels can be addressed by a single position
axs_datetime = axs.flatten()

# Remove every grid slot that will not receive a station. With an odd station
# count this is exactly the bottom-right panel.
for unused_ax in axs_datetime[station_count:]:
    fig.delaxes(unused_ax)

    
for plot_number, data_path in enumerate(data_paths):
    # Panels are filled row by row, left to right.
    row_index, col_index = divmod(plot_number, num_cols)
    ax = axs[row_index, col_index]

    data = pd.read_csv(data_path)
    # The file stem is the station code (".../SP.csv" -> "SP"); basename copes
    # with the platform's path separator instead of assuming a backslash.
    station = os.path.basename(data_path).split('.')[0]
    data['DateTime (GMT)'] = data['DateTime (GMT)'].astype('datetime64[ns]')

    # Crop to the analysis window (both ends inclusive). A mask is used rather
    # than looking up the exact boundary rows, so a file that lacks a row at
    # exactly start_date/end_date no longer raises IndexError.
    in_window = data['DateTime (GMT)'].between(pd.Timestamp(start_date), pd.Timestamp(end_date))
    data_storm = data[in_window]

    ### calculating the metrics within the set timeframe
    ### rows with a missing observation are dropped before scoring
    data_NAN_filtered = data_storm.dropna(subset=['NOAA WL (m)'])

    obs_wl = data_NAN_filtered['NOAA WL (m)']
    sim_wl = data_NAN_filtered.iloc[:, optimal_col_index]

    # Fall back to the bare code when a station is missing from station_dict.
    station_title = station_dict.get(station, station) + ' [' + station + ']'
    if data_NAN_filtered.empty:
        # Nothing to score (e.g. the gauge has no observations in the window); still draw the panel.
        title_met = 'No observations in selected window'
    else:
        eval_met = evaluate(obs_wl, sim_wl)  # [MAE, RMSE, NSE, KGE]
        title_met = f'MAE: {eval_met[0]}m;  RMSE: {eval_met[1]}m;  NSE: {eval_met[2]};  KGE: {eval_met[3]}'

    times = data_storm['DateTime (GMT)']
    # Every column from index 2 onward is drawn as a simulation line. Only the
    # first one carries the legend label, so 'Simulated' appears once in the
    # legend instead of once per column.
    for sim_number, j in enumerate(data.columns[2:]):
        ax.plot(times, data_storm[j], label='Simulated' if sim_number == 0 else '_nolegend_', c='k')

    ax.scatter(times, data_storm['NOAA WL (m)'], label='Observation', c='r', s=2)
    ax.set_ylabel('Water level [m]')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%b'))
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))

    print(title_met)

    # Add 17.5 % headroom above the data so the annotations do not cover the curves.
    current_ylim = ax.get_ylim()
    ax.set_ylim(current_ylim[0], current_ylim[1] + 0.175 * (current_ylim[1] - current_ylim[0]))

    ax.text(0.98, 0.95, station_title, transform=ax.transAxes, fontsize=10.5, verticalalignment='top', horizontalalignment='right')
    ax.text(0.05, 0.95, title_met, transform=ax.transAxes, fontsize=10.5, verticalalignment='top', horizontalalignment='left')

    # A single legend on the first panel serves the whole figure.
    if row_index == 0 and col_index == 0:
        ax.legend(loc='lower right')

    # Hide the x tick labels on every panel; selected panels get them back after the loop.
    ax.set_xticklabels([])


### reinstating labels to required subplots


# Give the date tick labels back to the panels selected in `label_subplots`
# (positions in the flattened axes array). Each axis gets its own formatter
# and locator instance.
for panel_position in label_subplots:
    date_axis = axs_datetime[panel_position].xaxis
    date_axis.set_major_formatter(mdates.DateFormatter('%d/%b'))
    date_axis.set_major_locator(mdates.DayLocator(interval=1))


# Final figure margins and spacing between panels (fractions of the figure size).
layout_margins = dict(
    top=0.983,
    bottom=0.044,
    left=0.045,
    right=0.989,
    hspace=0.15,
    wspace=0.120,
)
fig.subplots_adjust(**layout_margins)

# Key for the metric rows printed by the loop above.
print('[MAE, RMSE, NSE, KGE]')

MAE: 0.11m;  RMSE: 0.13m;  NSE: 0.65;  KGE: 0.72
MAE: 0.13m;  RMSE: 0.17m;  NSE: 0.69;  KGE: 0.66
MAE: 0.07m;  RMSE: 0.08m;  NSE: 0.88;  KGE: 0.85
MAE: 0.11m;  RMSE: 0.12m;  NSE: 0.77;  KGE: 0.77
MAE: 0.12m;  RMSE: 0.14m;  NSE: 0.88;  KGE: 0.85
MAE: 0.13m;  RMSE: 0.15m;  NSE: 0.44;  KGE: 0.71
MAE: 0.22m;  RMSE: 0.26m;  NSE: 0.75;  KGE: 0.64
MAE: 0.1m;  RMSE: 0.12m;  NSE: 0.28;  KGE: 0.69
MAE: 0.17m;  RMSE: 0.21m;  NSE: 0.81;  KGE: 0.74
MAE: 0.11m;  RMSE: 0.13m;  NSE: 0.77;  KGE: 0.75
MAE: 0.15m;  RMSE: 0.18m;  NSE: 0.79;  KGE: 0.78
[MAE, RMSE, NSE, KGE]


In [43]:
# Debug check: show the last entry of the flattened axes array (the bottom-right grid slot).
axs_datetime[-1]

<Axes: >

In [14]:
# Debug check: row count of `data`, which still holds the last CSV read by the plotting loop.
len(data)

2641

In [16]:
# Debug check: row count of the cropped analysis window for the last station processed.
# (`data_strom` was a typo for `data_storm` and raises NameError on a fresh kernel.)
len(data_storm)

1441

  plt.tight_layout()
