In [None]:
import h5py
from matplotlib import pyplot as plt
from matplotlib import lines, colors, ticker
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_error

import sys
sys.path.append('../externals/gfz_cygnss/')
import gfz_202003.utils.mathematics as mat

In [None]:
# Open the HDF5 file with the current predictions in read-only mode.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
prediction_path = '/work/ka1176/caroline/gitlab/cygnss-deployment/prediction/current_predictions.h5'
f_pred = h5py.File(prediction_path, mode='r')

In [None]:
# Display the keys available in the opened prediction file.
f_pred.keys()

In [None]:
# Read the four datasets used below out of the prediction file:
# reference values, predictions, and the longitude / latitude passed to the
# gridding helper further down.
y_true, y_pred, sp_lon, sp_lat = (
    f_pred[dataset_name][:]
    for dataset_name in ('y_true', 'y_pred', 'sp_lon', 'sp_lat')
)

In [None]:
# Overall RMSE across all samples.
# The square root is taken explicitly: the `squared=False` keyword of
# `mean_squared_error` was deprecated in scikit-learn 1.4 and removed in 1.6,
# so the previous call fails on current releases.
# NOTE(review): assumes y_true / y_pred are 1-D (single output); for
# multi-output targets sqrt(mean MSE) differs from the mean of per-output RMSEs.
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print(f'Overall root mean square error (RMSE): {rmse:.4f} m/s')

In [None]:
# Upper edges (m/s) of the bins used for the per-bin error statistics.
y_bins = [4, 8, 12, 16, 20, 100]
# Bin index per sample: 0 where y_true < y_bins[0], otherwise k such that
# y_bins[k-1] <= y_true < y_bins[k].
y_ix = np.digitize(y_true, bins=y_bins, right=False)

# One zero-initialised slot per bin for each statistic.
n_bins = len(y_bins)
all_rmse, all_bias, all_counts = (np.zeros(n_bins) for _ in range(3))

In [None]:
# Per-bin RMSE, bias (mean of prediction minus reference) and sample count.
# Samples with y_true >= y_bins[-1] receive index len(y_bins) from np.digitize
# and therefore do not fall into any of the bins evaluated here.
for i, yy in enumerate(y_bins):
    in_bin = y_ix == i  # boolean mask, computed once per bin
    n_in_bin = np.count_nonzero(in_bin)
    if n_in_bin > 0:
        # Explicit square root instead of the removed `squared=False` keyword.
        # A separate name is used so the overall `rmse` is not overwritten.
        bin_rmse = np.sqrt(mean_squared_error(y_true[in_bin], y_pred[in_bin]))
        all_rmse[i] = bin_rmse
        all_bias[i] = np.mean(y_pred[in_bin] - y_true[in_bin])
        all_counts[i] = n_in_bin
        print(f'RMSE in bin {i} (up to {yy} m/s): {bin_rmse:.4f} m/s')
    else:
        # Empty bin: mark the statistics as undefined.
        all_rmse[i] = np.nan
        all_bias[i] = np.nan
        all_counts[i] = 0
        print(f"--- No samples in bin {i} (up to {yy} m/s)")

# Collect the per-bin statistics in one table (one row per bin).
df_rmse = pd.DataFrame(dict(rmse=all_rmse, bias=all_bias, bins=y_bins, counts=all_counts))

In [None]:
# Global plot appearance for all figures below: white background with grid,
# and the 'talk' scaling preset.
sns.set_style(style='whitegrid')
sns.set_context(context='talk')

In [None]:
# Overlaid histograms of the reference (ERA5) and predicted wind speeds.
fig, ax = plt.subplots()

sns.histplot(data=y_true, ax=ax, color='C7', label='ERA5 wind speed (m/s)')
sns.histplot(data=y_pred, ax=ax, color='C2', label='Predicted wind speed (m/s)')
ax.legend(fontsize=12)

# Fixed ticks every 5 m/s, labelled with their own values.
speed_ticks = [5, 10, 15, 20, 25]
ax.set_xticks(speed_ticks)
ax.set_xticklabels(speed_ticks)
ax.set_xlabel('ERA5 wind speed (m/s)')

plt.show()

In [None]:
# Hexbin density plot of predicted vs. ERA5 wind speed with a logarithmic
# colour scale and a dotted 1:1 reference line.
ymin = 2.5
ymax = 25.0
wind_ticks = [5, 10, 15, 20, 25]  # shared by both axes

fig = plt.figure()
ax = fig.add_subplot(111)

# mincnt=1 leaves hexagons without samples blank.
img = ax.hexbin(y_true, y_pred, cmap='viridis', norm=colors.LogNorm(vmin=1, vmax=25000), mincnt=1)
clb = plt.colorbar(img)
clb.set_ticks([1, 10, 100, 1000, 10000])
clb.set_ticklabels([r'$1$', r'$10$', r'$10^2$', r'$10^3$', r'$10^4$'])
clb.set_label('Samples in bin')

ax.set_xlabel('ERA5 wind speed (m/s)')
ax.set_ylabel('Predicted wind speed (m/s)')

# 1:1 line marking a perfect prediction.
diagonal = np.linspace(0, 30)
ax.plot(diagonal, diagonal, 'r:')

# Both axes share the same range; ymax was previously defined but unused
# while the upper limit was hard-coded.
ax.set_xlim(ymin, ymax)
ax.set_ylim(ymin, ymax)

ax.set_xticks(wind_ticks)
ax.set_xticklabels(wind_ticks)
ax.set_yticks(wind_ticks)
ax.set_yticklabels(wind_ticks)

fig.tight_layout()

In [None]:
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
from mpl_toolkits.axes_grid1 import AxesGrid

In [None]:
deg = 1  # grid resolution (publication: 1)

# Per-sample prediction error (prediction minus reference).
error = y_pred - y_true

# Average the per-sample quantities onto a regular lon/lat grid.
xx, yy, gridded_y_true = mat.average_to_grid2(sp_lon, sp_lat, y_true, resolution=deg)
xx, yy, gridded_y_pred = mat.average_to_grid2(sp_lon, sp_lat, y_pred, resolution=deg)

# RMSE per grid cell = sqrt(mean(error**2)). The previous version averaged
# |error|, which is the mean absolute error, although the result is named and
# plotted as RMSE.
# NOTE(review): assumes average_to_grid2 returns the arithmetic mean of the
# values falling into each cell - confirm against the helper.
xx, yy, gridded_mse = mat.average_to_grid2(sp_lon, sp_lat, error ** 2, resolution=deg)
gridded_rmse = np.sqrt(gridded_mse)

xx, yy, gridded_bias = mat.average_to_grid2(sp_lon, sp_lat, error, resolution=deg)

In [None]:
# Coordinate axes for the maps: -180..180 for longitude and -90..90 for
# latitude (end points included), in steps of `deg`.
grid_lon, grid_lat = (
    np.arange(-half_span, half_span + 1, deg) for half_span in (180, 90)
)

In [None]:
# Filled-contour map of the gridded ERA5 wind speed on a plate carree
# projection with central longitude 180.
proj = ccrs.PlateCarree(central_longitude=180)
fig, ax = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(6, 4),
    gridspec_kw={'hspace': 0.05, 'wspace': 0.1},
    subplot_kw={'projection': proj},
)
# Latitude axis is passed in reversed order and the grid transposed.
cmap = ax.contourf(
    grid_lon,
    grid_lat[::-1],
    gridded_y_true.T,
    levels=60,
    transform=proj,
    antialiased=False,
    cmap='magma',
)
ax.coastlines()

# linewidth=0 hides the grid lines themselves; only bottom/left labels remain.
gl = ax.gridlines(
    crs=proj, draw_labels=True, linewidth=0, color='gray', alpha=0.5, linestyle=':'
)
gl.top_labels = False
gl.right_labels = False
gl.xlabel_style = {'size': 8, 'color': 'black'}
gl.ylabel_style = {'size': 8, 'color': 'black'}

clb = plt.colorbar(
    cmap, ax=ax, orientation='horizontal', shrink=1,
    label='Average ERA5 wind speed (m/s)',
)
clb.set_ticks(np.arange(2.5, 18, 2.5))
clb.ax.tick_params(labelsize=8)

plt.show()

In [None]:
# Filled-contour map of the gridded error stored in `gridded_rmse`, on a
# plate carree projection with central longitude 180.
proj = ccrs.PlateCarree(central_longitude=180)
fig, ax = plt.subplots(
    nrows=1,
    ncols=1,
    figsize=(6, 4),
    gridspec_kw={'hspace': 0.05, 'wspace': 0.1},
    subplot_kw={'projection': proj},
)
# Latitude axis is passed in reversed order and the grid transposed.
cmap = ax.contourf(
    grid_lon,
    grid_lat[::-1],
    gridded_rmse.T,
    levels=60,
    transform=proj,
    antialiased=False,
    cmap='viridis',
)
ax.coastlines()

# linewidth=0 hides the grid lines themselves; only bottom/left labels remain.
gl = ax.gridlines(
    crs=proj, draw_labels=True, linewidth=0, color='gray', alpha=0.5, linestyle=':'
)
gl.top_labels = False
gl.right_labels = False
gl.xlabel_style = {'size': 8, 'color': 'black'}
gl.ylabel_style = {'size': 8, 'color': 'black'}

clb = plt.colorbar(
    cmap, ax=ax, orientation='horizontal', shrink=1,
    label='Average RMSE (m/s)',
)
# Integer ticks from 0 up to just above the largest finite grid value.
clb.set_ticks(np.arange(0, np.nanmax(gridded_rmse) + 1, 1.0))
clb.ax.tick_params(labelsize=8)

plt.show()