In [1]:
import copy
import glob
import gzip
import json
import os
import pickle
import shutil
import sys
import time
import warnings
import zipfile
import numba
from datetime import date
import h5py 

import cdsapi
import numpy as np
import pandas as pd
import xarray as xr


import matplotlib
import matplotlib.font_manager as font_manager
import matplotlib.pylab as plt
import matplotlib.pyplot as maplt
import seaborn

import trajectory as trj
from collections import defaultdict

sys.path.insert(0, os.getcwd() + "/../resort/rasotools-master/")
import rasotools


In [6]:
# Global matplotlib defaults for every figure created later in the notebook:
# font size 20 and a default figure size of (20, 10).
matplotlib.rcParams.update({"font.size": 20})
matplotlib.rcParams["figure.figsize"] = (20, 10)
font = {
    "size": 20,
}
# Applies the same font size once more through the rc() interface.
matplotlib.rc("font", **font)

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# IPython magic (not plain Python): loads the line_profiler extension.
%load_ext line_profiler


In [2]:
# @numba.jit(nopython=True)
def find_nearest(array, value):
    """Return the element of `array` that lies closest to `value`.

    `array` is converted with ``np.asarray``; on ties the first closest
    element (lowest index) is returned.
    """
    candidates = np.asarray(array)
    distances = np.abs(candidates - value)
    closest = distances.argmin()
    return candidates[closest]

# @numba.jit()
def drop_dims(input_array):
    """Return a copy of `input_array` with every length-1 axis removed.

    The input is first copied into a new ndarray, then reshaped so that only
    the axes whose length differs from 1 remain (in their original order).
    """
    data = np.array(input_array)
    kept_axes = [length for length in data.shape if length != 1]
    return data.reshape(kept_axes)

def datetime_to_seconds(dates, ref='1900-01-01T00:00:00'):
    """Convert datetime64 values to integer seconds elapsed since `ref`.

    `ref` is parsed with ``np.datetime64``; the result is cast to int64.
    """
    origin = np.datetime64(ref)
    one_second = np.timedelta64(1, 's')
    elapsed = (dates - origin) / one_second
    return elapsed.astype(np.int64)

def seconds_to_datetime(seconds, ref='1900-01-01'):
    """Convert second offsets counted from `ref` into pandas datetimes.

    `seconds` is passed through ``np.asarray`` and handed to
    ``pd.to_datetime`` with ``unit='s'`` and ``origin=ref``.
    """
    offsets = np.asarray(seconds)
    stamps = pd.to_datetime(offsets, unit='s', origin=ref)
    return stamps

In [7]:
# Pressure levels used as keys into the per-level statistics and as the
# y coordinates of the plots (the y axis is labelled 'pressure (Pa)').
stdplevs = [1000,2000,3000,5000,7000,10000,15000,20000,25000,30000,40000,50000,70000,85000,92500]
diff = True        # also draw the (undisplaced - displaced) RMSE difference curve
show_date = False  # print the per-level statistics while computing them
save_dict = {'eastward windspeed':'u', 'northward windspeed':'v', 'air temperature':'temperature', 'specific humidity': 'q'}
units_dict = {'eastward windspeed':'m/s', 'northward windspeed':'m/s', 'air temperature':'K', 'specific humidity':'1'}


def _rms(values):
    """Root mean square of `values`, ignoring NaN entries."""
    return np.sqrt(np.nanmean((np.array(values)**2)))


for var in ['specific humidity', 'eastward windspeed', 'northward windspeed', 'air temperature']: #  ['eastward windspeed', 'northward windspeed', 'air temperature']:
    for year in [1960, 1970, 1980, 1990, 2000, 2010, 2020]: # [1960, 1970, 1980, 1990, 2000, 2010, 2020]:
        fig = None
        try:
            # Generation step of the pickled results, kept for reference:
            # file_list = []
            # for i in glob.glob('/scratch/das/federico/COP2_HARVEST_JAN2023/igra2/*.nc')[:]:
            #     sid = i.split('/')[-1].split('.')[0]
            #     print(sid)
            #     file_list.append(calc_station.remote(sid,year,var))
            # results = ray.get(file_list)
            # with open('era5_temperature_fc_'+str(year)+'_rmse_data.p', 'wb') as file:
            #     pickle.dump(results, file)

            # NOTE(security): pickle.load can execute arbitrary code; only
            # load files that were produced by this project.
            with open('./igra/world/era5_' + save_dict[var] + '_fc_'+str(year)+'_rmse_data.p', 'rb') as file:
                results = pickle.load(file)

            # Each entry of `results` holds six per-level mappings. Start from
            # a deep copy of the first entry so `results` itself is not
            # modified, then add every further entry level by level.
            # (presumably the per-level values are lists, so `+` concatenates
            # the samples of all entries -- TODO confirm)
            totals = copy.deepcopy(results[0])
            (rmse_sum_shbase_sonde, rmse_sum_shdisp_sonde, rms_sum_shbase,
             rms_sum_sonde, rms_sum_shdisp, rms_sum_dispminusbase) = totals
            totals = (rmse_sum_shbase_sonde, rmse_sum_shdisp_sonde, rms_sum_shbase,
                      rms_sum_sonde, rms_sum_shdisp, rms_sum_dispminusbase)
            for entry in results[1:]:
                for k in stdplevs:
                    for pos, total in enumerate(totals):
                        total[k] = total[k] + entry[pos][k]

            n_ascents = len(rms_sum_shdisp[50000])
            print('valid ascents: ', n_ascents)
            t0 = time.time()

            # One RMS value per pressure level for each of the six quantities.
            rmse_shbase_sonde = [_rms(rmse_sum_shbase_sonde[plev]) for plev in stdplevs]
            rmse_shdisp_sonde = [_rms(rmse_sum_shdisp_sonde[plev]) for plev in stdplevs]
            rms_shbase = [_rms(rms_sum_shbase[plev]) for plev in stdplevs]
            rms_sonde = [_rms(rms_sum_sonde[plev]) for plev in stdplevs]
            rms_shdisp = [_rms(rms_sum_shdisp[plev]) for plev in stdplevs]
            rms_dispmbase = [_rms(rms_sum_dispminusbase[plev]) for plev in stdplevs]

            if show_date:
                for pos, plev in enumerate(stdplevs):
                    print('rmse_shbase_sonde - plev: ', plev, ' RMSE: ', rmse_shbase_sonde[pos])
                    print('rmse_shdisp_sonde - plev: ', plev, ' RMSE: ', rmse_shdisp_sonde[pos])
                    print('rms_shbase - plev: ', plev, ' RMS: ', rms_shbase[pos])
                    print('rms_sonde - plev: ', plev, ' RMS: ', rms_sonde[pos])
                    print('rms_shdisp - plev: ', plev, ' RMS: ', rms_shdisp[pos])
                    # Fixed: this line used to print rms_shdisp under the
                    # rms_dispmbase label.
                    print('rms_dispmbase - plev: ', plev, ' RMS: ', rms_dispmbase[pos])

            print('')

            # Specific humidity is plotted multiplied by 1e5, and the legend
            # labels carry the matching factor.
            if var == 'specific humidity':
                scale = 100000
                label_suffix = r' $\times 10^{-5}$'
            else:
                scale = 1
                label_suffix = ''

            fig, ax = maplt.subplots(1, 2, gridspec_kw={'width_ratios': [4, 1]}, figsize = (15,10))
            ax1 = ax[0]
            ax2 = ax[1]
            ax2.sharey(ax1)
            ax1.plot(scale*np.array(rmse_shbase_sonde),stdplevs,color='orange', label='RMSE undisplaced' + label_suffix)
            ax1.plot(scale*np.array(rmse_shdisp_sonde),stdplevs, color='red', label='RMSE displaced' + label_suffix)

            # Second x axis on the same panel for the difference curves.
            ax1_4 = ax1.twiny()
            ax1_4.axvline(x=0, color='black', alpha=0.8, ls='--', lw=0.5)
            if diff:
                ax1_4.plot(scale*(np.array(rmse_shbase_sonde)-np.array(rmse_shdisp_sonde)),stdplevs,color='purple', label='RMSE difference undisplaced - displaced' + label_suffix)
            ax1_4.plot(scale*np.array(rms_dispmbase),stdplevs, color='green', alpha=0.3, ls='--', label='RMS undisplaced - displaced' + label_suffix)
            ax1_4.legend(loc='upper right', prop={'size':14})

            # Reverse the y axis so the largest pressure is at the bottom.
            ax1.set_ylim(ax1.get_ylim()[::-1])
            ax1.set_ylabel('pressure (Pa)')
            ax1.set_xlabel(var+' RMSE (' +str(units_dict[var]) + ')')
            ax1.legend(loc='lower left', prop={'size':14})
            ax1.grid()

            # Number of non-NaN samples per level. Iterating over stdplevs
            # (not the dict keys) keeps every count aligned with its bar.
            value_nr = [
                int(np.count_nonzero(~np.isnan(rmse_sum_shbase_sonde[plev])))
                for plev in stdplevs
            ]
            ax2.barh(stdplevs, value_nr, 2000, color='g', alpha = 0.4, align='center')
            ax2.set_xlabel('Observations')
            ax2.tick_params(labelleft=False)
            ax2.grid()

            maplt.title(str(year)+' '+var+' RMSE \n' + str(n_ascents) +' valid ascents')
            fig.savefig('./igra/world/'+str(year)+'_'+save_dict[var]+'_era5_fc_world_rmse_plot_igra.png')
            print('RMSE calculation: ', time.time()-t0)
        except FileNotFoundError:
            print('no data: ', var, year)
        except Exception as err:
            # Keep processing the remaining variable/year combinations, but
            # report the real failure instead of labelling it "no data".
            print('failed: ', var, year, repr(err))
        finally:
            # Always release the figure, also when plotting failed midway.
            if fig is not None:
                maplt.close(fig)