# Data comparison by plotting

`line 3` *target_var_list* 입력

`line 7` *max_time_step* 입력

`line 9, 10` *dataset_1, dataset_2* 입력

`line 17` domain level 설정

`line 26` norm 스케일 조정

`line 28` region 설정 

In [4]:
import xarray as xr
import numpy as np
import his_utils, his_plot
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from matplotlib.colors import TwoSlopeNorm
from multiprocessing import Pool
import dask



dataset_1 = xr.open_dataset("testdata/2021-06-26/prediction_40step_1std_perturb.nc")
dataset_2 = xr.open_dataset('testdata/2021-06-26/prediction_40step_no_perturb.nc')
    
dataset_1 = his_utils.convert_scale(dataset_1).resample(time='1d').mean()
dataset_2 = his_utils.convert_scale(dataset_2).resample(time='1d').mean()

In [10]:
# 서로 다른 dataset별로 어떻게 생겨먹었는지 다 plot해서 뽑아버리기
# ['10m_u_component_of_wind',
#  '10m_v_component_of_wind',
#  '2m_temperature',
#  'geopotential_at_surface',
#  'land_sea_mask',
#  'mean_sea_level_pressure',
#  'total_precipitation_6hr',
#  'geopotential',
#  'specific_humidity',
#  'temperature',
#  'u_component_of_wind',
#  'v_component_of_wind',
#  'vertical_velocity',
#  'toa_incident_solar_radiation']


target_var_list = [ 'geopotential',
 'specific_humidity',
 'temperature',
 'u_component_of_wind',
 'v_component_of_wind',
 'vertical_velocity',
 '10m_u_component_of_wind',
 '10m_v_component_of_wind',
 '2m_temperature',
 'mean_sea_level_pressure',
 'total_precipitation_6hr']

max_time_step = 10

for var in target_var_list:
    diff = dataset_1[var] - dataset_2[var]

    # weights = np.cos(np.deg2rad(diff.lat))
    # weights.name = "weights"
    # weighted = diff.weighted(weights)
    mean_val = diff.mean(('lat', 'lon', 'time'))
    if 'level' in dataset_2[var].dims:
        std_all = diff.std(('lat', 'lon', 'time'))
        std_val = std_all.max().values
    else:
        std_val = diff.std(('lat', 'lon')).max().values
        norm = TwoSlopeNorm(vmin=mean_val - std_val, 
                            vcenter=mean_val, 
                            vmax=mean_val + std_val)

    print(f"{var} std: {std_val}")
    print(f"{var} mean: {mean_val}")

        
    
    

    title = "1std - no perturbation"

    # level variables
    if 'level' in dataset_1[var].dims:
        mean_val = mean_val[0]
        std_all = std_all[0]
        arg_list = [(diff.isel(time=time_index, level=level_index), 
                     var, 
                     "platecarree", 
                     "RdBu_r", 
                     f'{var} {title} at t:{time_index}, level:{level_index}\n mean:{mean_val[level_index].values}, std:{std_all[level_index].values}', 
                     f'figure/{title} {var}_{time_index}_{level_index}.png',
                     TwoSlopeNorm(vmin=mean_val[level_index] - std_val, 
                            vcenter=mean_val[level_index], 
                            vmax=mean_val[level_index] + std_val))
            for time_index in range(max_time_step)
            for level_index in range(len(diff.level))]
    
    # time-independent variables
    elif 'time' not in dataset_1[var].dims:
        arg_list = [(diff, 
                     var, 
                     "platecarree", 
                     "RdBu_r", 
                     f'{var} {title}', 
                     f'figure/{title} {var}.png',
                     norm)]
    
    # surface variables
    else:
        arg_list = [(diff.isel(time=time_index), 
                     var, 
                     "platecarree", 
                     "RdBu_r", 
                     f'{var} {title} at t:{time_index}', 
                     f'figure/{title} {var}_{time_index}.png',
                     norm)
            for time_index in range(max_time_step)]

    with Pool() as pool:
        pool.map(his_plot.plot, arg_list)

geopotential std: 67.43692779541016
geopotential mean: <xarray.DataArray 'geopotential' (batch: 1, level: 37)>
array([[3.5231037 , 2.0290718 , 2.185933  , 2.329424  , 2.4403012 ,
        2.4652176 , 2.2448292 , 1.9865309 , 1.2974168 , 1.0466173 ,
        0.83219326, 0.705143  , 0.60228586, 0.51328516, 0.42852235,
        0.35133272, 0.29710743, 0.23052394, 0.20192626, 0.18311115,
        0.19059113, 0.19544008, 0.21721552, 0.24106592, 0.26663312,
        0.30088583, 0.34258035, 0.36097983, 0.39020315, 0.4178699 ,
        0.45141128, 0.4803674 , 0.5127507 , 0.526462  , 0.54133767,
        0.5580038 , 0.5818291 ]], dtype=float32)
Coordinates:
  * level    (level) int32 1 2 3 5 7 10 20 30 ... 850 875 900 925 950 975 1000
Dimensions without coordinates: batch
specific_humidity std: 4.879955668002367e-05
specific_humidity mean: <xarray.DataArray 'specific_humidity' (batch: 1, level: 37)>
array([[ 3.0353938e-11, -9.9830248e-12, -2.7995854e-11, -4.9443699e-11,
        -2.9320317e-11, -3.62828

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3,) + inhomogeneous part.

In [None]:
# calculate weighted means & max value of each variables
ERA5 = xr.open_dataset("testdata/2022-01-01_tp.grib")
ERA5 = ERA5.stack(new_time=['time', 'step'])
ERA5 = ERA5.assign_coords(new_time=ERA5.valid_time.values)
ERA5 = ERA5.rename({'new_time': 'time'})

ERA5 = ERA5.drop_vars(['number', 'surface'])
tp = "tp"

weights = np.cos(np.deg2rad(ERA5.latitude))
weights.name = "weights"

mean_ERA = {}
mean_google = {}

max_ERA = {}
max_google = {}

for t in range(0, 14):
    mean_ERA[t]=ERA5[tp].isel(time=t).weighted(weights).mean(('latitude', 'longitude')).values * 1000
    # mean_google[t]=ERA5[tp].isel(time=t).weighted(weights).mean(('lat', 'lon')).values

    max_ERA[t]=ERA5[tp].isel(time=t).max(('latitude', 'longitude')).values * 1000
    # max_google[t]=ERA5[tp].isel(time=t).max(('lat', 'lon')).values

# for var in ERA5.data_vars:
#     if var not in ['land_sea_mask', 'geopotential_at_surface']:
#         if 'level' in ERA5[var].dims:
#             mean_ERA[var]=ERA5[var].isel(time=t).weighted(weights).mean(('lat', 'lon')).mean(dim='level').values
#             mean_google[var]=ERA5[var].isel(time=t).weighted(weights).mean(('lat', 'lon')).mean(dim='level').values
#         else:
#             mean_ERA[var]=ERA5[var].isel(time=t).weighted(weights).mean(('lat', 'lon')).values
#             mean_google[var]=ERA5[var].isel(time=t).weighted(weights).mean(('lat', 'lon')).values
        
#     else:
#         mean_ERA[var]=ERA5[var].weighted(weights).mean(('lat', 'lon')).values
#         mean_google[var]=ERA5[var].weighted(weights).mean(('lat', 'lon')).values

In [None]:
PRESSURE_LEVEL = [1000,  975,  950,  925,  900,  
                  875,  850,  825,  800,  775,  
                  750,  700, 650,  600,  550,  
                  500,  450,  400,  350,  300,  
                  250,  225,  200,  175, 150,  
                  125,  100,   70,   50,   30,   
                  20,   10,    7,    5,    3,    
                  2, 1]

In [None]:
PRESSURE_LEVEL[24
]