# Create SLP, PRW (TCWV), W500

Set `tp_type` to 'MTPR' or 'TP' so that the output is saved to the matching inputdata folder.

In [166]:
import numpy as np 
import xarray as xr 
import matplotlib.pyplot as plt 
import cartopy
import os
import pandas as pd
import cftime
import datetime as dt
from tqdm import tqdm, notebook
import netCDF4 as nc
import glob

import subprocess
import cftime

from netCDF4 import Dataset

In [147]:
# Calendar name written to the `calendar` attribute of the output time axis.
# Alternative: calendar_type = 'standard'
calendar_type = "proleptic_gregorian"

In [148]:
# Label embedded in the output folder and file names.
tp_type = "TP"

# Alternative input set (swap with the active pair below to use it):
# in_folder = '/localdrive/drive6/era5/data/six_hrly/data_1deg'
# deg_type = 'DEG10'

# Active input set: folder holding the six-hourly ERA5 files and the
# matching resolution tag used in the output names.
deg_type = "DEG15"
in_folder = "/localdrive/drive6/era5/data/six_hrly/data_1_5deg"

# Manually converting all the expver files to regular files

In [172]:
# Source (with an expver dimension) and destination (expver collapsed) for the
# 2020 mean-sea-level-pressure file; both live in the same directory.
msl_dir = '/localdrive/drive6/era5/data/six_hrly/data_1_5deg/msl'
file = f"{msl_dir}/msl_2020_6hrly_expver.nc"
out_file = f"{msl_dir}/msl_2020_6hrly.nc"
ds = xr.open_dataset(file)

In [173]:
# Collapse the two expver slices of `msl` into one (time, lat, lon) array.
# For every time step exactly one of the two slices is expected to be all-NaN;
# the other one supplies the data. Any other situation stops the loop so it can
# be inspected by hand (the remaining steps of `out_var` stay NaN).
ver1 = ds.sel(expver=1)
ver5 = ds.sel(expver=5)
out_var = np.full(ver1.msl.shape, np.nan)
for i in notebook.tqdm(range(len(ds.time)), total=len(ds.time)):
    # read each slice once per step instead of re-selecting and re-reading it
    msl_v1 = ver1.msl.isel(time=i).values
    msl_v5 = ver5.msl.isel(time=i).values
    v1_missing = np.all(np.isnan(msl_v1))
    v5_missing = np.all(np.isnan(msl_v5))
    if v1_missing and v5_missing:
        print('hold on both are nans')
        break
    elif v1_missing:
        out_var[i, :, :] = msl_v5
    elif v5_missing:
        out_var[i, :, :] = msl_v1
    else:
        # both slices carry data for this step: ambiguous, stop here
        print('hold on something neither is nan')
        break

HBox(children=(FloatProgress(value=0.0, max=1464.0), HTML(value='')))




In [174]:
# Reduce the dataset to a single expver slice and discard the leftover scalar
# `expver` coordinate in one step (replaces the deprecated `Dataset.drop`).
# NOTE: this rebinds `ds`, so the cell cannot be re-run without reopening the file.
ds = ds.sel(expver=1, drop=True)

In [175]:
# Replace msl with the merged array and write the expver-free file.
# The original variable attributes are passed along explicitly; assigning a
# bare (dims, data) tuple would silently discard them.
ds['msl'] = (('time', 'latitude', 'longitude'), out_var, dict(ds['msl'].attrs))
ds.to_netcdf(out_file)

# Creating the VAR file that contains all the necessary years

In [176]:
# start year to end year+1 (the end year is exclusive in the range() calls)
year_range = [1950, 2021]
num_years = year_range[1] - year_range[0]

# var map, variable name in code to the input file/folder name
in_var_fn_map = {'slp': 'msl', 'w500': 'w500', 'tcwv': 'tcwv', 'uv10': 'uv10'}
# var map, variable name in code to the variable name inside the input file
in_var_map = {'slp': 'msl', 'w500': 'w', 'tcwv': 'tcwv', 'uv10': 'uv10'}
# linear conversion applied on read: out = in*scale + offset
# (slp is scaled by 1/100 to match the 'mb' output units)
in_var_scale = {'slp': 1.0/100., 'w500': 1.0, 'tcwv': 1.0, 'uv10': 1.0}
in_var_offset = {'slp': 0.0, 'w500': 0.0, 'tcwv': 0.0, 'uv10': 0.0}

# var map, variable name in code to the output variable name
out_var_map = {'slp': 'SLP', 'w500': 'W500', 'tcwv': 'PRW', 'uv10': 'UV10'}
out_var_long_name = {
    'slp': 'Sea Level Pressure',
    'w500': 'Vertical Velocity at 500hPa',
    'tcwv': 'Total Column Water Vapor',
    'uv10': 'Wind Speeds',
}
# tcwv is a column-integrated amount, not a rate: with scale 1.0 and offset 0.0
# the values are written unchanged, so the units are kg m-2 (not 'mm/hr').
out_var_units = {'slp': 'mb', 'w500': 'Pa/s', 'tcwv': 'kg m-2', 'uv10': 'm/s'}

In [177]:
# getting the time dimension length
# Sum the number of time steps over all yearly SLP files and record the grid
# size (taken from the last file read) so the output arrays can be allocated.
time_dim = 0
var = 'slp'
for year in range(year_range[0], year_range[1]):
    in_file = os.path.join(in_folder, f"{in_var_fn_map[var]}/{in_var_fn_map[var]}_{year}_6hrly.nc")
    # context manager guarantees the file handle is released even on error
    with xr.open_dataset(in_file) as ds:
        time_dim += len(ds.time)
        lat_dim = len(ds.latitude)
        lon_dim = len(ds.longitude)

print(f"Dimension: (time x lat x lon) --> {time_dim} x {lat_dim} x {lon_dim}")

Dimension: (time x lat x lon) --> 103732 x 121 x 240


In [178]:
# Quick look at the output-name map and the total number of time steps.
print(out_var_map, time_dim, sep='\n')

{'slp': 'SLP', 'w500': 'W500', 'tcwv': 'PRW', 'uv10': 'UV10'}
103732


In [179]:
# Running the code for the variable list
# For each variable, concatenate the yearly six-hourly files into one array,
# build a matching time axis / date stamp, and write a single netCDF file.
# var_list = ['slp', 'w500', 'tcwv']
var_list = ['slp']
tp_type = 'ALL'

for var in var_list:

    # initializing the output arrays (sizes come from the SLP file scan above)
    out_var = np.empty((time_dim, lat_dim, lon_dim))
    out_time = np.zeros((time_dim,))
    out_time_bnds = np.zeros((time_dim, 2))
    out_date = np.empty(out_time.shape, dtype=int)

    # number of time steps already written into the output arrays
    tmp_time_dim = 0

    # looping through all the years to create the single file that has all the necessary years
    for year in notebook.tqdm(range(year_range[0], year_range[1]), total=num_years, desc=f"{var.upper()}"):
        in_file = os.path.join(in_folder, f"{in_var_fn_map[var]}/{in_var_fn_map[var]}_{year}_6hrly.nc")
        if not os.path.exists(in_file):
            raise FileNotFoundError(f"No file found: {in_file}")
        # context manager guarantees the file handle is released even on error
        with xr.open_dataset(in_file) as ds:
            in_lat = ds.variables['latitude'].values
            in_lon = ds.variables['longitude'].values
            in_var = ds.variables[in_var_map[var]].values
            curr_time_dim = len(ds.time)

        # The time axis is synthesized as consecutive 6-hour steps counted from
        # Jan 1 of the first year; the files' own time values are not used.
        # NOTE(review): this assumes the yearly files are gap-free -- confirm.
        in_time = (np.arange(0, curr_time_dim, 1) + tmp_time_dim)*6.
        tmp_date = [dt.datetime(year_range[0], 1, 1) + dt.timedelta(hours=i_time) for i_time in in_time]
        in_date = [int(f"{i_date.year:04d}{i_date.month:02d}{i_date.day:02d}") for i_date in tmp_date]

        # append this year's records (all of them) after the ones already stored
        out_date[tmp_time_dim:tmp_time_dim+curr_time_dim] = in_date
        out_time[tmp_time_dim:tmp_time_dim+curr_time_dim] = in_time
        out_var[tmp_time_dim:tmp_time_dim+curr_time_dim, :, :] = (in_var*in_var_scale[var] + in_var_offset[var])
        tmp_time_dim += curr_time_dim

    # out_var was allocated with np.empty, so any unfilled tail would contain
    # arbitrary values; refuse to write a file in that case.
    if tmp_time_dim != time_dim:
        raise ValueError(f"{var}: read {tmp_time_dim} time steps but expected {time_dim}")

    # getting the output variables (grid taken from the last file read)
    out_lat = in_lat
    out_lon = in_lon

    # getting the output time bounds: each interval runs from the previous time
    # stamp to the current one; the first interval has zero width
    out_time_bnds[:, 1] = out_time
    out_time_bnds[0, 0] = out_time[0]
    out_time_bnds[1:, 0] = out_time[:-1]

    # out file
    out_file = f"/localdrive/drive10/jj/mdtf/inputdata/model/ERA5.{tp_type}.{deg_type}.001/6hr/ERA5.{tp_type}.{deg_type}.001.{out_var_map[var]}.6hr.nc"
    # make sure the destination folder exists before writing
    os.makedirs(os.path.dirname(out_file), exist_ok=True)
    out_ds = xr.Dataset({
        'time_bnds': (('time', 'nbnd'), out_time_bnds),
        'date': (('time'), out_date),
        out_var_map[var]: (('time', 'lat', 'lon'), out_var),
    },
    coords={
        'time': out_time,
        'lat': out_lat,
        'lon': out_lon,
        'nbnd': [1, 2]
    },
    )

    # lon attribs
    out_ds.lon.attrs['long_name'] = 'longitude'
    out_ds.lon.attrs['units'] = 'degrees_east'

    # lat attribs
    out_ds.lat.attrs['long_name'] = 'latitude'
    out_ds.lat.attrs['units'] = 'degrees_north'

    # time attribs
    out_ds.time.attrs['long_name'] = 'time'
    out_ds.time.attrs['units'] = f"hours since {year_range[0]}-01-01 00:00:00"
    out_ds.time.attrs['calendar'] = calendar_type
    out_ds.time.attrs['bounds'] = 'time_bnds'

    # time_bnds attribs
    out_ds.time_bnds.attrs['long_name'] = 'time interval endpoints'

    # date attribs
    out_ds.date.attrs['long_name'] = 'current date (YYYYMMDD)'

    # data variable attribs
    out_ds[out_var_map[var]].attrs['long_name'] = out_var_long_name[var]
    out_ds[out_var_map[var]].attrs['units'] = out_var_units[var]

    out_ds.to_netcdf(out_file)
    out_ds.close()

HBox(children=(FloatProgress(value=0.0, description='SLP', max=71.0, style=ProgressStyle(description_width='in…




In [180]:
# Sanity check: date stamp (YYYYMMDD) of the final time step that was written.
last_date = out_date[-1]
print(last_date)

20201231


In [None]:
# Open the raw 2020 file that still carries the expver dimension; the converted
# msl_2020_6hrly.nc has expver removed, so the expver selections that follow
# would fail on it.
file = '/localdrive/drive6/era5/data/six_hrly/data_1_5deg/msl/msl_2020_6hrly_expver.nc'
ds = xr.open_dataset(file)

In [None]:
# Split the dataset into its two expver slices for side-by-side inspection.
ver1, ver5 = (ds.sel(expver=version) for version in (1, 5))

In [None]:
# ver5.msl.isel(time=1410).plot()
# ver1.msl.isel(time=1410).plot()

In [None]:
# Compare two time steps of the expver=1 slice by plotting their difference.
x1 = ver1.msl.isel(time=10).values
x2 = ver1.msl.isel(time=100).values
# x2 = ver5.msl.isel(time=0).values

fig, ax = plt.subplots()
mesh = ax.pcolormesh(x1 - x2, cmap='bwr')
fig.colorbar(mesh, ax=ax)
plt.show()