In [2]:
import calendar
import datetime as dt
import glob
import os

import cartopy
import cftime
import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import xarray as xr

# Folder holding the ERA-I 6-hourly 500 hPa vertical velocity (w0500) input files.
# The processing loop below reads one file per year from here, named 'w0500.<year>.nc'.
in_folder = '/localdrive/drive6/erai/converts/'

# Creating the W500 file that contains all the necessary years

In [3]:
# Reference year of the output time axis ("days since <time_start_year>-01-01", noleap calendar)
time_start_year = 1975
# Years to convert: first year inclusive, last year exclusive
year_range = [1979, 2014]

# 6-hourly data on a 365-day (noleap) calendar -> 1460 steps per output year
w500_steps_per_day = 4
w500_steps_per_year = 365 * w500_steps_per_day
# Position of Feb 29 inside a leap-year input file.
# NOTE(review): assumes every yearly file starts at Jan 1 00Z with 6-hourly steps,
# the same assumption the time axis built below already makes.
feb29_steps = slice((31 + 28) * w500_steps_per_day, (31 + 29) * w500_steps_per_day)

num_years = year_range[1] - year_range[0]
# output grid is 121 (lat) x 240 (lon)
out_pr = np.empty((w500_steps_per_year*num_years, 121, 240))
out_time = np.zeros((w500_steps_per_year*num_years,))
out_time_bnds = np.zeros((w500_steps_per_year*num_years, 2))

years = range(year_range[0], year_range[1])
for i_year, year in tqdm.notebook.tqdm(enumerate(years), total=num_years):
    in_file = os.path.join(in_folder, 'w0500.%d.nc'%(year))

    # Read everything we need while the file is still open; the dataset is
    # closed automatically when the `with` block ends.
    with xr.open_dataset(in_file) as ds:
        in_lat = ds.variables['lat'].values
        in_lon = ds.variables['lon'].values
        w500_var = ds.variables['w0500']

        # Map the input year onto the 365-day model calendar. In leap years the
        # Feb 29 steps are dropped; simply truncating to the first 1460 steps
        # would keep Feb 29, lose Dec 31, and shift every later step by one day.
        keep = np.arange(w500_var.sizes['time'])
        if calendar.isleap(year) and keep.size > w500_steps_per_year:
            keep = np.delete(keep, feb29_steps)
        keep = keep[:w500_steps_per_year]
        in_pr = w500_var.isel(time=keep).values

    # Output time is in days since Jan 1 of time_start_year on the noleap calendar:
    # 0, 0.25, ..., 364.75 within the year, plus 365 days for every whole year
    # between time_start_year and the year being processed.
    in_time = np.arange(0, 365, .25) + (year_range[0] - time_start_year + i_year)*365

    rows = slice(i_year*w500_steps_per_year, (i_year+1)*w500_steps_per_year)
    out_time[rows] = in_time
    # the second axis of w0500 has a single entry, which is squeezed out here
    out_pr[rows, :, :] = in_pr[:, 0, :, :]

# getting the output variables (grid taken from the last file read)
out_lat = in_lat
out_lon = in_lon

# getting the output time bounds: each step spans [previous time, current time];
# the very first step has no predecessor, so both of its bounds equal its own time
out_time_bnds[:, 1] = out_time
out_time_bnds[0, 0] = out_time[0]
out_time_bnds[1:, 0] = out_time[:-1]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if __name__ == '__main__':


HBox(children=(FloatProgress(value=0.0, max=35.0), HTML(value='')))




## Creating the date variable provided in model data

In [4]:
# Build the integer YYYYMMDD `date` value for every output time step.
# Each entry of out_time is a day offset from Jan 1 of time_start_year on the noleap calendar.
epoch = cftime.DatetimeNoLeap(time_start_year, 1, 1)
out_date = np.empty(out_time.shape, dtype=int)
for idx, day_offset in tqdm.notebook.tqdm(enumerate(out_time), total=len(out_time)):
    stamp = epoch + dt.timedelta(days=day_offset)
    # pack year/month/day into a single YYYYMMDD integer
    out_date[idx] = stamp.year*10000 + stamp.month*100 + stamp.day

HBox(children=(FloatProgress(value=0.0, max=51100.0), HTML(value='')))




# Creating the model sample data for the W500 values

In [7]:
# Assemble the output dataset that is written to netCDF in the next cell.

out_file = '/localdrive/drive10/jj/mdtf/inputdata/model/ERAI.EXP1.TEST.001/6hr/ERAI.EXP1.TEST.001.W500.6hr.nc'

# Layout of the 6-hourly W500 file:
# 1. time      - noleap calendar, days since 1975-01-01 00:00:00
# 2. date      - current date as YYYYMMDD
# 3. lat       - latitude
# 4. lon       - longitude
# 5. time_bnds - time interval endpoints
# 6. W500      - vertical velocity at 500 hPa, Pa/s

# data variables, each given as (dims, values, attributes)
w500_data_vars = {
    'time_bnds': (('time', 'nbnd'), out_time_bnds, {
        'long_name': 'time interval endpoints',
    }),
    'date': ('time', out_date, {
        'long_name': 'current date (YYYYMMDD)',
    }),
    'W500': (('time', 'lat', 'lon'), out_pr, {
        'long_name': 'Vertical Velocity at 500hPa',
        'units': 'Pa/s',
    }),
}

# coordinates, with their attributes attached up front
w500_coords = {
    'time': ('time', out_time, {
        'long_name': 'time',
        'units': 'days since 1975-01-01 00:00:00',
        'calendar': 'noleap',
        'bounds': 'time_bnds',
    }),
    'lat': ('lat', out_lat, {
        'long_name': 'latitude',
        'units': 'degrees_north',
    }),
    'lon': ('lon', out_lon, {
        'long_name': 'longitude',
        'units': 'degrees_east',
    }),
    'nbnd': [1, 2],
}

out_ds = xr.Dataset(w500_data_vars, coords=w500_coords)


In [8]:
# actually writing the output file
# make sure the target folder exists first; to_netcdf does not create missing directories
os.makedirs(os.path.dirname(out_file), exist_ok=True)
out_ds.to_netcdf(out_file)

# Testing the input model 

In [190]:
# Read the coordinate arrays of the sample model file to compare against our output layout.
fn = '/localdrive/drive10/jj/mdtf/inputdata/model/QBOi.EXP1.AMIP.001/3hr/QBOi.EXP1.AMIP.001.PRECT.3hr.nc'
# The context manager closes the file handle once the arrays have been copied into memory.
with nc.Dataset(fn) as ncid:
    lat = ncid['lat'][:]
    lon = ncid['lon'][:]
    time = ncid['time'][:]
    time_bnds = ncid['time_bnds'][:]

In [221]:
# bounds of the second time step, then the last time value to 4 decimal places
second_step_bnds = time_bnds[1]
last_time = time[-1]
print(second_step_bnds)
print(f'{last_time:.4f}')

[0.    0.125]
2557.3750


In [63]:
# Look at how xarray decodes the sample file's time axis, and at the steps around the
# end of February in the second year of the record.
steps_per_day_3hr = 8  # the sample file is 3-hourly
# index of the first step of Feb 28 in the second 365-day year
ind = 365*steps_per_day_3hr + (31+27)*steps_per_day_3hr

# the context manager closes the file once the time values are in memory
with xr.open_dataset(fn) as ds:
    print(ds.variables['time'])
    time = ds.variables['time'].values
print(time[ind:ind+10])

<xarray.IndexVariable 'time' (time: 20460)>
array([cftime.DatetimeNoLeap(1975, 1, 1, 0, 0, 0, 0, 1, 1),
       cftime.DatetimeNoLeap(1975, 1, 1, 3, 0, 0, 0, 1, 1),
       cftime.DatetimeNoLeap(1975, 1, 1, 6, 0, 0, 0, 1, 1), ...,
       cftime.DatetimeNoLeap(1982, 1, 3, 3, 0, 0, 0, 3, 3),
       cftime.DatetimeNoLeap(1982, 1, 3, 6, 0, 0, 0, 3, 3),
       cftime.DatetimeNoLeap(1982, 1, 3, 9, 0, 0, 0, 3, 3)], dtype=object)
Attributes:
    long_name:  time
    bounds:     time_bnds
[cftime.DatetimeNoLeap(1976, 2, 28, 0, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 3, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 6, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 9, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 12, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 15, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 18, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 2, 28, 21, 0, 0, 0, 4, 59)
 cftime.DatetimeNoLeap(1976, 3, 1, 0, 0, 0, 0, 5, 60)
 cftime.DatetimeNoL

In [70]:
# NOTE(review): both imports repeat the ones in the first cell of the notebook.
import cftime
import datetime as dt

# Scratch check: build a noleap-calendar datetime for 1980-03-01 00:00:00.
# The two trailing positional values (2, 60) are presumably day-of-week and
# day-of-year, matching the layout of the cftime reprs printed above —
# TODO confirm against the installed cftime version.
cftime.DatetimeNoLeap(1980, 3, 1, 0, 0, 0, 0, 2, 60)