# Notes

For the LEOC96 data, I have to provide the left-most (western edge) value of each grid cell as the lon coordinate, instead of the cell center point; otherwise it causes a crash.

Do I have to do this for all model data? 
If so, I might have to properly fix the crash that occurs when plotting with Basemap, rather than working around it here. 

In [None]:
import datetime as dt
import glob
import os

import cartopy
import cftime
import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import xarray as xr

In [2]:
# Location of the LEOC96 6-hourly, one-file-per-year atmospheric time series.
in_folder = '/mnt/drive6/LEOC96/atmos/ts/6hr/1yr'

# One representative file, used only to discover the (lat, lon) grid shape.
# Built from `in_folder` so the two paths cannot drift apart.
sample_file = os.path.join(in_folder, 'atmos.2008010100-2008123123.slp.nc')

In [3]:
# Inspect the sample file to get the horizontal grid size.
# The context manager guarantees the file handle is released once the
# metadata has been read.
with xr.open_dataset(sample_file) as ds:
    lat_dim = ds.lat.size
    lon_dim = ds.lon.size
    # slp.shape[1:] drops the leading axis, leaving the per-time-step field
    # shape that is used to allocate the output array in a later cell.
    dims = ds.slp.shape[1:]
print(dims)

(180, 288)


In [5]:
# Assemble a multi-year, 6-hourly SLP record on a 365-day (noleap) time axis.
var = 'slp'
year_range = [2008, 2012]  # inclusive [first, last] year to read

# reference year of the output time axis ("days since <time_start_year>-01-01")
time_start_year = 1975

# output calendar: 365 days per year, 4 samples per day -> 1460 steps per year
days_per_year = 365
steps_per_day = 4
steps_per_year = days_per_year * steps_per_day

# must include the end year, so have to add one 
num_years = year_range[1] - year_range[0] + 1

# creating the output arrays
out_slp = np.empty((steps_per_year*num_years, dims[0], dims[1]))
out_time = np.zeros((steps_per_year*num_years,))
out_time_bnds = np.zeros((steps_per_year*num_years, 2))

# loop through all the years
for i_year, year in enumerate(range(year_range[0], year_range[1]+1)):

    # input file for this year
    in_file = os.path.join(in_folder, f'atmos.{year}010100-{year}123123.{var}.nc')

    # Read everything that is needed while the file is still open; the
    # context manager closes it even if reading fails.
    with xr.open_dataset(in_file) as ds:
        in_lat = ds.variables['lat'].values
        in_lon = ds.variables['lon'].values

        n_steps = ds[var].sizes['time']
        if n_steps < steps_per_year:
            raise ValueError(
                f'{in_file} has only {n_steps} time steps, expected at least {steps_per_year}.'
            )

        # Keep only the first 1460 steps so every year fits the 365-day calendar.
        # NOTE(review): if a file holds 1464 steps (leap year), this drops the
        # last day rather than Feb 29 -- confirm that is the intended alignment.
        in_slp = ds[var].isel(time=slice(0, steps_per_year)).values

    # Output time is in days since Jan 1 of `time_start_year`:
    #   0, 0.25, ..., 364.75 within the year (1460 steps)
    #   + 365 days for every whole year between `time_start_year` and `year`
    in_time = np.arange(steps_per_year) / steps_per_day + (year - time_start_year)*days_per_year

    # saving the values in the output variables
    first = i_year*steps_per_year
    last = first + steps_per_year
    out_time[first:last] = in_time
    out_slp[first:last, :, :] = in_slp

    print(f'Completed {year}.')

# Output coordinates: shift each coordinate by half a grid spacing so that it
# marks the lower (western / southern) cell edge instead of the cell center.
# The grid spacing is taken from the first two points of the last file read.
# NOTE(review): the notebook notes only mention needing this shift for lon;
# confirm the lat shift is also wanted.
lon = np.copy(in_lon)
dlon = lon[1] - lon[0]
out_lon = np.round(lon - dlon/2., decimals=2)

lat = np.copy(in_lat)
dlat = lat[1] - lat[0]
out_lat = np.round(lat - dlat/2., decimals=2)

# Time bounds: each interval runs from the previous time stamp to the current one.
# NOTE(review): the very first interval is [t0, t0] (zero width) because there
# is no earlier time stamp -- confirm whether t0 - 0.25 was intended.
out_time_bnds[:, 1] = out_time
out_time_bnds[0, 0] = out_time[0]
out_time_bnds[1:, 0] = out_time[:-1]


Completed 2008.
Completed 2009.
Completed 2010.
Completed 2011.
Completed 2012.


## Creating the date variable provided in model data

In [6]:
# Build the integer YYYYMMDD 'date' value for every output time step.
# `out_time` holds days since Jan 1 of `time_start_year` on a noleap calendar,
# so each value is converted by adding it to that reference date.
ref_date = cftime.DatetimeNoLeap(time_start_year, 1, 1)  # hoisted: same for every step

out_date = np.empty(out_time.shape, dtype=int)
# tqdm.notebook.tqdm replaces the deprecated tqdm.tqdm_notebook wrapper
for i, i_time in tqdm.notebook.tqdm(enumerate(out_time), total=len(out_time)):
    step_date = ref_date + dt.timedelta(days=float(i_time))
    # pack year/month/day into a single integer, e.g. 2008-01-31 -> 20080131
    out_date[i] = step_date.year*10000 + step_date.month*100 + step_date.day

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=7300.0), HTML(value='')))




# Creating the model sample data for the SLP values

In [7]:
# Assemble the xarray Dataset that will be written out as the 6-hourly SLP file.

out_file = '/localdrive/drive10/jj/mdtf/inputdata/model/QBOi.EXP1.AMIP.001/6hr/QBOi.EXP1.AMIP.001.SLP.6hr.nc'

# Layout of the dataset built below:
# 1. time      - noleap calendar, days since 1975-01-01 00:00:00
# 2. date      - current date as an integer (YYYYMMDD)
# 3. lat       - latitude
# 4. lon       - longitude
# 5. time_bnds - time interval endpoints
# 6. SLP       - sea level pressure, units 'mb'

data_vars = {
    'time_bnds': (('time', 'nbnd'), out_time_bnds),
    'date': ('time', out_date),
    'SLP': (('time', 'lat', 'lon'), out_slp),
}
coords = {
    'time': out_time,
    'lat': out_lat,
    'lon': out_lon,
    'nbnd': [1, 2],
}
out_ds = xr.Dataset(data_vars, coords=coords)

# coordinate metadata
out_ds.lon.attrs.update({
    'long_name': 'longitude',
    'units': 'degrees_east',
})
out_ds.lat.attrs.update({
    'long_name': 'latitude',
    'units': 'degrees_north',
})
out_ds.time.attrs.update({
    'long_name': 'time',
    'units': 'days since 1975-01-01 00:00:00',
    'calendar': 'noleap',
    'bounds': 'time_bnds',
})

# data variable metadata
out_ds.time_bnds.attrs.update({'long_name': 'time interval endpoints'})
out_ds.date.attrs.update({'long_name': 'current date (YYYYMMDD)'})
out_ds.SLP.attrs.update({
    'long_name': 'Sea Level Pressure',
    'units': 'mb',
})


In [8]:
# Serialize the assembled dataset to a NetCDF file at `out_file`.

out_ds.to_netcdf(path=out_file)