In [43]:
# flatten 4d (lat, lon, date, level) output data to 2d (record, level)

In [44]:
import numpy as np
import xarray as xr
import re

In [45]:
# --- run configuration: which case/file to read and where to write ---
case_name = 'CO2-2018'
name_nc_in = 'ts_satellite.201812.nc'  # input netCDF file name

# Days to process: starts at first_day, stops before last_day_p1
first_day = '2018-12-01'
last_day_p1 = '2019-01-01'

# The flattened file is written into the same directory the input is read from
input_directory = ''.join(
    ['/geos/u73/msadiq/GEOS-Chem/rundirs/ensemble_runs/', case_name, '/nd51/']
)
output_directory = input_directory

input_file = ''.join([input_directory, name_nc_in])

# Output netCDF keeps the input name, prefixed with '2d_'
output_file = ''.join([output_directory, '2d_', name_nc_in])

In [46]:
# Open the input file and collapse the unnecessary 'time' dimension;
# drop=True discards the leftover scalar 'time' coordinate in the same call.
ds = xr.open_dataset(input_file).squeeze('time', drop=True)
# Optional quick looks (uncomment to inspect):
#ds
#ds['SpeciesConc_CO2'][30,0,:,:].plot()

In [47]:
# One entry per calendar day, from first_day up to (not including) last_day_p1
days = np.arange(np.datetime64(first_day, 'D'), np.datetime64(last_day_p1, 'D'))
# Label the 'record' axis with those days, then expose it under the name 'date'
ds = ds.assign_coords(record=days).rename({'record': 'date'})
ds

In [48]:
# Collect the name of every data variable stored in the dataset
varnames = [name for name in ds.data_vars]
nvar = len(varnames)  # number of variables found
varnames[:27]

['SpeciesConc_CO2', 'PEDGE_S_PSURF']

In [49]:
# Length of each axis, read from the corresponding coordinate
nm_lev, nm_lon, nm_lat, nm_days = (
    len(ds.coords[axis]) for axis in ('lev', 'lon', 'lat', 'date')
)

# One record index per (date, lat, lon) combination
record = range(nm_days * nm_lat * nm_lon)

In [50]:
# Zero-filled float32 template with dims (record, levels); it is reused as the
# buffer for each variable and also seeds the output dataset below.
foo = xr.DataArray(
    0,
    dims=['record', 'levels'],
    coords=[record, ds.coords['lev']],
).astype('float32')
# Output dataset that will collect the flattened data arrays
ds_output = foo.to_dataset(name='SpeciesConc_CO2').astype('float32')

In [51]:
# Tabulate one variable at level index 0; after resetting the index, the
# coordinate values become ordinary columns used for the output coordinates.
first_level = ds['SpeciesConc_CO2'][:, 0, :, :]
flat = first_level.to_dataframe().reset_index()
flat

Unnamed: 0,date,lat,lon,lev,SpeciesConc_CO2
0,2018-12-01,-89.5,-180.0,0.9925,405565.03125
1,2018-12-01,-89.5,-177.5,0.9925,405565.71875
2,2018-12-01,-89.5,-175.0,0.9925,405566.43750
3,2018-12-01,-89.5,-172.5,0.9925,405567.21875
4,2018-12-01,-89.5,-170.0,0.9925,405567.96875
...,...,...,...,...,...
406219,2018-12-31,89.5,167.5,0.9925,422742.84375
406220,2018-12-31,89.5,170.0,0.9925,422750.90625
406221,2018-12-31,89.5,172.5,0.9925,422753.50000
406222,2018-12-31,89.5,175.0,0.9925,422758.78125


In [52]:
# Per-record latitude, longitude and date, taken column by column from `flat`
lat, lon, date = (
    xr.DataArray(flat[column].values, coords=[record], dims=['record'])
    for column in ('lat', 'lon', 'date')
)

# Attach them to the output dataset as variables along 'record'
ds_output['lat'] = lat
ds_output['lon'] = lon
ds_output['date'] = date
ds_output

In [53]:
# Flatten every variable from 4-D (date, lev, lat, lon) to 2-D (record, levels).
#
# Each variable is reshaped in one step instead of building a DataFrame per
# level. This also leaves the notebook-level `flat` frame untouched, so the
# cell that reads flat['lat'] / flat['lon'] / flat['date'] can still be re-run
# after this one.
for ivar in varnames[0:27]:
    # Move the level axis last: a C-order reshape then walks the records as
    # date -> lat -> lon, the same row order to_dataframe() produced per level.
    by_record = ds[ivar].transpose('date', 'lat', 'lon', 'lev').values
    foo[:, :] = by_record.reshape(foo.shape)  # cast to foo's float32 on assignment
    ds_output[ivar] = foo.copy()  # copy, because foo is reused as the buffer for the next variable
    print(ivar + ' done!')

SpeciesConc_CO2 done!
PEDGE_S_PSURF done!


In [54]:
#ds_output['SpeciesConc_CO2Tag15'][:,0].plot()

In [55]:
#ds_output.SpeciesConc_CO2Tag10.plot()
# Store the unit note as a global attribute of the output dataset
ds_output.attrs.update(comment='[CO2] unit: ppbv')
ds_output.attrs

{'comment': '[CO2] unit: ppbv'}

In [56]:
# Write the flattened dataset to the output netCDF file
ds_output.to_netcdf(output_file)