In [1]:
# Flatten 4-D model output (date, level, lat, lon) into 2-D arrays of (record, level).

In [2]:
import numpy as np
import xarray as xr
import re

In [3]:
# --- Run configuration --------------------------------------------------
# Case to process and the name of its input netCDF file.
case_name = 'CO2-2018-01'
name_nc_in = 'ts_satellite.201801.nc'

# Days to process, as a half-open interval: first_day is included,
# last_day_p1 (the day after the last one wanted) is not.
first_day = '2018-01-01'
last_day_p1 = '2018-02-01'

# Input lives in the case's nd51 directory; output is written next to it.
input_directory = f'/geos/u73/msadiq/GEOS-Chem/rundirs/ensemble_runs/{case_name}/nd51/'
output_directory = input_directory

# Full paths: the output file is the input name prefixed with '2d_'.
input_file = f'{input_directory}{name_nc_in}'
output_file = f'{output_directory}2d_{name_nc_in}'

In [4]:
# Open the input netCDF file.
ds = xr.open_dataset(input_file)
# Remove the unnecessary 'time' dimension. squeeze() only accepts a length-1
# dimension, and drop=True discards the leftover scalar 'time' coordinate in
# the same step, replacing the deprecated Dataset.drop('time') call.
ds = ds.squeeze('time', drop=True)
#ds
#ds['SpeciesConc_CO2'][30,0,:,:].plot()

In [5]:
# One entry per calendar day from first_day up to, but not including, last_day_p1.
days = np.arange(first_day, last_day_p1, dtype='datetime64[D]')
# Label the 'record' dimension with those days, then rename it to 'date'.
ds = ds.assign_coords(record=days).rename({'record': 'date'})
ds

In [6]:
# Iterating over data_vars yields the variable names, in dataset order.
varnames = list(ds.data_vars)
nvar = len(varnames)  # total number of data variables
varnames[:27]  # display the first 27 names

['SpeciesConc_CO2oc',
 'SpeciesConc_CO2bf',
 'SpeciesConc_CO2ff',
 'SpeciesConc_CO2',
 'SpeciesConc_CO2Tag1',
 'SpeciesConc_CO2Tag2',
 'SpeciesConc_CO2Tag3',
 'SpeciesConc_CO2Tag4',
 'SpeciesConc_CO2Tag5',
 'SpeciesConc_CO2Tag6',
 'SpeciesConc_CO2Tag7',
 'SpeciesConc_CO2Tag8',
 'SpeciesConc_CO2Tag9',
 'SpeciesConc_CO2Tag10',
 'SpeciesConc_CO2Tag11',
 'SpeciesConc_CO2Tag12',
 'SpeciesConc_CO2Tag13',
 'SpeciesConc_CO2Tag14',
 'SpeciesConc_CO2Tag15',
 'SpeciesConc_CO2Tag16',
 'SpeciesConc_CO2Tag17',
 'SpeciesConc_CO2Tag18',
 'SpeciesConc_CO2Tag19',
 'SpeciesConc_CO2Tag20',
 'SpeciesConc_CO2Tag21',
 'SpeciesConc_CO2Tag22',
 'PEDGE_S_PSURF']

In [7]:
# Length of each coordinate axis of the input dataset.
nm_lev, nm_lon, nm_lat, nm_days = (
    len(ds.coords[axis]) for axis in ('lev', 'lon', 'lat', 'date')
)

# One record index per (date, lat, lon) combination.
record = range(nm_days * nm_lat * nm_lon)

In [8]:
# Zero-filled float32 buffer of shape (record, levels); it is reused as the
# working array when variables are flattened.
foo = xr.DataArray(
    np.zeros((len(record), nm_lev), dtype='float32'),
    coords=[record, ds.coords['lev']],
    dims=['record', 'levels'],
)
# Dataset that collects the flattened variables. It starts from an
# independent copy of the buffer so later writes to `foo` do not touch it.
ds_output = foo.copy().to_dataset(name='SpeciesConc_CO2')

In [9]:
# Flatten one variable at level index 0 into a table. reset_index() moves the
# index levels into ordinary columns, which are used for the output coordinates.
flat = ds['SpeciesConc_CO2'][:, 0, :, :].to_dataframe().reset_index()
flat

Unnamed: 0,date,lat,lon,lev,SpeciesConc_CO2
0,2018-01-01,-89.5,-180.0,0.9925,403691.09375
1,2018-01-01,-89.5,-177.5,0.9925,403693.28125
2,2018-01-01,-89.5,-175.0,0.9925,403695.84375
3,2018-01-01,-89.5,-172.5,0.9925,403697.71875
4,2018-01-01,-89.5,-170.0,0.9925,403699.93750
...,...,...,...,...,...
406219,2018-01-31,89.5,167.5,0.9925,415478.15625
406220,2018-01-31,89.5,170.0,0.9925,415486.71875
406221,2018-01-31,89.5,172.5,0.9925,415489.56250
406222,2018-01-31,89.5,175.0,0.9925,415495.28125


In [10]:
# Per-record latitude, longitude and date, taken column by column from the
# flattened table `flat` and indexed by `record`.
lat = xr.DataArray(flat['lat'].values, coords=[record], dims=['record'])
lon = xr.DataArray(flat['lon'].values, coords=[record], dims=['record'])
date = xr.DataArray(flat['date'].values, coords=[record], dims=['record'])

# Add them to the output dataset as variables along 'record'.
ds_output['lat'] = lat
ds_output['lon'] = lon
ds_output['date'] = date
ds_output

In [11]:
# Flatten each variable, one level at a time, into the shared (record, levels)
# buffer `foo`, then store a copy of the filled buffer in the output dataset.
for ivar in varnames[0:27]:
    for ilev in range(nm_lev):
        # Use a loop-local name: rebinding `flat` here would replace the
        # table built earlier (with lat/lon/date columns) by a frame whose
        # index was never reset, so re-running the coordinate cell would fail.
        level_frame = ds[ivar][:, ilev, :, :].to_dataframe()  # one variable, one level
        # .values makes the assignment explicitly positional (row order of
        # the flattened frame), with no reliance on the frame's index.
        foo[:, ilev] = level_frame[ivar].values
    # Copy, because `foo` is overwritten again for the next variable.
    ds_output[ivar] = foo.copy()
    print(ivar + ' done!')

SpeciesConc_CO2oc done!
SpeciesConc_CO2bf done!
SpeciesConc_CO2ff done!
SpeciesConc_CO2 done!
SpeciesConc_CO2Tag1 done!
SpeciesConc_CO2Tag2 done!
SpeciesConc_CO2Tag3 done!
SpeciesConc_CO2Tag4 done!
SpeciesConc_CO2Tag5 done!
SpeciesConc_CO2Tag6 done!
SpeciesConc_CO2Tag7 done!
SpeciesConc_CO2Tag8 done!
SpeciesConc_CO2Tag9 done!
SpeciesConc_CO2Tag10 done!
SpeciesConc_CO2Tag11 done!
SpeciesConc_CO2Tag12 done!
SpeciesConc_CO2Tag13 done!
SpeciesConc_CO2Tag14 done!
SpeciesConc_CO2Tag15 done!
SpeciesConc_CO2Tag16 done!
SpeciesConc_CO2Tag17 done!
SpeciesConc_CO2Tag18 done!
SpeciesConc_CO2Tag19 done!
SpeciesConc_CO2Tag20 done!
SpeciesConc_CO2Tag21 done!
SpeciesConc_CO2Tag22 done!
PEDGE_S_PSURF done!


In [12]:
#ds_output['SpeciesConc_CO2Tag15'][:,0].plot()

In [13]:
#ds_output.SpeciesConc_CO2Tag10.plot()
# Record the concentration unit as a global attribute of the output dataset.
ds_output.attrs.update({'comment': '[CO2] unit: ppbv'})
ds_output.attrs

{'comment': '[CO2] unit: ppbv'}

In [14]:
# Write the flattened dataset to the configured output netCDF file.
ds_output.to_netcdf(output_file)