## Example: convert DHARMA LES output to DEPHY format
### Code to read DHARMA LES output files and write to DEPHY format (NetCDF)
### Contributed by Ann Fridlind from NASA/GISS

### Import libraries

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import netCDF4
import datetime as dt
from netCDF4 import Dataset

### Specify directory locations

In [2]:
# specify Github scratch directory where processed model output will be committed
my_gitdir = '../../output_les/dharma/sandbox/Lx25km_dx100m/'

# specify local source directories (with subdirectories for spin up over ice and restart over water)
my_rundir = '/user-data-home/dharma/sandbox/Lx25km_dx100m/'
my_outdirs = sorted([f for f in os.listdir(my_rundir) if not f.startswith('.')], key=str.lower)
print(my_outdirs)

['hours0-2', 'hours2-end']


### Read DHARMA input parameter and domain-mean output files

In [3]:
# read in DHARMA parameter settings from second run
input_filename = my_rundir + my_outdirs[1] + '/dharma.cdf'
dharma_params = xr.open_dataset(input_filename)
dharma_params

ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed


In [4]:
# read in and concatenate DHARMA domain-mean profiles
for index, elem in enumerate(my_outdirs):
    input_filename = my_rundir + elem + '/dharma.soundings.cdf'
    print(input_filename)
    if index==0:
        dharma_snds = xr.open_dataset(input_filename)
    else:
        dharma_snds_too = xr.open_dataset(input_filename)
        dharma_snds = xr.concat([dharma_snds,dharma_snds_too],dim='time')
dharma_snds = dharma_snds.drop_duplicates('time',keep='first')
dharma_snds

/user-data-home/dharma/sandbox/Lx25km_dx100m/hours0-2/dharma.soundings.cdf
/user-data-home/dharma/sandbox/Lx25km_dx100m/hours2-end/dharma.soundings.cdf


In [5]:
# calculate some additional variables requested, and add them to the xarray
dummy_snd = dharma_snds['qc']*0.
nz = dharma_params['geometry'].nz
dz = dharma_snds['zw'].data[1:nz+1]-dharma_snds['zw'].data[0:nz]
Fnsw = dharma_snds['Fsw_up'].data-dharma_snds['Fsw_dn'].data
Fnlw = dharma_snds['Flw_up'].data-dharma_snds['Flw_dn'].data
dharma_snds = dharma_snds.assign(theta = dummy_snd + (dharma_snds['th'].data+1)*dharma_snds.theta_00)
dharma_snds = dharma_snds.assign(pi = dummy_snd + dharma_snds['T'].data/dharma_snds['theta'].data)
dharma_snds = dharma_snds.assign(pressure = dummy_snd + np.power(dharma_snds['pi'].data,7./2)*np.power(10.,5))
dharma_snds = dharma_snds.assign(SH = dummy_snd + dharma_snds['qv'].data/(1+dharma_snds['qt'].data))
dharma_snds = dharma_snds.assign(TH = dummy_snd + dharma_snds['qt'].data/(1+dharma_snds['qt'].data))
dharma_snds = dharma_snds.assign(SC = dummy_snd + dharma_snds['qc'].data/(1+dharma_snds['qt'].data))
dharma_snds = dharma_snds.assign(SR = dummy_snd + dharma_snds['qr'].data/(1+dharma_snds['qt'].data))
dharma_snds = dharma_snds.assign(stke = dummy_snd + dharma_snds['tkeavg'].data+dharma_snds['tke_smag'].data)
dharma_snds = dharma_snds.assign(w_zt = dummy_snd + 0.5*(dharma_snds['w'].data[:,0:nz]+dharma_snds['w'].data[:,1:nz+1]))
dharma_snds = dharma_snds.assign(w2_zt = dummy_snd + 0.5*(dharma_snds['w2'].data[:,0:nz]+dharma_snds['w2'].data[:,1:nz+1]))
dharma_snds = dharma_snds.assign(HRlw = dummy_snd + 0.5*(Fnlw[:,0:nz]+Fnlw[:,1:nz+1])
                                 /dz/dharma_snds['rhobar'].data)
dharma_snds = dharma_snds.assign(HRsw = dummy_snd + 0.5*(Fnsw[:,0:nz]+Fnsw[:,1:nz+1])
                                 /dz/dharma_snds['rhobar'].data)
dharma_snds

In [6]:
# read in and concatenate DHARMA domain-mean scalars
for index, elem in enumerate(my_outdirs):
    input_filename = my_rundir + elem + '/dharma.scalars.cdf'
    print(input_filename)
    if index==0:
        dharma_scas = xr.open_dataset(input_filename)
    else:
        dharma_scas_too = xr.open_dataset(input_filename)
        dharma_scas = xr.concat([dharma_scas,dharma_scas_too],dim='time')
dharma_scas = dharma_scas.drop_duplicates('time',keep='first')
dharma_scas

/user-data-home/dharma/sandbox/Lx25km_dx100m/hours0-2/dharma.scalars.cdf
/user-data-home/dharma/sandbox/Lx25km_dx100m/hours2-end/dharma.scalars.cdf


In [7]:
# calculate some additional variables requested
dummy_sca = dharma_scas['lwp']*0.
dharma_scas = dharma_scas.assign(Psurf = dummy_sca + dharma_params['sounding'].Psurf*100.)
dharma_scas = dharma_scas.assign(SWupTOA = dummy_sca + dharma_scas['alb_toa'].data*dharma_scas['u0'].data*1367.)
dharma_scas = dharma_scas.assign(SWdnTOA = dummy_sca + dharma_scas['u0'].data*1367.)
dharma_scas

### Read list of requested domain-mean output variables

In [8]:
# read list of requested variables
vars_mean_list = pd.read_excel('https://docs.google.com/spreadsheets/d/1Vl8jYGviet7EtXZuQiitrx4NSkV1x27aJAhxxjBb9zI/export?gid=0&format=xlsx',
                              sheet_name='Mean')
vars_mean_list = vars_mean_list.drop(columns='comment')

# add columns to contain model output name and units conversion factors
vars_mean_list = vars_mean_list.assign(model_name='missing data',conv_factor=1.0)
vars_mean_list

Unnamed: 0,standard_name,variable_id,units,dimensions,model_name,conv_factor
0,time,time,s,self,missing data,1.0
1,height,zf,m,self,missing data,1.0
2,air_pressure,pa,Pa,"time, height",missing data,1.0
3,air_volumic_mass,rho,kg m-3,"time, height",missing data,1.0
4,air_temperature,ta,K,"time, height",missing data,1.0
5,specific_humidity,qv,1,"time, height",missing data,1.0
6,total_water_content,qt,1,"time, height",missing data,1.0
7,relative_humidity,hur,1,"time, height",missing data,1.0
8,eastward_wind,ua,m s-1,"time, height",missing data,1.0
9,northward_wind,va,m s-1,"time, height",missing data,1.0


### Match DHARMA scalar variables to requested outputs

In [9]:
# identify requested variables with only time dimension
vars_mean_scas = vars_mean_list[vars_mean_list['dimensions']=='time']

# match to DHARMA variable names and specify conversion factors
for index in vars_mean_scas.index:
    standard_name = vars_mean_list.standard_name.iat[index]
    if standard_name=='surface_upward_sensible_heat_flux': 
        vars_mean_list.model_name.iat[index] = 'avg_T_flx'
    if standard_name=='surface_upward_latent_heat_flux': 
        vars_mean_list.model_name.iat[index] = 'avg_qv_flx'
    if standard_name=='surface_pressure': 
        vars_mean_list.model_name.iat[index] = 'Psurf'
    if standard_name=='surface_temperature': 
        vars_mean_list.model_name.iat[index] = 'avg_T_sfc'
    if standard_name=='inversion_height': 
        vars_mean_list.model_name.iat[index] = 'z_inv'
    if standard_name=='cloud_area_fraction': 
        vars_mean_list.model_name.iat[index] = 'colf_opd'
    if standard_name=='precipitation_flux_at_surface': 
        vars_mean_list.model_name.iat[index] = 'avg_precip'
        vars_mean_list.conv_factor.iat[index] = 1/3600.
    if standard_name=='atmosphere_mass_content_of_cloud_water': 
        vars_mean_list.model_name.iat[index] = 'cwp'
        vars_mean_list.conv_factor.iat[index] = 1/1000.
    if standard_name=='atmosphere_mass_content_of_liquid_water': 
        vars_mean_list.model_name.iat[index] = 'lwp'
        vars_mean_list.conv_factor.iat[index] = 1/1000.
    if standard_name=='atmosphere_mass_content_of_rain_water': 
        vars_mean_list.model_name.iat[index] = 'rwp'
        vars_mean_list.conv_factor.iat[index] = 1/1000.
    if standard_name=='toa_incoming_shortwave_flux': 
        vars_mean_list.model_name.iat[index] = 'SWdnTOA'
    if standard_name=='toa_outgoing_shortwave_flux': 
        vars_mean_list.model_name.iat[index] = 'SWupTOA'
    if standard_name=='toa_outgoing_longwave_flux': 
        vars_mean_list.model_name.iat[index] = 'LWupTOA'
    print(standard_name,' <--- ',vars_mean_list.model_name.iat[index],' * ',vars_mean_list.conv_factor.iat[index])

surface_upward_sensible_heat_flux  <---  avg_T_flx  *  1.0
surface_upward_latent_heat_flux  <---  avg_qv_flx  *  1.0
obukhov_length  <---  missing data  *  1.0
surface_friction_velocity  <---  missing data  *  1.0
inversion_height  <---  z_inv  *  1.0
surface_pressure  <---  Psurf  *  1.0
surface_temperature  <---  avg_T_sfc  *  1.0
cloud_area_fraction  <---  colf_opd  *  1.0
precipitation_flux_at_surface_in_liquid phase  <---  missing data  *  1.0
precipitation_flux_at_surface_in_ice_phase  <---  missing data  *  1.0
atmosphere_mass_content_of_cloud_water  <---  cwp  *  0.001
atmosphere_mass_content_of_rain_water  <---  rwp  *  0.001
atmosphere_mass_content_of_cloud_ice_water  <---  missing data  *  1.0
atmosphere_mass_content_of_snow_water  <---  missing data  *  1.0
atmosphere_mass_content_of_graupel_water  <---  missing data  *  1.0
toa_incoming_shortwave_flux  <---  SWdnTOA  *  1.0
toa_outgoing_shortwave_flux  <---  SWupTOA  *  1.0
toa_outgoing_longwave_flux  <---  LWupTOA  *  1.0

### Match DHARMA profile variables to requested outputs

In [10]:
# identify requested variables with time and vertical dimensions
vars_mean_snds = vars_mean_list[vars_mean_list['dimensions']=='time, height']

# match to DHARMA variable names and specify conversion factors
for index in vars_mean_snds.index:
    standard_name = vars_mean_list.standard_name.iat[index]
    if standard_name=='air_pressure': 
        vars_mean_list.model_name.iat[index] = 'pressure'
    if standard_name=='rho': 
        vars_mean_list.model_name.iat[index] = 'rhobar'
    if standard_name=='air_temperature': 
        vars_mean_list.model_name.iat[index] = 'T'
    if standard_name=='specific_humidity': 
        vars_mean_list.model_name.iat[index] = 'SH'
    if standard_name=='total_water_content': 
        vars_mean_list.model_name.iat[index] = 'TH'
    if standard_name=='relative_humidity': 
        vars_mean_list.model_name.iat[index] = 'RH'
        vars_mean_list.conv_factor.iat[index] = 1/100.
    if standard_name=='eastward_wind': 
        vars_mean_list.model_name.iat[index] = 'u'
    if standard_name=='northward_wind': 
        vars_mean_list.model_name.iat[index] = 'v'
    if standard_name=='upward_air_velocity': 
        vars_mean_list.model_name.iat[index] = 'w_zt'
    if standard_name=='air_potential_temperature': 
        vars_mean_list.model_name.iat[index] = 'theta'
    if standard_name=='air_liquid_potential_temperature': 
        vars_mean_list.model_name.iat[index] = 'thl'
    if standard_name=='specific_turbulent_kinetic_energy': 
        vars_mean_list.model_name.iat[index] = 'stke'
    if standard_name=='variance_of_upward_air_velocity': 
        vars_mean_list.model_name.iat[index] = 'w2_zt'
    if standard_name=='mass_fraction_of_cloud_liquid_water_in_air': 
        vars_mean_list.model_name.iat[index] = 'SC'
    if standard_name=='mass_fraction_of_rain_water_in_air': 
        vars_mean_list.model_name.iat[index] = 'SR'
    if standard_name=='cloud_area_fraction_in_atmospheric_layer': 
        vars_mean_list.model_name.iat[index] = 'cloud_f'
    if standard_name=='number_of_cloud_droplets_in_air': 
        vars_mean_list.model_name.iat[index] = 'nc'
    if standard_name=='number_of_rain_droplets_in_air': 
        vars_mean_list.model_name.iat[index] = 'nr'
    if standard_name=='longwave_heating_rate_in_air': 
        vars_mean_list.model_name.iat[index] = 'HRlw'
    if standard_name=='shortwave_heating_rate_in_air': 
        vars_mean_list.model_name.iat[index] = 'HRsw'
    print(standard_name,' <--- ',vars_mean_list.model_name.iat[index],' * ',vars_mean_list.conv_factor.iat[index])

air_pressure  <---  pressure  *  1.0
air_volumic_mass  <---  missing data  *  1.0
air_temperature  <---  T  *  1.0
specific_humidity  <---  SH  *  1.0
total_water_content  <---  TH  *  1.0
relative_humidity  <---  RH  *  0.01
eastward_wind  <---  u  *  1.0
northward_wind  <---  v  *  1.0
upward_air_velocity  <---  w_zt  *  1.0
air_potential_temperature  <---  theta  *  1.0
specific_turbulent_kinetic_energy_resolved  <---  missing data  *  1.0
specific_turbulent_kinetic_energy_sgs  <---  missing data  *  1.0
variance_of_upward_air_velocity  <---  w2_zt  *  1.0
mass_fraction_of_cloud_liquid_water_in_air  <---  SC  *  1.0
mass_fraction_of_rain_water_in_air  <---  SR  *  1.0
mass_fraction_of_cloud_ice_in_air  <---  missing data  *  1.0
mass_fraction_of_snow_in_air  <---  missing data  *  1.0
mass_fraction_of_graupel_in_air  <---  missing data  *  1.0
cloud_area_fraction_in_atmospheric_layer  <---  cloud_f  *  1.0
precipitation_flux_in_air_in_liquid phase  <---  missing data  *  1.0
precipi

### Create DEPHY output file

In [11]:
# create DEPHY output file
dephy_filename = './' + my_gitdir + 'DHARMA_LES_COMBLE-I.nc'
if os.path.exists(dephy_filename):
    os.remove(dephy_filename)
    print('The file ' + dephy_filename + ' has been deleted successfully')    
dephy_file = Dataset(dephy_filename,mode='w',format='NETCDF3_CLASSIC')

# create global attributes
dephy_file.title='DHARMA LES results for COMBLE-MIP case: fixed Nd and Ni'
dephy_file.reference='https://github.com/ARM-Development/comble-mip'
dephy_file.authors='Ann Fridlind (ann.fridlind@nasa.gov) and Florian Tornow (florian.tornow@nasa.gov)'
dephy_file.source=input_filename
dephy_file.version=dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
dephy_file.format_version='DEPHY SCM format version 1.6'
dephy_file.script='convert_DHARMA_LES_output_to_dephy_format.ipynb'
dephy_file.startDate='2020-03-12T19:00:00Z'
dephy_file.force_geo=1
dephy_file.surfaceType='ocean (after spin-up)'
dephy_file.surfaceForcing='ts (after spin-up)'
dephy_file.lat=str(dharma_params['Coriolis'].lat) + ' deg N'
dephy_file.dx=str(dharma_params['geometry'].L_x/dharma_params['geometry'].nx) + ' m'
dephy_file.dy=str(dharma_params['geometry'].L_y/dharma_params['geometry'].ny) + ' m'
dephy_file.dz='see zf variable'
dephy_file.nx=dharma_params['geometry'].nx
dephy_file.ny=dharma_params['geometry'].ny
dephy_file.nz=dharma_params['geometry'].nz

# create dimensions
nz = dharma_snds.dims['zt']
zf = dephy_file.createDimension('zf', nz)
zf = dephy_file.createVariable('zf', np.float64, ('zf',))
zf.units = 'm'
zf.long_name = 'altitude'
zf[:] = dharma_snds['zt'].data

nt = dharma_snds.dims['time']
time = dephy_file.createDimension('time', nt)
time = dephy_file.createVariable('time', np.float64, ('time',))
time.units = 'seconds since ' + dephy_file.startDate
time.long_name = 'time'
time[:] = dharma_snds['time'].data

# create and fill variables
for index in vars_mean_list.index[2:]:
    std_name = vars_mean_list.standard_name.iat[index]
    var_name = vars_mean_list.variable_id.iat[index]
    mod_name = vars_mean_list.model_name.iat[index]
    c_factor = vars_mean_list.conv_factor.iat[index]
    if vars_mean_list.dimensions.iat[index]=='time':
        new_sca = dephy_file.createVariable(var_name, np.float64, ('time'))
        new_sca.units = vars_mean_list.units.iat[index]
        new_sca.long_name = std_name
        if vars_mean_list.model_name.iat[index]!='missing data':
            new_sca[:] = dharma_scas[mod_name].data*c_factor
    if vars_mean_list.dimensions.iat[index]=='time, height':
        new_snd = dephy_file.createVariable(var_name, np.float64, ('time','zf'))
        new_snd.units = vars_mean_list.units.iat[index]
        new_snd.long_name = std_name
        if vars_mean_list.model_name.iat[index]!='missing data': 
            new_snd[:] = dharma_snds[mod_name].data*c_factor

print(dephy_file)
dephy_file.close()

The file ./../../output_les/dharma/sandbox/Lx25km_dx100m/DHARMA_LES_COMBLE-I.nc has been deleted successfully
<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    title: DHARMA LES results for COMBLE-MIP case: fixed Nd and Ni
    reference: https://github.com/ARM-Development/comble-mip
    authors: Ann Fridlind (ann.fridlind@nasa.gov) and Florian Tornow (florian.tornow@nasa.gov)
    source: /user-data-home/dharma/sandbox/Lx25km_dx100m/hours2-end/dharma.scalars.cdf
    version: 2023-06-09 20:04:57
    format_version: DEPHY SCM format version 1.6
    script: convert_DHARMA_LES_output_to_dephy_format.ipynb
    startDate: 2020-03-12T19:00:00Z
    force_geo: 1
    surfaceType: ocean (after spin-up)
    surfaceForcing: ts (after spin-up)
    lat: 73.0 deg N
    dx: 100.0 m
    dy: 100.0 m
    dz: see zf variable
    nx: 256
    ny: 256
    nz: 159
    dimensions(sizes): zf(159), time(241)
    variables(dimensions): float64 zf(zf), float64 time(

### Check output file

In [12]:
dephy_check = xr.open_dataset(dephy_filename)
dephy_check