# Methane k-coefficient calculation from P,T-grid of spectra
- - - - - 

<mate@berkeley.edu>

Calculation of methane opacities from tables of individual lines.

In [1]:
%pylab inline
import os

#When calculating the spectra, isotope numbers were used to delineate between species of a molecule (ex CH4 vs CH3D)
#For clarity in loading the kc grids into the model, we use the species name, but still
#require the isotope number and molecule name to find the associated spectra

#N.B. Because the same wavenumbers were used for all scpectra, these are hard-coded in the get_spectrum function below
#N.B. Directories for storing data need to be setup in advance.

version = 1
molecule = 'C2H6'
species = '12C2H6' #Using HITRAN notation
year=2006
isotope = 1

specdatapath = '/dataranch/VLT/HITRAN_2016/{:s}/spectra/'.format(molecule)
kcpath = '/dataranch/VLT/HITRAN_2016/{:s}/k_coefficient/VIMS/'.format(molecule)
gridpath = kcpath.format(molecule)+'{:s}/'.format(species) #Directory to store gridpoints

if not os.path.isdir(gridpath): os.mkdir(gridpath) 

fname_kc_grid = 'kc_{:s}.VIMS_{:04d}.v{:02d}.npy'.format(species,year,version)
fstr_gridpoint = 'kc_{:s}.VIMS_{:04d}.v{:02d}.{:7.2e}mb_{:05.1f}K.npy'

npy_files = specdatapath+'*.npy'
flist = !ls $npy_files
Plist = [float(fname.split('/')[-1].split('_')[1][:-2]) for fname in flist]
Tlist = [float(fname.split('/')[-1].split('_')[2][:-5]) for fname in flist]
pressures = np.array(sorted(set(Plist)))
temperatures = np.array(sorted(set(Tlist)))
nsteps = len(pressures)*len(temperatures)

ng = 10

import logging
logger = logging.getLogger('kc')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
if logger.handlers: logger.handlers = []

fh = logging.FileHandler(gridpath+'kc.parallel.log')
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

logger.info("""
{:s} k-coefficient calculation version {:02d} -

VIMS plate scale calculation for an
abridged range of IR from HITRAN 2016.

Pressure grid points..... {:3d}
Temperature grid points.. {:3d}
Total grid steps......... {:3d}
""".format(species, version, len(pressures), len(temperatures),nsteps)
)

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


FileNotFoundError: [WinError 3] The system cannot find the path specified: '/dataranch/VLT/HITRAN_2016/C2H6/k_coefficient/VIMS/12C2H6/'

In [2]:
def readfits(filename, header=False) :
"""Read FITS file reproducing simple IDL-esque syntax."""
    import pyfits
    fits = pyfits.open(filename, ignore_missing_end=True )
    im  = fits[0].data
    hdr = fits[0].header
    if header:
        return im, hdr
    else:
        return im
        
def fi(array, v): 
    """Find array index (or indices) nearest to value(s) of v.
       Returns np.int64 or list."""
    if type(v) == float or type(v) == int: 
        return (np.abs(np.array(array)-v)).argmin()
    else:
        i = []
        for vi in v: i.append((np.abs(np.array(array)-vi)).argmin())
        return i

# Wavelength scale 

In [3]:
import numpy as np

wavelength = {}

mu = np.load('/dataranch/VLT/HITRAN_2016/wavelengths/vims_{:04d}.npy'.format(year))
bounds = np.load('/dataranch/VLT/HITRAN_2016/wavelengths/vimsb_{:04d}.npy'.format(year))

wavelength.update({'mu':mu,
                   'nu':1e4/mu,
                   'nu_min':1e4/bounds[1:],
                   'nu_max':1e4/bounds[:-1],
                   'nlam':len(mu),
                   },
                  )

## Setup Parallel engines & calculate k-coefficients

Although it works fine to have most of the code sitting in a module, it somewhat
defeats the purpose of having the code visible and executable in the notebook. 
Below we'll see if th `%%px` commands a good alternative for running/editing
the methods in parallel.


It is interesting that passing each of these values as a "static" reference is fine, so that 
each engine can perform a calculation and store the data. However, when trying to update csteps, so 
that there can be a running tabulation of the completed grid steps, using a csteps+=1, or similar,
cause a `referenced before assignment` error. Checking some [documentation](http://ipython.org/ipython-doc/2/parallel/parallel_multiengine.html) suggests that I might need to use the `push` or `pull` 
methods in the direct view --- maybe.

An alternative is to query the file list in the output directory to see how many files have been written 
and use this value...

    npy_files = specdatapath+'*.npy'
    flist = !ls $npy_files

In [4]:
from IPython import parallel
rc = parallel.Client()
dv = rc[:]
dv.scatter('id', dv.targets, flatten=True)

dv['temperatures'] = temperatures
dv['pressures'] = pressures
dv['ng'] = ng
dv['wavelength'] = wavelength
dv['specdatapath'] = specdatapath
dv['kcpath'] = kcpath
dv['gridpath'] = gridpath
dv['version'] = version
dv['fstr_gridpoint'] = fstr_gridpoint
dv['nsteps'] = nsteps
dv['year'] = year
dv['isotope'] = isotope
dv['molecule'] = molecule
dv['species'] = species

# Legendre-Gauss quadrature nodes and weights on interval [0,1]
g_node, g_weight = np.polynomial.legendre.leggauss(ng)
g_node = g_node*0.5+0.5
g_weight *= 0.5 

dv['g_node'] = g_node
dv['g_weight'] = g_weight



## Define methods for parallel execution on all engines

In [5]:
%%px

import logging
logger = logging.getLogger('kc')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')

if logger.handlers: logger.handlers = []
        
fh = logging.FileHandler('kc.parallel.log')
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG)
logger.addHandler(fh)

def get_spectrum(P,T,molecule,isotope):

    from os.path import isfile
    import numpy as np
    
    fstr  = specdatapath+'{:s}.1000-11500.I{:1d}.HITRAN2016_{:7.2e}mb_{:05.1f}K.npy'

    fname  = fstr.format(molecule,isotope,P,T)

    if isfile(fname):
        nu_min = 700.  # minimum frequency [cm-1]
        nu_max = 11800.  # maximum frequency (cm-1)
        dnu    = 1e-3   # frequency grid steps, or "resolution" (cm-1)
        num_nu = (nu_max-nu_min)/dnu
        nu     = np.linspace(nu_min,nu_max,num_nu)
        k_spec = np.load(fname)
        logger.info('[{:02d}] {:s} spectrum loaded from {:s}'.format(id, molecule, fname))
    else:
        logger.warning('[{:02d}] {:s} Not found.'.format(id, fname))
        return

    logger.info('[{:02d}] {:s} opacity spectra.'.format(id, molecule))
    return nu, k_spec

def inbandpass(k_spec, nu, nu_min, nu_max, ng):
    """Calculate k-coefficients in a given bandpass from 
       the input opacity spectrum.

        k_spec   - opacity array, usually [km-amagat]
        nu       - dispersion axis of k_spec, usually [cm-1]
        nu_cen   - center of output bandpass, [cm-1]
        delta_nu - full width of bandpass, [cm-1]
       
        return k-coefficient values at the Legendre-Gauss 
        nodes (i.e., zeros) in the interval [0,1]
        with corresponding weights."""

    import numpy as np
    fi = lambda array, value : (np.abs(array-value)).argmin()
    imn, imx = (fi(nu, nu_min), fi(nu, nu_max))
    x = nu[imn:imx] ; k_band = k_spec[imn:imx]
    nbins  = 100
    bin_edges = np.logspace(np.log10(min(k_band)),
                            np.log10(max(k_band)), 
                            nbins+1)
    fk, bin_edges = np.histogram(k_band, bins=bin_edges)
    k = np.sqrt(bin_edges[0:nbins]*np.roll(bin_edges,-1)[0:nbins])
    G = np.cumsum(fk)/float(len(k_band))
    g_indices = list(map(fi,[G]*len(g_node),g_node)) 
    return k[g_indices]

## Define procedure to be called with the `map_async` method and map over atmosphere layers

In [6]:
def calc_kcoeff(gridpoint):
    """Calculate the k-coefficients for pressure and temperature
    specified by gridpoint."""
    import numpy as np
    import os

    P = gridpoint[0] ; T = gridpoint[1] ; 
    molecule = gridpoint[2]; isotope = gridpoint[3]; species = gridpoint[4];
    version = gridpoint[5]; year = gridpoint[6]

    fname = fstr_gridpoint.format(species,year,version,P,T)
    tmpname = 'tmp_'+fname

    if os.path.isfile(gridpath+fname):
        logger.info('[{:02d}] Loading existing file: {:s}'.format(id, fname))
        kc_spec = np.load(gridpath+fname)
        return kc_spec

    ng = 10
    nu, k_spec = get_spectrum(P,T,molecule,isotope)    

    kc_spec = np.ndarray([wavelength['nlam'],ng])
    for i in range(wavelength['nlam']):
        kc_spec[i,:] = inbandpass(k_spec, nu, wavelength['nu_min'][i], wavelength['nu_max'][i], ng)
        
        output_interval = 100
        if not np.mod(i,output_interval) :
            logger.debug("[{:02d}] ({:7.2e}mb, {:05.1f}K) - spectral step {:3d} of {:3d}".format(
                         id, P , T, i, len(k_spec)))
            np.save(gridpath+tmpname, kc_spec)

    np.save(gridpath+fname, kc_spec)
    if os.path.isfile(gridpath+fname): 
        os.remove(gridpath+tmpname)
    logger.info("[{:02d}] ({:7.2e}mb, {:05.1f}K) - Completed. File saved {:s}".format(
                         id, P,T, fname))
    
    return kc_spec

## Run calculation and save data

- temporary files don't contain the last ~65 wavelength gridpoints (code revised to output P,T k-coefficients
- Currently the list of temperature and pressure combinations go into one long output file, and final output ndarray should fix this
- versioning should be revised to only occur in one location, and perhaps be included in logs.
- if `kc_out` is written to disk without failure, then all is well, otherwise need to re-run calc (9min for set of 24 T,P combinations on OSIRIS Kbb grid)

In [7]:
import os
gridpoints = list((P,T,molecule,isotope,species,version,year) for P in pressures for T in temperatures )

kc_layers = dv.map_async(calc_kcoeff, gridpoints)
kc_tmp = np.array(kc_layers.get())
kc_out = np.ndarray([wavelength['nlam'], len(pressures), len(temperatures), ng])

def fi(array, v): 
    """Find array index (or indices) nearest to value(s) of v.
       Returns np.int64 or list."""
    return (np.abs(np.array(array)-v)).argmin()

for i,gridstep in enumerate(gridpoints): 
    j = fi(pressures, gridstep[0])
    k = fi(temperatures, gridstep[1])
    kc_out[:,j,k,:] = kc_tmp[i,:,:] 
    
with open(gridpath+'kc.parallel.log', mode='r') as f: logfile = f.readlines()
with open('calc_kcoeff_grid_dunes.ipynb', mode='r') as f: notebook = f.readlines()
    
out_dict = {'kc':kc_out,
            'wavelength':wavelength,
            'pressures':pressures,
            'temperatures':temperatures,
            'ng':ng,
            'g':g_node,
            'w':g_weight,
            'datafilename':kcpath+fname_kc_grid,
            'logfile':logfile,
            'notebook':notebook,
            }

save(kcpath+fname_kc_grid, out_dict)
os.remove(gridpath+'kc.parallel.log')