# MERRA-2

This notebook combines all of the MERRA-2 data into a single nc4 file.

The source files come at different temporal resolutions, so this notebook takes the daily average of every variable before merging.

In [1]:
import glob
import os
import xarray as xr
import pandas as pd

The 'TS' (skin temperature) column appears twice across the datasets. We will drop the redundant copy from the M2T1NXRAD radiation diagnostics dataset.

Also, PM2.5 can be recovered from the M2T1NXAER aerosol diagnostics series using this formula:

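\begin{align}
\mathrm{PM}_{2.5} &= \text{Dust}_{2.5} + \text{Sea Salt}_{2.5} + \text{Black Carbon} + 1.4 \times \text{Organic Carbon} + 1.375 \times \mathrm{SO_4} \\
\mathrm{PM}_{2.5} &= \text{DUSMASS25} + \text{SSSMASS25} + \text{BCSMASS} + 1.4 \times \text{OCSMASS} + 1.375 \times \text{SO4SMASS}
\end{align}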

This formula was taken from this paper:

        Buchard, V., and Coauthors, 2016: Evaluation of the surface PM2.5 in Version 1 of the NASA MERRA Aerosol Reanalysis over the United States. Atmospheric Environment, 125, 100–111, https://doi.org/10.1016/j.atmosenv.2015.11.004.

In [2]:
def add_pm25(df):
    """Return a copy of an aerosol-diagnostics frame with a 'PM2.5' column appended.

    PM2.5 is reconstructed from the surface mass concentration columns as
    DUSMASS25 + SSSMASS25 + BCSMASS + 1.4 * OCSMASS + 1.375 * SO4SMASS
    (Buchard et al., 2016). The input frame is not modified.
    """
    pm25 = (
        df['DUSMASS25']
        + df['SSSMASS25']
        + df['BCSMASS']
        + 1.4 * df['OCSMASS']
        + 1.375 * df['SO4SMASS']
    )
    return df.assign(**{'PM2.5': pm25})


def daily_mean(df):
    """Average every column per (lat, lon, calendar day).

    Expects a MultiIndex carrying levels named 'lat', 'lon' and 'time';
    the result is indexed by (lat, lon, time) with one row per day.
    """
    return df.groupby([
        pd.Grouper(level='lat'),
        pd.Grouper(level='lon'),
        pd.Grouper(level='time', freq='D'),
    ]).mean()


# Collect the per-collection merged files; the M2SDNXSLV collection is left out.
# Without recursive=True, '**' behaves like '*' and matches exactly one
# directory level, so a merged.nc4 sitting directly in data/MERRA-2/ is not
# picked up as an input.
# sorted() makes the processing order (and therefore the column order of the
# concatenated frame) reproducible; glob returns paths in arbitrary order.
nc4s = sorted(i for i in glob.glob('data/MERRA-2/**/merged.nc4') if 'M2SDNXSLV' not in i)
dfs = {}
for file in nc4s:
    # the context manager releases the file handle as soon as the data has
    # been materialised into a pandas DataFrame
    with xr.open_dataset(file) as ds:
        df = ds.to_dataframe()

    # add PM2.5 column to the aerosol diagnostics dataframe
    if 'M2T1NXAER' in file:
        df = add_pm25(df)
    # drop the redundant skin surface temperature from M2T1NXRAD
    if 'M2T1NXRAD' in file:
        df = df.drop(columns=['TS'])

    # now, average each column value on a daily basis
    df = daily_mean(df)

    dfs[file] = df

In [3]:
# Place the per-file daily frames side by side, aligned on their shared index
combined = pd.concat([dfs[path] for path in dfs], axis='columns')

In [4]:
# Show the name of every variable (column) present in the combined frame
combined.columns.values

array(['SSSMASS25', 'DUSCATAU', 'BCSCATAU', 'DUEXTTAU', 'BCFLUXU',
       'OCFLUXV', 'BCANGSTR', 'SUFLUXV', 'SSSMASS', 'OCSMASS', 'BCCMASS',
       'BCSMASS', 'SO4CMASS', 'SSFLUXU', 'DUCMASS', 'SSEXTTAU',
       'SO2CMASS', 'OCANGSTR', 'OCCMASS', 'TOTEXTTAU', 'DUSCAT25',
       'TOTANGSTR', 'DMSCMASS', 'SSEXTT25', 'DUANGSTR', 'DMSSMASS',
       'BCEXTTAU', 'SSSCATAU', 'DUFLUXV', 'DUFLUXU', 'SUEXTTAU',
       'SSFLUXV', 'DUCMASS25', 'OCEXTTAU', 'SUANGSTR', 'SSSCAT25',
       'SSCMASS25', 'SO4SMASS', 'DUSMASS', 'SUFLUXU', 'BCFLUXV',
       'DUSMASS25', 'SSCMASS', 'SUSCATAU', 'SO2SMASS', 'SSANGSTR',
       'DUEXTT25', 'OCFLUXU', 'OCSCATAU', 'TOTSCATAU', 'PM2.5', 'AODANA',
       'AODINC', 'LWGAB', 'CLDTOT', 'ALBNIRDF', 'SWTDN', 'EMIS',
       'LWTUPCLRCLN', 'SWTNTCLR', 'CLDHGH', 'LWGABCLR', 'LWGABCLRCLN',
       'LWGNTCLRCLN', 'SWGNTCLRCLN', 'SWGNT', 'TAUMID', 'ALBEDO',
       'SWGNTCLR', 'SWGNTCLN', 'LWGNTCLR', 'SWGDNCLR', 'ALBVISDF',
       'LWTUPCLR', 'TAUTOT', 'LWGNT', 'CLDLOW', 'ALBV

In [5]:
# Preview the first five rows of the combined daily frame
combined.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,SSSMASS25,DUSCATAU,BCSCATAU,DUEXTTAU,BCFLUXU,OCFLUXV,BCANGSTR,SUFLUXV,SSSMASS,OCSMASS,...,SLP,TQV,V2M,TROPQ,V10M,U50M,U10M,QV2M,TROPPV,QV10M
lat,lon,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
27.5,-97.5,2015-01-01,1.92991e-09,0.00156,0.002431,0.001664,4e-06,2.1e-05,1.539294,7e-06,1.59483e-08,1.090845e-09,...,102362.734375,34.89156,-4.906837,1.4e-05,-6.401117,-1.040568,-0.583242,0.005816,12167.754883,0.005655
27.5,-97.5,2015-01-02,1.398983e-09,0.001476,0.003116,0.001572,6e-06,5.7e-05,1.497134,8.1e-05,6.315156e-09,1.088885e-09,...,101865.828125,33.353931,-2.013163,4e-06,-2.569824,1.499022,1.378516,0.006199,10776.143555,0.006056
27.5,-97.5,2015-01-03,7.728337e-10,0.001717,0.002361,0.00183,9e-06,5.1e-05,1.462451,0.000108,3.319087e-09,1.363154e-09,...,101606.859375,29.238062,-1.764241,6e-06,-2.303708,3.86548,3.092033,0.006609,11952.199219,0.006434
27.5,-97.5,2015-01-04,9.810147e-10,0.005071,0.001328,0.005431,6e-06,9e-06,1.428846,-8e-06,5.571303e-09,1.100944e-09,...,102456.515625,11.435887,-3.088592,1.8e-05,-4.104443,0.783023,0.458247,0.005969,13455.602539,0.005817
27.5,-97.5,2015-01-05,3.11986e-09,0.002209,0.001528,0.00235,3e-06,1.3e-05,1.458938,8e-06,2.227582e-08,5.11407e-10,...,103517.070312,15.4013,-3.853312,1.6e-05,-5.023897,-2.091863,-1.499263,0.004544,12372.834961,0.00436


In [8]:
# Convert the combined frame back into an xarray Dataset
# (the index levels become dimensions, the columns become data variables)
merged_ds = xr.Dataset.from_dataframe(combined)

In [10]:
# Persist the merged dataset as a single file next to the per-collection folders
merged_ds.to_netcdf(path='data/MERRA-2/merged.nc4')