In [46]:
"""

1. Download MERRA2 data and average each downloaded file to a daily mean

2. Save as NetCDF4
    - Downward shortwave radiation (allsky) 
    - Downward shortwave radiation (clearsky)

"""

# Import libraries
import glob
import os
from datetime import datetime
import numpy as np
import xarray as xr
import pandas as pd
import netCDF4
import matplotlib.pyplot as plt

# User name that selects the home directory below
user = 'johnnyryan'

# Root of the project; every other path in the notebook is built from it
path = f'/Users/{user}/Dropbox (University of Oregon)/research/feedbacks/'

# Load the list of download links: tab-separated text without a header row,
# with the first line of the file skipped
links = pd.read_csv(
    f'{path}data/links/subset_M2T1NXRAD_5.12.4_20231217_213425_.txt',
    header=None,
    sep='\t',
    skiprows=1,
)

In [47]:
# Give the first column a readable name
links = links.rename(columns={0: "link"})

# Year (kept as text) and month (as integer) are read from fixed character
# positions inside each link, so this assumes every link has the same layout
links['year'] = links['link'].str[119:123]
month_text = links['link'].str[162:164]
links['month'] = month_text.astype(int)

# Years to process (2000 through 2022)
years = np.arange(2000, 2023)

# Keep only links whose month is June, July or August
is_summer = links['month'].between(6, 8)
links_summer = links[is_summer]

In [48]:
%%capture
# Loop over every link, resample to daily, and save as NetCDF
for year in years:
    
    if os.path.exists(path + 'data/merra-swd/swd_' + str(year) + '.nc'):
        print(f'Skipping...{str(year)}')
    else:
        print(f'Processing...{str(year)}')

        # Make a new DataFrame
        link_year = links_summer[links_summer['year'] == str(year)]

        t = []
        swd_allsky = []
        swd_clrsky = []
        for j in range(len(link_year)):

            # Index link
            link = '"' + str(link_year.iloc[j].values[0]) + '"'

            # Download MERRA2 using WGET
            !wget --load-cookies ~/.urs_cookies --save-cookies ~/.urs_cookies --auth-no-challenge=on --keep-session-cookies --no-check-certificate --content-disposition $link --directory-prefix=temp-files -nd

            # Import temporary file
            merra = xr.open_dataset(sorted(glob.glob(path + 'repo/temp-files/*.nc'))[0])
            
            # Clean up temporary files
            files = glob.glob(path + 'repo/temp-files/*.nc')
            for f in files:
                os.remove(f)

            # Calculate daily mean
            swd_resample = np.mean(merra['SWGDN'], axis=0).values
            clrsky_resample = np.mean(merra['SWGDNCLR'], axis=0).values

            # Append to list
            swd_allsky.append(swd_resample)
            swd_clrsky.append(clrsky_resample)
            t.append(merra['time'].values[0].astype('datetime64[D]'))

        # Save as NetCDF
        ds_data = xr.Dataset(
        data_vars={
            "swd_allsky": (("time", "lat", "lon"), np.array(swd_allsky).astype('float32')),
            "swd_clrsky": (("time", "lat", "lon"), np.array(swd_clrsky).astype('float32')),
        },

        coords={
            "time": pd.DatetimeIndex(np.array(t), freq='D'),
            "longitude": (('lon',), merra['lon'].values),
            "latitude": (('lat',), merra['lat'].values),    
        },

        attrs={
            "Produced": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "Units": 'Wm-2',
            "Author":'Johnny Ryan', 
            "Email":'jryan4@uoregon.edu'
        },
        )

        # Save
        ds_data.to_netcdf(path + 'data/merra-swd/swd_' + str(year) + '.nc')
        

NameError: name 'swd' is not defined