In [None]:
import os, sys, glob

# Walk three directory levels up from the current working directory and make
# that folder importable (presumably the project root that holds the `wp4`
# package imported below -- confirm against the repository layout).
_repo_root = os.getcwd()
for _level in range(3):
    _repo_root = os.path.dirname(_repo_root)

if _repo_root not in sys.path:
    sys.path.append(_repo_root)

from ecmwfapi import ECMWFDataServer
import xarray as xr
import calendar
    
from wp4.constants import DATA_DIR_GFAS

## Downloading the data

For this notebook to work, the ECMWF Web API client needs to be set up properly. For more information on how to sign up and use this API, see: https://confluence.ecmwf.int/display/WEBAPI/Access+ECMWF+Public+Datasets



### Efficient requests

Data retrieval is not designed to be instant. A larger request can take hours and even days to complete.

To retrieve data efficiently (and get your data quicker!) you should retrieve all the data you need from one tape, then from the next tape, and so on. For GFAS, this means retrieving all the data you need for one month, then for the next month, and so on.  

https://confluence.ecmwf.int/display/WEBAPI/Retrieval+efficiency

https://confluence.ecmwf.int/display/UDOC/Retrieve#Retrieve-Retrievalefficiency


### Parameters

Some of the most relevant parameters for the requests:

##### param

*80.210*  : Wildfire flux of Carbon Dioxide\
*81.210*  : Wildfire flux of Carbon Monoxide\
*82.210*  : Wildfire flux of Methane\
*85.210*  : Wildfire flux of Nitrogen Oxides NOx\
*87.210*  : Wildfire flux of Particulate Matter PM2.5\
*88.210*  : Wildfire flux of Total Particulate Matter\
*102.210* : Wildfire flux of Sulfur Dioxide\
*99.210*  : Wildfire radiative power

full list here: https://confluence.ecmwf.int/display/CKB/CAMS%3A+Global+Fire+Assimilation+System+%28GFAS%29+data+documentation

##### type

*gsd* : gridded satellite data (hourly) - Only available for fire radiative power (FRP)\
*ga* : gridded average (daily average)

check the data catalogue here to see what is available for download: https://apps.ecmwf.int/archive-catalogue/?type=gsd&class=mc&stream=gfas&expver=0001

##### instrument

*389*: MODIS

##### ident

*784* : Aqua\
*783* : Terra

##### format

If no format is specified, the data will be downloaded as a GRIB file. Set the *format* parameter to *netcdf* to have the data converted to a NetCDF file on the server before downloading.



## Step 1. Create/check the directories to store the data

As only the FRP data is available on an hourly basis we cannot acquire all the data for each month using a single server request. Therefore the downloading will be split up into two different request loops, one loop requesting the FRP data for each month, the other requesting the wildfire fluxes that are of interest for the Flares product. For each of the loops a different directory will be used for storing the downloaded data.   

In [None]:
# Make sure the two download directories exist: one for the hourly FRP files
# and one for the daily wildfire flux files.
# `exist_ok=True` turns the call into a no-op when the directory is already
# there, so no separate existence check is needed (such a check could race
# with another process creating the directory in between).
for subdir in ('frp', 'flux'):
    os.makedirs(f'{DATA_DIR_GFAS}/{subdir}/', exist_ok=True)

## Step 2. Run the first loop, downloading the hourly FRP data for each month, starting from 2015.

In [None]:
# Initiate the server
server = ECMWFDataServer()

# Send one request per calendar month (2015-2021). Months are kept as
# zero-padded strings because they are used verbatim in the file names and in
# the request dates.
for year in range(2015, 2022):
    for month in [f'{m:02d}' for m in range(1, 13)]:

        # get the number of days in the month
        days_in_month = calendar.monthrange(year, int(month))[1]

        # Target location to store the downloaded data
        output_loc = f'{DATA_DIR_GFAS}/frp/{year}_{month}.nc'

        # If the file already exists, continue with next iteration
        if os.path.exists(output_loc):
            print(f'SKIPPED: {year}_{month}.nc')
            continue

        print(f'STARTING DOWNLOAD REQUEST FOR: {year}_{month}.nc')

        # Download to a temporary name and only move the file to `output_loc`
        # once the request has finished. This way a failed or interrupted
        # request cannot leave an incomplete file at `output_loc`, which the
        # existence check above would otherwise skip on every later run.
        # The '.part' suffix also keeps unfinished files out of any '*.nc' glob.
        partial_loc = f'{output_loc}.part'

        try:
            # Set request parameters and send the request
            server.retrieve({
                "class": "mc",
                "dataset": "cams_gfas",
                "date": f"{year}-{month}-01/to/{year}-{month}-{days_in_month}",
                "expver": "0001",
                "levtype": "sfc",
                "param": "99.210",
                "step": "0-24",
                "stream": "gfas",
                "time": "0/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18/19/20/21/22/23",
                "type": "gsd",
                "ident": "784/783",
                "instrument": "389",
                "area":"55.65/-11.35/51.35/-5.25",
                "grid":"0.1/0.1",
                "format": "netcdf",
                "target": partial_loc,
            })

            # The request completed: publish the file under its final name
            os.replace(partial_loc, output_loc)
        finally:
            # After a successful move this is a no-op; after a failure it
            # removes whatever was written so far. The error still propagates.
            if os.path.exists(partial_loc):
                os.remove(partial_loc)

In [None]:
# Second request loop: download the daily-average (type "ga") wildfire flux
# parameters for each month.

# Initiate the server
server = ECMWFDataServer()

# Send one request per calendar month (2015-2021). Months are kept as
# zero-padded strings because they are used verbatim in the file names and in
# the request dates.
for year in range(2015, 2022):
    for month in [f'{m:02d}' for m in range(1, 13)]:

        # get the number of days in the month
        days_in_month = calendar.monthrange(year, int(month))[1]

        # Target location to store the downloaded data
        output_loc = f'{DATA_DIR_GFAS}/flux/{year}_{month}.nc'

        # If the file already exists, continue with next iteration
        if os.path.exists(output_loc):
            print(f'SKIPPED: {year}_{month}.nc')
            continue

        print(f'STARTING DOWNLOAD REQUEST FOR: {year}_{month}.nc')

        # Download to a temporary name and only move the file to `output_loc`
        # once the request has finished. This way a failed or interrupted
        # request cannot leave an incomplete file at `output_loc`, which the
        # existence check above would otherwise skip on every later run.
        # The '.part' suffix also keeps unfinished files out of any '*.nc' glob.
        partial_loc = f'{output_loc}.part'

        try:
            # Set request parameters and send the request
            server.retrieve({
                "class": "mc",
                "dataset": "cams_gfas",
                "date": f"{year}-{month}-01/to/{year}-{month}-{days_in_month}",
                "expver": "0001",
                "levtype": "sfc",
                "param": "80.210/81.210/82.210/85.210/87.210/88.210/102.210",
                "step": "0-24",
                "stream": "gfas",
                "target": partial_loc,
                "format": "netcdf",
                "time": "00",
                "type": "ga",
                "area":"55.65/-11.35/51.35/-5.25",
                "grid":"0.1/0.1",
            })

            # The request completed: publish the file under its final name
            os.replace(partial_loc, output_loc)
        finally:
            # After a successful move this is a no-op; after a failure it
            # removes whatever was written so far. The error still propagates.
            if os.path.exists(partial_loc):
                os.remove(partial_loc)

In [None]:
# Merge the monthly FRP downloads into a single NetCDF file.

# get all the filenames ending in .nc in the download directory; sorting makes
# the order in which the files are combined reproducible
monthly_frp_files = sorted(glob.glob(f'{DATA_DIR_GFAS}/frp/*.nc'))

# fail early with a clear message instead of an obscure error further down
if not monthly_frp_files:
    raise FileNotFoundError(f'No .nc files found in {DATA_DIR_GFAS}/frp/')

monthly_frp_datasets = []
try:
    # open each nc file using xarray
    for path in monthly_frp_files:
        monthly_frp_datasets.append(xr.open_dataset(path))

    # combine the data along the time dimension using the xarray combine_nested function
    ds = xr.combine_nested(monthly_frp_datasets, concat_dim='time', combine_attrs='drop_conflicts')

    # sort the data based on date
    sorted_ds = ds.sortby('time')

    # save as a single netcdf file
    sorted_ds.to_netcdf(f'{DATA_DIR_GFAS}/frp.nc')
finally:
    # release the file handles of the monthly files, also when a step above
    # failed; the combined file has already been written at this point
    for monthly_ds in monthly_frp_datasets:
        monthly_ds.close()

In [None]:
# Merge the monthly wildfire flux downloads into a single NetCDF file.

# get all the filenames ending in .nc in the download directory; sorting makes
# the order in which the files are combined reproducible
monthly_flux_files = sorted(glob.glob(f'{DATA_DIR_GFAS}/flux/*.nc'))

# fail early with a clear message instead of an obscure error further down
if not monthly_flux_files:
    raise FileNotFoundError(f'No .nc files found in {DATA_DIR_GFAS}/flux/')

# The opened datasets get their own name, so `monthly_flux_files` keeps
# holding the file names (the same naming as used for the FRP files).
monthly_flux_datasets = []
try:
    # open each nc file using xarray
    for path in monthly_flux_files:
        monthly_flux_datasets.append(xr.open_dataset(path))

    # combine the data along the time dimension using the xarray combine_nested function
    ds = xr.combine_nested(monthly_flux_datasets, concat_dim='time', combine_attrs='drop_conflicts')

    # sort the data based on date
    sorted_ds = ds.sortby('time')

    # save as a single netcdf file
    sorted_ds.to_netcdf(f'{DATA_DIR_GFAS}/wildfire_flux.nc')
finally:
    # release the file handles of the monthly files, also when a step above
    # failed; the combined file has already been written at this point
    for monthly_ds in monthly_flux_datasets:
        monthly_ds.close()