# core

> This is the core library for the ERA5 dataset pipeline. It defines
a few helper functions, such as an API tester that checks your API key and connection.

In [2]:
#| default_exp core

In [4]:
#| hide
from nbdev.showdoc import *

In [6]:
#| export
import os
import cdsapi
import hydra

from omegaconf import DictConfig, OmegaConf
from pyprojroot import here

## Utilities

Some utilities are provided to help you with the ERA5 dataset.

In [7]:
#| exporti
def _expand_path(
        path: str   # Path on user's machine
        )->   str:  # Expanded path
    "Resolve `~`, environment variables and relative segments into an absolute path"

    # Applied inside-out: user home first, then environment variables,
    # and finally normalisation to an absolute path.
    return os.path.abspath(os.path.expandvars(os.path.expanduser(path)))

In [8]:
#| export
def describe(
    cfg: DictConfig=None,  # Configuration file
    )-> None:
    "Print a short banner followed by the given Hydra configuration rendered as YAML"

    # Without a config, fall back to an empty one so the YAML dump below still works
    config = OmegaConf.create() if cfg is None else cfg

    banner = "This package fetches ERA5 data. The following is the config file used by Hydra for the pipeline:\n"
    print(banner)
    print(OmegaConf.to_yaml(config))

## Tests and Main

In `nbdev`, our tests are embedded in the notebook. Whenever you export the notebook, every cell that is marked to run is executed, and hence the tests are executed too. The tests are also exported. This is a great way to ensure that your documentation is always up to date. For this module, we're using the `testAPI()` function as our main test.

In [9]:
#| export
def testAPI(
    cfg: DictConfig=None,   # Hydra configuration; echoed as YAML when provided
    output_path:str=None,   # Directory for the test download; defaults to `here() / "data"`
    dataset:str="reanalysis-era5-pressure-levels",  # Name of the CDS dataset to request from
    remove:bool=True        # Delete the downloaded file once the test succeeds
    )-> bool:               # True if the download succeeded, False otherwise
    "Test the CDS API key and connection by downloading a small sample file"

    # `OmegaConf.to_yaml` does not accept None, so only echo the config when
    # one was actually passed (the default is None).
    if cfg is not None:
        print(OmegaConf.to_yaml(cfg))

    try:
        client = cdsapi.Client()

        # check the path
        if output_path is None:
            output_path = here() / "data"
        else:
            output_path = _expand_path(output_path)

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_path, exist_ok=True)

        # build request: a single variable, level and timestamp keeps the download tiny
        request = {
            'product_type': ['reanalysis'],
            'variable': ['geopotential'],
            'year': ['2024'],
            'month': ['03'],
            'day': ['01'],
            'time': ['13:00'],
            'pressure_level': ['1000'],
            'data_format': 'grib',
        }

        # `output_path` is a plain str when the caller supplied it (see
        # `_expand_path`), so the `/` operator would raise TypeError here;
        # os.path.join handles both str and path-like directories.
        target = os.path.join(output_path, 'download.grib')

        print("Testing API connection by downloading a dummy dataset to {}...".format(output_path))

        client.retrieve(dataset, request, target)

        if remove:
            os.remove(target)

        print("API connection test successful.")
        return True

    # Deliberately broad: this is a diagnostic tool, so any failure is
    # reported to the user and turned into a False result instead of a crash.
    except Exception as e:
        print("API connection test failed.")
        print("Did you set up your API key with CDS? If not, please visit https://cds.climate.copernicus.eu/how-to-api#install-the-cds-api-client")
        print("Error: {}".format(e))
        return False

We can see that this API tester tool works with Hydra configuration:

In [10]:
from hydra import initialize, compose
from omegaconf import OmegaConf

# The @hydra.main decorator does not work well inside notebooks, so the config
# is loaded here with Hydra's initialize/compose functions instead.
# Background on the notebook limitation:
# https://gist.github.com/bdsaglam/586704a98336a0cf0a65a6e7c247d248
#
# config_path is the relative path from this notebook to the config directory.
with initialize(version_base=None, config_path="../conf"):
    cfg = compose(config_name='config.yaml')

describe(cfg)

This package fetches ERA5 data. The following is the config file used by Hydra for the pipeline:

query:
  product_type: reanalysis
  variable:
  - 2m_dewpoint_temperature
  - 2m_temperature
  - skin_temperature
  - total_precipitation
  year:
  - 2010
  - 2011
  month:
  - 1
  - 2
  - 3
  - 4
  - 5
  - 6
  - 7
  - 8
  - 9
  - 10
  - 11
  - 12
  day:
  - 1
  - 2
  - 3
  - 4
  - 5
  - 6
  - 7
  - 8
  - 9
  - 10
  - 11
  - 12
  - 13
  - 14
  - 15
  - 16
  - 17
  - 18
  - 19
  - 20
  - 21
  - 22
  - 23
  - 24
  - 25
  - 26
  - 27
  - 28
  - 29
  - 30
  - 31
  time:
  - 0
  - 1
  - 2
  - 3
  - 4
  - 5
  - 6
  - 7
  - 8
  - 9
  - 10
  - 11
  - 12
  - 13
  - 14
  - 15
  - 16
  - 17
  - 18
  - 19
  - 20
  - 21
  - 22
  - 23
  area:
  - 0
  - 360
  - -90
  - 90
  data_format: netcdf
  download_format: unarchived
datapaths:
  input: null
  output: null



### Importing the Main Function

Important: using `__main__` with nbdev and Hydra is a little bit tricky. We need to define the main function in the module ONLY ONCE, and when we export the notebook to a script we need to guard the call to it with the `nbdev.imports.IN_NOTEBOOK` variable. This way, the main function is executed only when the exported module is run as a script, and not when we run the notebook or import the module.

```python
from nbdev.imports import IN_NOTEBOOK
```

You'll see this listed throughout the notebooks.

In [None]:
#| export
@hydra.main(version_base=None, config_path="../conf", config_name="config")
def main(cfg: DictConfig) -> None:
    "Entry point: print the composed Hydra config, then run the CDS API connection test"
    describe(cfg)
    testAPI(cfg)

In [None]:
#| export
# nbdev may be missing where the exported module runs; in that case we are
# certainly not inside a notebook. Only ImportError is caught so that
# KeyboardInterrupt / SystemExit are not swallowed as a bare `except:` would.
try:
    from nbdev.imports import IN_NOTEBOOK
except ImportError:
    IN_NOTEBOOK = False

# Launch the Hydra app only when executed as a script, never from a notebook
if __name__ == "__main__" and not IN_NOTEBOOK:
    main()

In [None]:
#| hide
# Run nbdev's export step from inside the notebook (hidden from the rendered docs)
import nbdev; nbdev.nbdev_export()