From 4e70ecda41a249817b0313c1f54f858dbbe02495 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Tue, 18 Apr 2017 17:15:11 +0200 Subject: [PATCH 1/3] Config file for download cache settings --- .travis.yml | 10 +- oggm/cfg.py | 99 ++++++---- oggm/params.cfg | 28 +-- oggm/tests/__init__.py | 2 +- oggm/tests/test_prepro.py | 4 +- oggm/tests/test_utils.py | 48 +++-- oggm/tests/test_workflow.py | 4 +- oggm/utils.py | 383 +++++++++++++++++------------------- 8 files changed, 286 insertions(+), 292 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3edd55681..36d204c3d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,10 +18,10 @@ cache: matrix: fast_finish: true include: - - env: OGGM_ENV=prepro MPL= - - env: OGGM_ENV=models MPL= - - env: OGGM_ENV=workflow MPL=--mpl - - env: OGGM_ENV=graphics MPL=--mpl + - env: OGGM_TEST_ENV=prepro MPL= + - env: OGGM_TEST_ENV=models MPL= + - env: OGGM_TEST_ENV=workflow MPL=--mpl + - env: OGGM_TEST_ENV=graphics MPL=--mpl before_install: - docker pull oggm/untested_base:latest @@ -40,7 +40,7 @@ install: EOF - mkdir -p $HOME/dl_cache - export OGGM_DOWNLOAD_CACHE=/dl_cache - - docker create --name oggm_travis -ti -v $HOME/dl_cache:/dl_cache -e OGGM_DOWNLOAD_CACHE -e OGGM_ENV -e CI -e TRAVIS -e TRAVIS_JOB_ID -e TRAVIS_BRANCH -e TRAVIS_PULL_REQUEST oggm/untested_base:latest /bin/bash /root/oggm/test.sh + - docker create --name oggm_travis -ti -v $HOME/dl_cache:/dl_cache -e OGGM_DOWNLOAD_CACHE -e OGGM_TEST_ENV -e CI -e TRAVIS -e TRAVIS_JOB_ID -e TRAVIS_BRANCH -e TRAVIS_PULL_REQUEST oggm/untested_base:latest /bin/bash /root/oggm/test.sh - docker cp $PWD oggm_travis:/root/oggm script: - export OGGM_DOWNLOAD_CACHE=/dl_cache diff --git a/oggm/cfg.py b/oggm/cfg.py index 162bfe1ca..007440d2f 100644 --- a/oggm/cfg.py +++ b/oggm/cfg.py @@ -29,6 +29,8 @@ CACHE_DIR = os.path.join(os.path.expanduser('~'), '.oggm') if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR) +# Path to the config file +CONFIG_FILE = 
os.path.join(os.path.expanduser('~'), '.oggm_config') class DocumentedDict(dict): @@ -227,40 +229,17 @@ def initialize(file=None): try: cp = ConfigObj(file, file_error=True) except (ConfigObjError, IOError) as e: - log.critical('Config file could not be parsed (%s): %s', file, e) + log.critical('Param file could not be parsed (%s): %s', file, e) sys.exit() - homedir = os.path.expanduser('~') - - # Some defaults - if cp['working_dir'] == '~': - cp['working_dir'] = os.path.join(homedir, 'OGGM_wd') - if cp['topo_dir'] == '~': - cp['topo_dir'] = os.path.join(homedir, 'OGGM_data', 'topo') - if cp['cru_dir'] == '~': - cp['cru_dir'] = os.path.join(homedir, 'OGGM_data', 'cru') - if cp['rgi_dir'] == '~': - cp['rgi_dir'] = os.path.join(homedir, 'OGGM_data', 'rgi') - - # Setup Download-Cache-Dir - if os.environ.get('OGGM_DOWNLOAD_CACHE_RO') is not None: - cp['dl_cache_readonly'] = bool(strtobool(os.environ.get('OGGM_DOWNLOAD_CACHE_RO'))) - if os.environ.get('OGGM_DOWNLOAD_CACHE') is not None: - cp['dl_cache_dir'] = os.environ.get('OGGM_DOWNLOAD_CACHE') - - PATHS['dl_cache_dir'] = cp['dl_cache_dir'] - PARAMS['dl_cache_readonly'] = cp.as_bool('dl_cache_readonly') - - if PATHS['dl_cache_dir'] and not os.path.exists(PATHS['dl_cache_dir']): - if not PARAMS['dl_cache_readonly']: - os.makedirs(PATHS['dl_cache_dir']) - CONTINUE_ON_ERROR = cp.as_bool('continue_on_error') + # Paths + oggm_static_paths() PATHS['working_dir'] = cp['working_dir'] - PATHS['topo_dir'] = cp['topo_dir'] - PATHS['cru_dir'] = cp['cru_dir'] - PATHS['rgi_dir'] = cp['rgi_dir'] + # Default + if not PATHS['working_dir']: + PATHS['working_dir'] = os.path.expanduser('~/OGGM_WORKING_DIRECTORY') PATHS['dem_file'] = cp['dem_file'] PATHS['climate_file'] = cp['climate_file'] PATHS['wgms_rgi_links'] = cp['wgms_rgi_links'] @@ -307,8 +286,8 @@ def initialize(file=None): PARAMS[_k] = cp.as_bool(_k) # Make sure we have a proper cache dir - from oggm.utils import _download_oggm_files - _download_oggm_files() + from oggm.utils 
import download_oggm_files + download_oggm_files() # Parse RGI metadata _d = os.path.join(CACHE_DIR, 'oggm-sample-data-master', 'rgi_meta') @@ -327,10 +306,9 @@ def initialize(file=None): 'optimize_inversion_params', 'use_multiple_flowlines', 'leclercq_rgi_links', 'optimize_thick', 'mpi_recv_buf_size', 'tstar_search_window', 'use_bias_for_run', 'run_period', - 'prcp_scaling_factor', 'use_intersects', - 'dl_cache_dir', 'dl_cache_readonly'] + 'prcp_scaling_factor', 'use_intersects'] for k in ltr: - del cp[k] + cp.pop(k, None) # Other params are floats for k in cp: @@ -343,14 +321,61 @@ def initialize(file=None): IS_INITIALIZED = True +def oggm_static_paths(): + """Initialise the OGGM paths from the config file.""" + + global PATHS, PARAMS + + # See if the file is there, if not create it + if not os.path.exists(CONFIG_FILE): + dldir = os.path.join(os.path.expanduser('~'), 'OGGM_DOWNLOADS') + config = ConfigObj() + config['dl_cache_dir'] = dldir + config['tmp_dir'] = os.path.join(dldir, 'tmp') + config['topo_dir'] = os.path.join(dldir, 'topo') + config['cru_dir'] = os.path.join(dldir, 'cru') + config['rgi_dir'] = os.path.join(dldir, 'rgi') + config['has_internet'] = True + config.filename = CONFIG_FILE + config.write() + + # OK, read in the file + try: + config = ConfigObj(CONFIG_FILE, file_error=True) + except (ConfigObjError, IOError) as e: + log.critical('Config file could not be parsed (%s): %s', + CONFIG_FILE, e) + sys.exit() + + # Check that all keys are here + for k in ['dl_cache_dir', 'tmp_dir', 'topo_dir', + 'cru_dir', 'rgi_dir', 'has_internet']: + if k not in config: + raise RuntimeError('The oggm config file ({}) should have an ' + 'entry for {}.'.format(CONFIG_FILE, k)) + + # Override defaults with env variables + if os.environ.get('OGGM_DOWNLOAD_CACHE') is not None: + config['dl_cache_dir'] = os.environ.get('OGGM_DOWNLOAD_CACHE') + + # Fill the PATH dict + for k, v in config.iteritems(): + if not v and '_dir' in k: + v = os.path.join(config['dl_cache_dir'], 
k.replace('_dir', '')) + PATHS[k] = os.path.abspath(os.path.expanduser(v)) + + # Other + PARAMS['has_internet'] = config.as_bool('has_internet') + + def get_lru_handler(tmpdir=None, maxsize=100, ending='.tif'): """LRU handler for a given temporary directory (singleton). Parameters ---------- tmpdir : str - path to the temporary directory to handle. Default is "tmp" in the - working directory. + path to the temporary directory to handle. Default is + ``cfg.PATHS['tmp_dir']``. maxsize : int the max number of files to keep in the directory ending : str @@ -360,7 +385,7 @@ def get_lru_handler(tmpdir=None, maxsize=100, ending='.tif'): # see if we're set up if tmpdir is None: - tmpdir = os.path.join(PATHS['working_dir'], 'tmp') + tmpdir = PATHS['tmp_dir'] if not os.path.exists(tmpdir): os.makedirs(tmpdir) diff --git a/oggm/params.cfg b/oggm/params.cfg index 25bd95ff6..1f572f306 100644 --- a/oggm/params.cfg +++ b/oggm/params.cfg @@ -3,31 +3,7 @@ ### Input/Output paths. Set to ~ to default to home directory # Where OGGM will write its output -working_dir = ~ - -# Input directory for topography data. It can be emtpy: in that case OGGM will -# download the data from the SRTM (open) and ASTER (not open) databases -topo_dir = ~ - -# Input directory for CRU TS data. It can be emtpy: in that case OGGM will -# download the data from https://crudata.uea.ac.uk -cru_dir = ~ - -# Input directory for RGI data. It can be emtpy: in that case OGGM will -# download the data -rgi_dir = ~ - -# Cache directory for downloads. If set, all downloaded data will be stored here -# initially. If a file requested for download is present in the cache, it will be -# copied from there instead of downloaded again. -# Can be overridden via OGGM_DOWNLOAD_CACHE environment variable. -dl_cache_dir = - -# Set to True if the download cache is readonly. -# Downloads will be written to their normal location if neccessary. 
-# Alternatively set the OGGM_DOWNLOAD_CACHE_RO environment variable to indicate -# a read-only cache dir. -dl_cache_readonly = False +working_dir = # Users can specify their own topography file if they want to. In this case, # the topo_dir above will be ignored. This is useful for testing, or if you @@ -36,7 +12,7 @@ dl_cache_readonly = False dem_file = # Users can specify their own climate dataset if they want to. In this case, -# the cru_dir above will be ignored. This is useful for testing, or if you +# the static cru_dir will be ignored. This is useful for testing, or if you # are simulating a single region with better data. # The format of the file is not (yet) very flexible. See the HISTALP data # in the sample-data folder for an example: diff --git a/oggm/tests/__init__.py b/oggm/tests/__init__.py index 317d8b831..0bd83f4ff 100644 --- a/oggm/tests/__init__.py +++ b/oggm/tests/__init__.py @@ -65,7 +65,7 @@ else: # distribute the tests RUN_SLOW_TESTS = True - env = os.environ.get('OGGM_ENV') + env = os.environ.get('OGGM_TEST_ENV') if env == 'prepro': RUN_PREPRO_TESTS = True RUN_MODEL_TESTS = False diff --git a/oggm/tests/test_prepro.py b/oggm/tests/test_prepro.py index 44ff93a40..a69c0bf07 100644 --- a/oggm/tests/test_prepro.py +++ b/oggm/tests/test_prepro.py @@ -596,10 +596,10 @@ def test_distribute_climate_cru(self): climate.process_histalp_nonparallel([gdirs[0]]) cru_dir = get_demo_file('cru_ts3.23.1901.2014.tmp.dat.nc') cru_dir = os.path.dirname(cru_dir) - cfg.PATHS['climate_file'] = '~' + cfg.PATHS['climate_file'] = '' cfg.PATHS['cru_dir'] = cru_dir climate.process_cru_data(gdirs[1]) - cfg.PATHS['cru_dir'] = '~' + cfg.PATHS['cru_dir'] = '' cfg.PATHS['climate_file'] = get_demo_file('histalp_merged_hef.nc') ci = gdir.read_pickle('climate_info') diff --git a/oggm/tests/test_utils.py b/oggm/tests/test_utils.py index af7a39d49..c9ccc6085 100644 --- a/oggm/tests/test_utils.py +++ b/oggm/tests/test_utils.py @@ -140,10 +140,8 @@ def setUp(self): self.homedir = 
os.path.expanduser('~') def test_defaults(self): - expected = os.path.join(self.homedir, 'OGGM_wd') + expected = os.path.join(self.homedir, 'OGGM_WORKING_DIRECTORY') self.assertEqual(cfg.PATHS['working_dir'], expected) - expected = os.path.join(self.homedir, 'OGGM_data', 'topo') - self.assertEqual(cfg.PATHS['topo_dir'], expected) def test_pathsetter(self): cfg.PATHS['working_dir'] = os.path.join('~', 'my_OGGM_wd') @@ -154,8 +152,10 @@ def test_pathsetter(self): class TestDataFiles(unittest.TestCase): def setUp(self): + cfg.initialize() cfg.PATHS['topo_dir'] = TEST_DIR cfg.PATHS['working_dir'] = TEST_DIR + cfg.PATHS['tmp_dir'] = os.path.join(TEST_DIR, 'extract') self.reset_dir() def tearDown(self): @@ -179,9 +179,6 @@ def test_download_demo_files(self): # Data files cfg.initialize() - lf, df = utils.get_leclercq_files() - self.assertTrue(os.path.exists(lf)) - lf, df = utils.get_wgms_files() self.assertTrue(os.path.exists(lf)) @@ -267,6 +264,11 @@ def test_dem3_viewpano_zone(self): self.assertTrue(len(z) == 1) self.assertEqual('Q01', z[0]) + # normal tile + z = utils.dem3_viewpano_zone([107, 107], [69, 69]) + self.assertTrue(len(z) == 1) + self.assertEqual('R48', z[0]) + # Alps ref = sorted(['K31', 'K32', 'K33', 'L31', 'L32', 'L33', 'M31', 'M32', 'M33']) @@ -342,7 +344,7 @@ def test_srtmdownloadfails(self): # this zone does not exist zone = '41_20' - self.assertRaises(FileNotFoundError, utils._download_srtm_file, zone) + self.assertTrue(utils._download_srtm_file(zone) is None) @is_download def test_asterdownload(self): @@ -356,7 +358,8 @@ def test_asterdownload(self): @is_download def test_gimp(self): fp, z = utils.get_topo_file([], [], rgi_region=5) - self.assertTrue(os.path.exists(fp)) + self.assertTrue(os.path.exists(fp[0])) + self.assertEqual(z, 'GIMP') @is_download def test_iceland(self): @@ -366,7 +369,7 @@ def test_iceland(self): @is_download def test_asterdownloadfails(self): - # this zone does exist and file should be small enough for download + # this zone 
does not exist zone = 'bli' unit = 'S75E135' self.assertTrue(utils._download_aster_file(zone, unit) is None) @@ -381,8 +384,6 @@ def test_alternatedownload(self): @is_download def test_download_cru(self): - cfg.initialize() - tmp = cfg.PATHS['cru_dir'] cfg.PATHS['cru_dir'] = TEST_DIR @@ -394,8 +395,6 @@ def test_download_cru(self): @is_download def test_download_rgi(self): - cfg.initialize() - tmp = cfg.PATHS['rgi_dir'] cfg.PATHS['rgi_dir'] = TEST_DIR @@ -415,3 +414,26 @@ def test_download_dem3_viewpano(self): zone = 'U44' fp = utils._download_dem3_viewpano(zone) self.assertTrue(os.path.exists(fp)) + + @is_download + def test_download_dem3_viewpano_fails(self): + + # this zone does not exist + zone = 'dummy' + fp = utils._download_dem3_viewpano(zone) + self.assertTrue(fp is None) + + @is_download + def test_auto_topo(self): + # Test for combine + fdem, src = utils.get_topo_file([6, 14], [41, 41]) + self.assertEqual(src, 'SRTM') + self.assertEqual(len(fdem), 2) + for fp in fdem: + self.assertTrue(os.path.exists(fp)) + + fdem, src = utils.get_topo_file([-143, -131], [61, 61]) + self.assertEqual(src, 'DEM3') + self.assertEqual(len(fdem), 3) + for fp in fdem: + self.assertTrue(os.path.exists(fp)) diff --git a/oggm/tests/test_workflow.py b/oggm/tests/test_workflow.py index 9e24e79f8..ebf575f0c 100644 --- a/oggm/tests/test_workflow.py +++ b/oggm/tests/test_workflow.py @@ -116,7 +116,7 @@ def up_to_inversion(reset=False): # Use histalp for the actual inversion test cfg.PARAMS['temp_use_local_gradient'] = True cfg.PATHS['climate_file'] = get_demo_file('HISTALP_oetztal.nc') - cfg.PATHS['cru_dir'] = '~' + cfg.PATHS['cru_dir'] = '' workflow.climate_tasks(gdirs) with open(CLI_LOGF, 'wb') as f: pickle.dump('histalp', f) @@ -147,7 +147,7 @@ def up_to_distrib(reset=False): # Use CRU cfg.PARAMS['prcp_scaling_factor'] = 2.5 cfg.PARAMS['temp_use_local_gradient'] = False - cfg.PATHS['climate_file'] = '~' + cfg.PATHS['climate_file'] = '' cru_dir = 
get_demo_file('cru_ts3.23.1901.2014.tmp.dat.nc') cfg.PATHS['cru_dir'] = os.path.dirname(cru_dir) with warnings.catch_warnings(): diff --git a/oggm/utils.py b/oggm/utils.py index 99aff3607..47e17798e 100644 --- a/oggm/utils.py +++ b/oggm/utils.py @@ -23,7 +23,6 @@ import json import time import fnmatch -import subprocess # External libs import geopandas as gpd @@ -51,6 +50,9 @@ import oggm.cfg as cfg from oggm.cfg import CUMSEC_IN_MONTHS, SEC_IN_YEAR, BEGINSEC_IN_MONTHS +# Module logger +logger = logging.getLogger(__name__) + SAMPLE_DATA_GH_REPO = 'OGGM/oggm-sample-data' CRU_SERVER = 'https://crudata.uea.ac.uk/cru/data/hrg/cru_ts_3.24.01/cruts' \ '.1701201703.v3.24.01/' @@ -89,63 +91,49 @@ def _get_download_lock(): return lock -def _cached_download_helper(cache_obj_name, dl_func): +def _cached_download_helper(dest_path, dl_func): """Helper function for downloads. - Takes care of checking if the file is already cached. - Only calls the actuall download function when no cached version exists.""" - cache_dir = cfg.PATHS['dl_cache_dir'] - cache_ro = cfg.PARAMS['dl_cache_readonly'] - fb_cache_dir = os.path.join(cfg.PATHS['working_dir'], 'cache') - - if not cache_dir: - cache_dir = fb_cache_dir - cache_ro = False - - cache_path = os.path.join(cache_dir, cache_obj_name) - if os.path.isfile(cache_path): - return cache_path - - fb_path = os.path.join(fb_cache_dir, cache_obj_name) - if os.path.isfile(fb_path): - return fb_path + + Takes care of checking if the file is already downloaded. + Only calls the actual download function when no downloaded version exists. 
+ """ - if cache_ro: - cache_path = fb_path + if os.path.isfile(dest_path): + return dest_path - mkdir(os.path.dirname(cache_path)) + mkdir(os.path.dirname(dest_path)) try: - cache_path = dl_func(cache_path) + dest_path = dl_func(dest_path) except: - if os.path.exists(cache_path): - os.remove(cache_path) + if os.path.exists(dest_path): + os.remove(dest_path) raise - return cache_path + return dest_path -def _urlretrieve(url, *args, **kwargs): +def _urlretrieve(url, dest_path, *args, **kwargs): """Wrapper around urlretrieve, to implement our caching logic. + Instead of accepting a destination path, it decided where to store the file - and returns the local path.""" - log = logging.getLogger('download') - - cache_obj_name = urlparse(url) - cache_obj_name = cache_obj_name.netloc + cache_obj_name.path + and returns the local path. + """ - def _dlf(cache_path): - log.info("Downloading %s to %s..." % (url, cache_path)) - urlretrieve(url, cache_path, *args, **kwargs) - return cache_path + def _dlf(_dest_path): + logger.info("Downloading %s to %s..." 
% (url, _dest_path)) + urlretrieve(url, _dest_path, *args, **kwargs) + return _dest_path - return _cached_download_helper(cache_obj_name, _dlf) + return _cached_download_helper(dest_path, _dlf) -def progress_urlretrieve(url): - """Downloads a file, returns its local path, and shows a progressbar.""" +def _progress_urlretrieve(url, dest_path): + """Downloads a file to dest_path if dest_path doesn't exist yet.""" try: from progressbar import DataTransferBar, UnknownLength pbar = DataTransferBar() + def _upd(count, size, total): if pbar.max_value is None: if total > 0: @@ -154,17 +142,53 @@ def _upd(count, size, total): pbar.start(UnknownLength) pbar.update(min(count * size, total)) sys.stdout.flush() - res = _urlretrieve(url, reporthook=_upd) + res = _urlretrieve(url, dest_path, reporthook=_upd) try: pbar.finish() except: pass return res except ImportError: - return _urlretrieve(url) + return _urlretrieve(url, dest_path) + + +def aws_file_download(aws_path, dest_path): + with _get_download_lock(): + return _aws_file_download_unlocked(aws_path, dest_path) -def file_downloader(www_path, retry_max=5): +def _aws_file_download_unlocked(aws_path, dest_path): + """Download a file from the AWS drive s3://astgtmv2/ + + **Note:** you need AWS credentials for this to work. + + Parameters + ---------- + aws_path: path relative to s3://astgtmv2/ + dest_path: where to copy the file + """ + + while aws_path.startswith('/'): + aws_path = aws_path[1:] + + def _dlf(_dest_path): + import boto3 + import botocore + client = boto3.client('s3') + logger.info("Downloading %s from s3 to %s..." 
% (aws_path, _dest_path)) + try: + client.download_file('astgtmv2', aws_path, _dest_path) + except botocore.exceptions.ClientError as e: + if e.response['Error']['Code'] == "404": + return None + else: + raise + return _dest_path + + return _cached_download_helper(dest_path, _dlf) + + +def file_downloader(www_path, dest_path, retry_max=5): """A slightly better downloader: it tries more than once.""" local_path = None @@ -173,7 +197,7 @@ def file_downloader(www_path, retry_max=5): # Try to download try: retry_counter += 1 - local_path = progress_urlretrieve(www_path) + local_path = _progress_urlretrieve(www_path, dest_path) # if no error, exit break except HTTPError as err: @@ -182,23 +206,23 @@ def file_downloader(www_path, retry_max=5): # Ok so this *should* be an ocean tile return None elif err.code >= 500 and err.code < 600: - print("Downloading %s failed with HTTP error %s, " - "retrying in 10 seconds... %s/%s" % - (www_path, err.code, retry_counter, retry_max)) + logger.info("Downloading %s failed with HTTP error %s, " + "retrying in 10 seconds... %s/%s" % + (www_path, err.code, retry_counter, retry_max)) time.sleep(10) continue else: raise except ContentTooShortError: - print("Downloading %s failed with ContentTooShortError" - " error %s, retrying in 10 seconds... %s/%s" % - (www_path, err.code, retry_counter, retry_max)) + logger.info("Downloading %s failed with ContentTooShortError" + " error %s, retrying in 10 seconds... %s/%s" % + (www_path, err.code, retry_counter, retry_max)) time.sleep(10) continue - # See if we managed + # See if we managed (fail is allowed) if not local_path or not os.path.exists(local_path): - raise RuntimeError('Downloading %s failed.' % www_path) + logger.warning('Downloading %s failed.' 
% www_path) return local_path @@ -282,7 +306,7 @@ def append(self, fpath): self.purge() -def _download_oggm_files(): +def download_oggm_files(): with _get_download_lock(): return _download_oggm_files_unlocked() @@ -295,6 +319,7 @@ def _download_oggm_files_unlocked(): master_zip_url = 'https://github.com/%s/archive/master.zip' % \ SAMPLE_DATA_GH_REPO rename_output = False + dest_path = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data.zip') shafile = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data-commit.txt') odir = os.path.join(cfg.CACHE_DIR) sdir = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data-master') @@ -310,7 +335,7 @@ def _download_oggm_files_unlocked(): last_mod = 0 # test only every hour - if time.time() - last_mod > 3600: + if (time.time() - last_mod) > 3600: write_sha = True try: # this might fail with HTTP 403 when server overload @@ -333,25 +358,15 @@ def _download_oggm_files_unlocked(): rename_output = "oggm-sample-data-%s" % master_sha except (HTTPError, URLError): master_sha = 'error' + write_sha = False else: write_sha = False # download only if necessary if not os.path.exists(sdir): - ofile = progress_urlretrieve(master_zip_url) - - # Trying to make the download more robust - try: - with zipfile.ZipFile(ofile) as zf: - zf.extractall(odir) - except zipfile.BadZipfile: - # try another time - if os.path.exists(ofile): - os.remove(ofile) - ofile = progress_urlretrieve(master_zip_url) - with zipfile.ZipFile(ofile) as zf: - zf.extractall(odir) - + ofile = file_downloader(master_zip_url, dest_path) + with zipfile.ZipFile(ofile) as zf: + zf.extractall(odir) # rename dir in case of download from different url if rename_output: fdir = os.path.join(cfg.CACHE_DIR, rename_output) @@ -388,7 +403,7 @@ def _download_srtm_file_unlocked(zone): """ # extract directory - tmpdir = os.path.join(cfg.PATHS['working_dir'], 'tmp') + tmpdir = cfg.PATHS['tmp_dir'] mkdir(tmpdir) outpath = os.path.join(tmpdir, 'srtm_' + zone + '.tif') @@ -397,12 +412,18 @@ def 
_download_srtm_file_unlocked(zone): return outpath # Did we download it yet? - ifile = 'http://droppr.org/srtm/v4.1/6_5x5_TIFs/srtm_' + zone + '.zip' - dfile = file_downloader(ifile) + wwwfile = 'http://droppr.org/srtm/v4.1/6_5x5_TIFs/srtm_' + zone + '.zip' + dest_file = os.path.join(cfg.PATHS['topo_dir'], 'srtm', + 'srtm_' + zone + '.zip') + dest_file = file_downloader(wwwfile, dest_file) + + # None means we tried hard but we couldn't find it + if not dest_file: + return None # ok we have to extract it if not os.path.exists(outpath): - with zipfile.ZipFile(dfile) as zf: + with zipfile.ZipFile(dest_file) as zf: zf.extractall(tmpdir) # See if we're good, don't overfill the tmp directory @@ -421,7 +442,7 @@ def _download_dem3_viewpano_unlocked(zone): """ # extract directory - tmpdir = os.path.join(cfg.PATHS['working_dir'], 'tmp') + tmpdir = cfg.PATHS['tmp_dir'] mkdir(tmpdir) outpath = os.path.join(tmpdir, zone+'.tif') @@ -430,17 +451,25 @@ def _download_dem3_viewpano_unlocked(zone): return outpath # OK, so see if downloaded already + dest_path = os.path.join(cfg.PATHS['topo_dir'], 'dem3') # some files have a newer version 'v2' if zone in ['R33', 'R34', 'R35', 'R36', 'R37', 'R38', 'Q32', 'Q33', 'Q34', 'Q35', 'Q36', 'Q37', 'Q38', 'Q39', 'Q40', 'P31', 'P32', 'P33', 'P34', 'P35', 'P36', 'P37', 'P38', 'P39', 'P40']: ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + 'v2.zip' + dest_path = os.path.join(dest_path, zone + 'v2.zip') elif zone in ['01-15', '16-30', '31-45', '46-60']: ifile = 'http://viewfinderpanoramas.org/ANTDEM3/' + zone + '.zip' + dest_path = os.path.join(dest_path, 'antdem_' + zone + '.zip') else: ifile = 'http://viewfinderpanoramas.org/dem3/' + zone + '.zip' + dest_path = os.path.join(dest_path, zone + '.zip') + + dfile = file_downloader(ifile, dest_path) - dfile = file_downloader(ifile) + # None means we tried hard but we couldn't find it + if not dfile: + return None # ok we have to extract it with zipfile.ZipFile(dfile) as zf: @@ -509,12 +538,13 
@@ def _download_aster_file_unlocked(zone, unit): fbname = 'ASTGTM2_' + zone + '.zip' dirbname = 'UNIT_' + unit # extract directory - tmpdir = os.path.join(cfg.PATHS['working_dir'], 'tmp') + tmpdir = cfg.PATHS['tmp_dir'] mkdir(tmpdir) outpath = os.path.join(tmpdir, 'ASTGTM2_' + zone + '_dem.tif') aws_path = 'ASTGTM_V2/' + dirbname + '/' + fbname - dfile = _aws_file_download_unlocked(aws_path) + dest_path = os.path.join(cfg.PATHS['topo_dir'], 'aster', fbname) + dfile = _aws_file_download_unlocked(aws_path, dest_path) if dfile is not None: # Ok so the tile is a valid one @@ -539,24 +569,29 @@ def _download_alternate_topo_file_unlocked(fname): """Checks if the special topo data is in the directory and if not, download it from AWS. - You need AWS cli and AWS credentials for this. Quoting Timo: - - $ aws configure + You need AWS cli and AWS credentials for this. Quoting Timo:: - Key ID und Secret you should have - Region is eu-west-1 and Output Format is json. + $ aws configure + + Key ID und Secret you should have + Region is eu-west-1 and Output Format is json. 
+ """ # extract directory - tmpdir = os.path.join(cfg.PATHS['working_dir'], 'tmp') + tmpdir = cfg.PATHS['tmp_dir'] mkdir(tmpdir) outpath = os.path.join(tmpdir, fname) + # Download directory + dl_dir = os.path.join(cfg.PATHS['topo_dir'], 'alternate') + aws_path = 'topo/' + fname + '.zip' - dfile = _aws_file_download_unlocked(aws_path) + dest_path = os.path.join(dl_dir, fname + '.zip') + dfile = _aws_file_download_unlocked(aws_path, dest_path) if not os.path.exists(outpath): - print('Extracting ' + fname + '.zip ...') + logger.info('Extracting ' + fname + '.zip to ' + outpath + '...') with zipfile.ZipFile(dfile) as zf: zf.extractall(tmpdir) @@ -586,47 +621,6 @@ def _get_centerline_lonlat(gdir): return olist -def aws_file_download(aws_path): - with _get_download_lock(): - return _aws_file_download_unlocked(aws_path) - - -def _aws_file_download_unlocked(aws_path): - """Download a file from the AWS drive s3://astgtmv2/ - - **Note:** you need AWS credentials for this to work. - - Parameters - ---------- - aws_path: path relative to s3://astgtmv2/ - local_path: where to copy the file - reset: overwrite the local file - """ - - log = logging.getLogger('download') - - while aws_path.startswith('/'): - aws_path = aws_path[1:] - - cache_obj_name = 'astgtmv2/' + aws_path - - def _dlf(cache_path): - import boto3 - import botocore - client = boto3.client('s3') - log.info("Downloading %s from s3 to %s..." % (aws_path, cache_path)) - try: - client.download_file('astgtmv2', aws_path, cache_path) - except botocore.exceptions.ClientError as e: - if e.response['Error']['Code'] == "404": - return None - else: - raise - return cache_path - - return _cached_download_helper(cache_obj_name, _dlf) - - def mkdir(path, reset=False): """Checks if directory exists and if not, create one. 
@@ -1138,7 +1132,7 @@ def aster_zone(lon_ex, lat_ex): def get_demo_file(fname): """Returns the path to the desired OGGM file.""" - d = _download_oggm_files() + d = download_oggm_files() if fname in d: return d[fname] else: @@ -1148,7 +1142,7 @@ def get_demo_file(fname): def get_cru_cl_file(): """Returns the path to the unpacked CRU CL file (is in sample data).""" - _download_oggm_files() + download_oggm_files() sdir = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data-master', 'cru') fpath = os.path.join(sdir, 'cru_cl2.nc') @@ -1169,9 +1163,9 @@ def get_wgms_files(): (file, dir): paths to the files """ - if cfg.PATHS['wgms_rgi_links'] != '': + if cfg.PATHS['wgms_rgi_links']: if not os.path.exists(cfg.PATHS['wgms_rgi_links']): - raise ValueError('wrong wgms_rgi_links path provided.') + raise ValueError('Wrong wgms_rgi_links path provided.') # User provided data outf = cfg.PATHS['wgms_rgi_links'] datadir = os.path.join(os.path.dirname(outf), 'mbdata') @@ -1180,7 +1174,7 @@ def get_wgms_files(): return outf, datadir # Roll our own - _download_oggm_files() + download_oggm_files() sdir = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data-master', 'wgms') outf = os.path.join(sdir, 'rgi_wgms_links_20170217_RGIV5.csv') assert os.path.exists(outf) @@ -1189,52 +1183,22 @@ def get_wgms_files(): return outf, datadir -def get_leclercq_files(): - """Get the path to the default Leclercq-RGI link file and the data dir. 
- - Returns - ------- - (file, dir): paths to the files - """ - - if cfg.PATHS['leclercq_rgi_links'] != '': - if not os.path.exists(cfg.PATHS['leclercq_rgi_links']): - raise ValueError('wrong leclercq_rgi_links path provided.') - # User provided data - outf = cfg.PATHS['leclercq_rgi_links'] - # TODO: This doesnt exist yet - datadir = os.path.join(os.path.dirname(outf), 'lendata') - # if not os.path.exists(datadir): - # raise ValueError('The Leclercq data directory is missing') - return outf, datadir - - # Roll our own - _download_oggm_files() - sdir = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data-master', 'leclercq') - outf = os.path.join(sdir, 'rgi_leclercq_links_2012_RGIV5.csv') - assert os.path.exists(outf) - # TODO: This doesnt exist yet - datadir = os.path.join(sdir, 'lendata') - # assert os.path.exists(datadir) - return outf, datadir - - def get_glathida_file(): - """Get the path to the default WGMS-RGI link file and the data dir. + """Get the path to the default GlaThiDa-RGI link file. Returns ------- - (file, dir): paths to the files + file: paths to the file """ - if cfg.PATHS['glathida_rgi_links'] != '': + if cfg.PATHS['glathida_rgi_links']: if not os.path.exists(cfg.PATHS['glathida_rgi_links']): - raise ValueError('wrong glathida_rgi_links path provided.') + raise ValueError('Wrong glathida_rgi_links path provided.') # User provided data return cfg.PATHS['glathida_rgi_links'] # Roll our own - _download_oggm_files() + download_oggm_files() sdir = os.path.join(cfg.CACHE_DIR, 'oggm-sample-data-master', 'glathida') outf = os.path.join(sdir, 'rgi_glathida_links_2014_RGIV5.csv') assert os.path.exists(outf) @@ -1242,37 +1206,40 @@ def get_glathida_file(): def get_rgi_dir(): - with _get_download_lock(): - return _get_rgi_dir_unlocked() - - -def _get_rgi_dir_unlocked(): - """ - Returns a path to the RGI directory. + """Returns a path to the RGI directory. - If the files are not present, download them. + If the RGI files are not present, download them. 
Returns ------- path to the RGI directory """ + with _get_download_lock(): + return _get_rgi_dir_unlocked() + + +def _get_rgi_dir_unlocked(): + rgi_dir = cfg.PATHS['rgi_dir'] - if not os.path.exists(rgi_dir): - raise ValueError('The RGI data directory does not exist!') + + # Be sure the user gave a sensible path to the RGI dir + if not rgi_dir: + raise ValueError('The RGI data directory has to be ' + 'specified explicitly.') + rgi_dir = os.path.abspath(os.path.expanduser(rgi_dir)) + mkdir(rgi_dir) bname = 'rgi50.zip' - tf = 'http://www.glims.org/RGI/rgi50_files/' + bname + dfile = 'http://www.glims.org/RGI/rgi50_files/' + bname test_file = os.path.join(rgi_dir, '000_rgi50_manifest.txt') - ofile = progress_urlretrieve(tf) - # if not there download it - if not os.path.exists(test_file): # pragma: no cover + if not os.path.exists(test_file): + # if not there download it + ofile = file_downloader(dfile, os.path.join(rgi_dir, bname)) # Extract root with zipfile.ZipFile(ofile) as zf: zf.extractall(rgi_dir) - # Extract subdirs pattern = '*_rgi50_*.zip' for root, dirs, files in os.walk(cfg.PATHS['rgi_dir']): @@ -1282,18 +1249,11 @@ def _get_rgi_dir_unlocked(): ex_root = ofile.replace('.zip', '') mkdir(ex_root) zf.extractall(ex_root) - return rgi_dir def get_cru_file(var=None): - with _get_download_lock(): - return _get_cru_file_unlocked(var) - - -def _get_cru_file_unlocked(var=None): - """ - Returns a path to the desired CRU TS file. + """Returns a path to the desired CRU TS file. If the file is not present, download it. 
@@ -1305,12 +1265,20 @@ def _get_cru_file_unlocked(var=None): ------- path to the CRU file """ + with _get_download_lock(): + return _get_cru_file_unlocked(var) + + +def _get_cru_file_unlocked(var=None): cru_dir = cfg.PATHS['cru_dir'] # Be sure the user gave a sensible path to the climate dir - if cru_dir == '' or not os.path.exists(cru_dir): - raise ValueError('The CRU data directory({}) does not exist!'.format(cru_dir)) + if not cru_dir: + raise ValueError('The CRU data directory has to be ' + 'specified explicitly.') + cru_dir = os.path.abspath(os.path.expanduser(cru_dir)) + mkdir(cru_dir) # Be sure input makes sense if var not in ['tmp', 'pre']: @@ -1324,16 +1292,17 @@ def _get_cru_file_unlocked(var=None): ofile = search[0] elif len(search) > 1: raise ValueError('The CRU filename should match "{}".'.format(bname)) - else: # pragma: no cover + else: # if not there download it - tf = CRU_SERVER + '{}/cru_ts3.24.01.1901.2015.{}.dat.nc.gz'.format(var, - var) - dlfile = progress_urlretrieve(tf) + dest_path = 'cru_ts3.24.01.1901.2015.{}.dat.nc.gz'.format(var) + tf = CRU_SERVER + '{}/'.format(var) + dest_path + dest_path = os.path.join(cru_dir, dest_path) + dlfile = file_downloader(tf, dest_path) + ofile = dlfile.replace('.gz', '') with gzip.GzipFile(dlfile) as zf: with open(ofile, 'wb') as outfile: for line in zf: outfile.write(line) - return ofile @@ -1343,16 +1312,18 @@ def get_topo_file(lon_ex, lat_ex, rgi_region=None, source=None): If the needed files for covering the extent are not present, download them. - By default it will be referred to SRTM for [-60S;60N], and - a corrected DEM3 from viewfinderpanoramas.org elsewhere. However, a - user-specified data source can be given with the ``source`` keyword. + By default it will be referred to SRTM for [-60S; 60N], GIMP for Greenland, + RAMP for Antarctica, and a corrected DEM3 (viewfinderpanoramas.org) + elsewhere. + + A user-specified data source can be given with the ``source`` keyword. 
Parameters ---------- lon_ex : tuple, required - a (min_lon, max_lon) tuple deliminating the requested area longitudes + a (min_lon, max_lon) tuple delimiting the requested area longitudes lat_ex : tuple, required - a (min_lat, max_lat) tuple deliminating the requested area latitudes + a (min_lat, max_lat) tuple delimiting the requested area latitudes rgi_region : int, optional the RGI region number (required for the GIMP DEM) source : str or list of str, optional @@ -1375,7 +1346,7 @@ def get_topo_file(lon_ex, lat_ex, rgi_region=None, source=None): demf, source_str = get_topo_file(lon_ex, lat_ex, rgi_region=rgi_region, source=s) - if demf: + if demf[0]: return demf, source_str # Did the user specify a specific DEM file? @@ -1390,8 +1361,8 @@ def get_topo_file(lon_ex, lat_ex, rgi_region=None, source=None): if source == 'GIMP' or (rgi_region is not None and int(rgi_region) == 5): source = 'GIMP' if source is None else source if source == 'GIMP': - gimp_file = _download_alternate_topo_file('gimpdem_90m.tif') - return [gimp_file], source + _file = _download_alternate_topo_file('gimpdem_90m.tif') + return [_file], source # Same for Antarctica if source == 'RAMP' or (rgi_region is not None and int(rgi_region) == 19): @@ -1401,12 +1372,12 @@ def get_topo_file(lon_ex, lat_ex, rgi_region=None, source=None): else: source = 'RAMP' if source is None else source if source == 'RAMP': - gimp_file = _download_alternate_topo_file('AntarcticDEM_wgs84.tif') - return [gimp_file], source + _file = _download_alternate_topo_file('AntarcticDEM_wgs84.tif') + return [_file], source # Anywhere else on Earth we check for DEM3, ASTER, or SRTM if (np.min(lat_ex) < -60.) or (np.max(lat_ex) > 60.) 
or \ - source == 'DEM3' or source == 'ASTER': + (source == 'DEM3') or (source == 'ASTER'): # default is DEM3 source = 'DEM3' if source is None else source if source == 'DEM3': From 873309075d70feac29ad4185db951a7d764823e7 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Tue, 18 Apr 2017 17:31:06 +0200 Subject: [PATCH 2/3] Better defaults --- oggm/cfg.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/oggm/cfg.py b/oggm/cfg.py index 007440d2f..0e922c43b 100644 --- a/oggm/cfg.py +++ b/oggm/cfg.py @@ -331,10 +331,10 @@ def oggm_static_paths(): dldir = os.path.join(os.path.expanduser('~'), 'OGGM_DOWNLOADS') config = ConfigObj() config['dl_cache_dir'] = dldir - config['tmp_dir'] = os.path.join(dldir, 'tmp') - config['topo_dir'] = os.path.join(dldir, 'topo') - config['cru_dir'] = os.path.join(dldir, 'cru') - config['rgi_dir'] = os.path.join(dldir, 'rgi') + config['tmp_dir'] = '' + config['topo_dir'] = '' + config['cru_dir'] = '' + config['rgi_dir'] = '' config['has_internet'] = True config.filename = CONFIG_FILE config.write() @@ -351,16 +351,22 @@ def oggm_static_paths(): for k in ['dl_cache_dir', 'tmp_dir', 'topo_dir', 'cru_dir', 'rgi_dir', 'has_internet']: if k not in config: - raise RuntimeError('The oggm config file ({}) should have an' + raise RuntimeError('The oggm config file ({}) should have an ' 'entry for {}.'.format(CONFIG_FILE, k)) - # Override defaults with env variables + # Override defaults with env variables if available if os.environ.get('OGGM_DOWNLOAD_CACHE') is not None: config['dl_cache_dir'] = os.environ.get('OGGM_DOWNLOAD_CACHE') + if not config['dl_cache_dir']: + raise RuntimeError('At the very least, the "dl_cache_dir" entry ' + 'should be provided in the oggm config file ' + '({})'.format(CONFIG_FILE)) + # Fill the PATH dict + for k, v in config.items(): if not v and '_dir' in k: + # defaults to the cache dir v = os.path.join(config['dl_cache_dir'], k.replace('_dir', '')) PATHS[k] = 
os.path.abspath(os.path.expanduser(v)) From f9d3e5509fb67d2e21a30ecdc6161482cc02c611 Mon Sep 17 00:00:00 2001 From: Fabien Maussion Date: Tue, 18 Apr 2017 18:57:36 +0200 Subject: [PATCH 3/3] Windows bug --- oggm/cfg.py | 3 ++- oggm/tests/test_prepro.py | 1 + oggm/utils.py | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/oggm/cfg.py b/oggm/cfg.py index 0e922c43b..e1c39f591 100644 --- a/oggm/cfg.py +++ b/oggm/cfg.py @@ -239,7 +239,8 @@ def initialize(file=None): PATHS['working_dir'] = cp['working_dir'] # Default if not PATHS['working_dir']: - PATHS['working_dir'] = os.path.expanduser('~/OGGM_WORKING_DIRECTORY') + PATHS['working_dir'] = os.path.join(os.path.expanduser('~'), + 'OGGM_WORKING_DIRECTORY') PATHS['dem_file'] = cp['dem_file'] PATHS['climate_file'] = cp['climate_file'] PATHS['wgms_rgi_links'] = cp['wgms_rgi_links'] diff --git a/oggm/tests/test_prepro.py b/oggm/tests/test_prepro.py index a69c0bf07..8c4b53099 100644 --- a/oggm/tests/test_prepro.py +++ b/oggm/tests/test_prepro.py @@ -1434,6 +1434,7 @@ def to_optimize(x): def test_continue_on_error(self): cfg.CONTINUE_ON_ERROR = True + cfg.PATHS['working_dir'] = self.testdir hef_file = get_demo_file('Hintereisferner.shp') entity = gpd.GeoDataFrame.from_file(hef_file).iloc[0] diff --git a/oggm/utils.py b/oggm/utils.py index 47e17798e..b87755477 100644 --- a/oggm/utils.py +++ b/oggm/utils.py @@ -1892,10 +1892,11 @@ def __repr__(self): summary += [' Area: ' + str(self.rgi_area_km2) + ' mk2'] summary += [' Lon, Lat: (' + str(self.cenlon) + ', ' + str(self.cenlat) + ')'] - summary += [' Grid (nx, ny): (' + str(self.grid.nx) + ', ' + - str(self.grid.ny) + ')'] - summary += [' Grid (dx, dy): (' + str(self.grid.dx) + ', ' + - str(self.grid.dy) + ')'] + if os.path.isfile(self.get_filepath('glacier_grid')): + summary += [' Grid (nx, ny): (' + str(self.grid.nx) + ', ' + + str(self.grid.ny) + ')'] + summary += [' Grid (dx, dy): (' + str(self.grid.dx) + ', ' + + str(self.grid.dy) + ')'] return 
'\n'.join(summary) + '\n' @lazy_property