Skip to content

Commit

Permalink
first cut at getting a data downloader (#580)
Browse files Browse the repository at this point in the history
* first cut at getting a data downloader

* fix typos noted by @durack1
  • Loading branch information
doutriaux1 committed Oct 25, 2018
1 parent 1635473 commit 8cc11d6
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
# Command-line entry points shipped with the package, one path per line.
scripts = [
    'src/python/pcmdi/scripts/mean_climate_driver.py',
    'src/python/pcmdi/scripts/pcmdi_compute_climatologies.py',
    'src/python/misc/scripts/parallelize_driver.py',
    'src/python/misc/scripts/get_pmp_data.py',
    'src/python/monsoon_wang/scripts/mpindex_compute.py',
    'src/python/monsoon_sperber/scripts/driver_monsoon_sperber.py',
]
Expand Down
50 changes: 50 additions & 0 deletions src/python/misc/scripts/get_pmp_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python
from __future__ import print_function

import os
import shutil
import tempfile

import cdat_info
import requests

from pcmdi_metrics.driver.pmp_parser import PMPParser


def download_file(download_url_root, name, local_filename):
    """Download ``<download_url_root>/<name>`` and save it as *local_filename*.

    The response body is streamed to disk in 1 KiB chunks, so the whole
    payload is never held in memory at once.

    :param download_url_root: base URL of the server directory (no trailing
        slash is added or removed; it is joined to *name* with a single "/").
    :param name: name of the remote file, relative to *download_url_root*.
    :param local_filename: path of the local file to (over)write.
    :raises requests.exceptions.HTTPError: if the server answers with a
        4xx/5xx status. The local file is left untouched in that case.
    """
    r = requests.get("%s/%s" % (download_url_root, name), stream=True)
    try:
        # Without this check an HTTP error page (404, 500, ...) would be
        # written to disk and later parsed as if it were the requested data.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection checked out until it is
        # fully consumed or explicitly closed; always release it.
        r.close()


# Command-line interface of the data downloader.
parser = PMPParser(description='Get sample data')

# Which manifest(s) to fetch: observations, sample data, or both.
parser.add_argument(
    "--dataset",
    choices=["all", "obs", "sample"],
    default="all",
    help="Download observation or sample data or both")

# Version tag used to build the manifest file name on the server.
parser.add_argument(
    "--version",
    default="latest",
    help="which version to use")

# Base URL under which the manifest files are published.
parser.add_argument(
    "--server",
    default="https://pcmdiweb.llnl.gov/pss/pmpdata",
    help="which server to use")

# Opt-in flag: place the data in a per-version sub-directory.
parser.add_argument(
    "--version_in_path",
    action="store_true",
    default=False,
    help="Append version in root path, avoids clobbering versions")

# Destination directory; None leaves the choice to the download helper.
parser.add_argument(
    "--output-path",
    default=None,
    help="directory where to download")

# parser.use("num_workers")
p = parser.get_parameter()

# Driver: fetch the requested manifest file(s) from the server, then hand each
# manifest to cdat_info so that it downloads the data files listed in it.

if p.version_in_path and p.output_path is None:
    # The versioned directory is built by joining the version onto
    # --output-path; os.path.join cannot do that with None. Fail up front,
    # before any network traffic, with an actionable message.
    raise SystemExit("--version_in_path requires --output-path to be set")

# Step 1: prepare the paths to get the sample data files.
# The manifests are scratch files, so keep them in a private temporary
# directory: nothing named obs.txt / sample.txt in the current directory is
# overwritten, and nothing is left behind afterwards.
pth = tempfile.mkdtemp()
try:
    files = []
    for kind in ("obs", "sample"):
        if p.dataset in ("all", kind):  # this kind of data was requested
            manifest = os.path.join(pth, "{}.txt".format(kind))
            download_file(
                p.server, "{}_{}.txt".format(kind, p.version), manifest)
            files.append(manifest)

    # Step 2: download the data files listed in each manifest.
    for manifest in files:
        # First do we clobber or not?
        pathout = p.output_path
        if p.version_in_path:
            # The version is the last "_"-separated field of the manifest's
            # first line; use it as a sub-directory so that different
            # versions do not overwrite each other.
            with open(manifest) as f:
                header = f.readline().strip()
            version = header.split("_")[-1]
            pathout = os.path.join(p.output_path, version)
        # NOTE(review): presumably path=None makes cdat_info fall back to its
        # own default location - confirm against the cdat_info documentation.
        cdat_info.download_sample_data_files(manifest, path=pathout)
finally:
    # Remove the scratch directory even when a download fails.
    shutil.rmtree(pth, ignore_errors=True)

0 comments on commit 8cc11d6

Please sign in to comment.