In [1]:
# %matplotlib inline

In [2]:
from geophys_utils import CSWUtils
from geophys_utils import DataStats # https://github.com/alex-ip/geophys2netcdf/tree/develop
import os
import re
from netCDF4 import Dataset
from pprint import pprint

In [3]:
# Optional proxy configuration.
# Set GA_STAFF_WIFI to True to send both HTTP and HTTPS requests via the proxy below.
GA_STAFF_WIFI = False

if GA_STAFF_WIFI:
    for proxy_variable in ('http_proxy', 'https_proxy'):
        os.environ[proxy_variable] = 'http://proxy.inno.lan:3128'

In [4]:
# CSW endpoints this notebook can be pointed at; pick one by key below.
csw_endpoints = {
    'ga_external': 'https://ecat.ga.gov.au/geonetwork/srv/eng/csw',  # GA's externally-facing eCat
    'ga_internal': 'https://internal.ecat.ga.gov.au/geonetwork/srv/eng/csw',  # GA's internally-facing eCat
    'nci': 'http://geonetworkrr2.nci.org.au/geonetwork/srv/eng/csw',  # NCI GeoNetwork
}
csw_url = csw_endpoints['ga_external']

In [5]:
# Search parameters for the catalogue query.
# Word lists are passed around as single comma-separated strings.

# Keyword filter (GA flavour is active; NCI flavour kept below for reference)
keywords = ', '.join(['NCI', 'geoscientific%Information', 'grid', 'potassium'])
#keywords = "NCI, National Coverage, grid" # Comma-separated list of keywords for NCI

# Free-text filter
allwords = ', '.join(['NCI', 'National Coverage', 'grid'])

# Bounding box filter (values look like [min lon, min lat, max lon, max lat] - TODO confirm).
# Alternative boxes that have been tried:
#   [110, -45, 160, -5]   slightly larger than national coverage
#   [115, -40, 150, -10]  slightly smaller than national coverage
#   [110, -40, 160, -10]  slightly wider than national coverage
#   [115, -45, 150, -5]   slightly taller than national coverage
#   [0, 0, 1, 1]          invalid bounding box somewhere South of England
bounds = [
    148.996, -35.48,    # first corner
    149.399, -35.124,   # opposite corner; box is way smaller than national coverage
]

# Title filters
titlewords = ', '.join(['onshore', 'gravity', 'grid', 'Australia', '2016'])
#titlewords = "Joanna,Spring,uranium,profiles"
not_title = '%image%'

In [6]:
# Find all datasets of interest.
# Create a CSWUtils object for the chosen catalogue and run the query using the
# search parameters defined in the earlier cells. Filters that are commented out
# are simply not applied to this search.
cswu = CSWUtils(csw_url)
record_generator = cswu.query_csw(keyword_list=keywords,
                                  #anytext_list=allwords,
                                  #titleword_list=titlewords,
                                  bounding_box=bounds,
                                  #start_datetime=start_date,
                                  #stop_datetime=end_date,
                                  #max_total_records=2000
                                  )

# Open each NetCDF distribution found and print its title, URL, UUID, extent and grid shape
for distribution in cswu.get_netcdf_urls(record_generator):
    dataset = Dataset(distribution['url'])
    try:
        # The data variable is taken to be the first variable carrying a
        # 'grid_mapping' attribute. Use None (rather than raising IndexError and
        # aborting the whole listing) when a dataset has no such variable.
        data_variable = next((variable for variable in dataset.variables.values()
                              if hasattr(variable, 'grid_mapping')),
                             None)

        # float() keeps the printed list as plain numbers even when the global
        # attribute comes back as a numpy scalar
        dataset_extent = [round(float(ordinate), 6)
                          for ordinate in (dataset.geospatial_lon_min,
                                           dataset.geospatial_lat_min,
                                           dataset.geospatial_lon_max,
                                           dataset.geospatial_lat_max)]

        # Single-argument print(...) produces the same output on Python 2 and Python 3
        print('%s' % distribution['title'])
        print('\tNetCDF %s' % (distribution['url'],))
        print('\tUUID %s' % (distribution['uuid'],))
        print('\textent %s' % (dataset_extent,))
        if data_variable is not None:
            print('\tshape %s' % (data_variable.shape,))
        else:
            print('\tshape unknown (no variable with a grid_mapping attribute)')
    finally:
        # Always release the dataset handle, even if an attribute is missing
        dataset.close()

radmap v3 2015 filtered pct potassium grid
	NetCDF http://dapds00.nci.org.au/thredds/dodsC/rr2/National_Coverages/radmap_v3_2015_filtered_pctk/radmap_v3_2015_filtered_pctk.nc
	UUID 221dcfd8-04f4-5083-e053-10a3070a64e3
	extent [112.7175, -43.7615, 153.6715, -9.0005]
	shape (34761, 40954)
radmap v3 2015 unfiltered pct potassium grid
	NetCDF http://dapds00.nci.org.au/thredds/dodsC/rr2/National_Coverages/radmap_v3_2015_unfiltered_pctk/radmap_v3_2015_unfiltered_pctk.nc
	UUID 221dcfd8-04fc-5083-e053-10a3070a64e3
	extent [112.7175, -43.7615, 153.6715, -9.0005]
	shape (34761, 40954)
Radiometric Potassium grid of NSW DMR, Discovery 2000, Area S, Braidwood, NSW 2001 survey
	NetCDF http://dapds00.nci.org.au/thredds/dodsC/uc0/rr2_dev/rcb547/AWAGS_Levelled_Grids/rad_survey_grids_levelled/potassium/rNSW0756k/rNSW0756k.nc
	UUID 4d4eb01b-4539-4c66-ae18-73e0b5b411a0
	extent [149.066355, -36.021368, 150.053341, -34.977687]
	shape (2117, 2002)
Radiometric Potassium grid of Southeast Lachlan, NSW, 2010 su