In [1]:
from __future__ import print_function
from netCDF4 import Dataset
import requests
from lxml import etree  
import matplotlib.pyplot as plt
from owslib.wps import WebProcessingService, ComplexDataInput 


In [2]:
def parseStatus(execute):
    """Return the output reference URL found in a WPS status document.

    The status XML is downloaded from ``execute.statusLocation`` and the
    ``xlink:href`` attribute is read from the last child of the last child
    of the last child of the document root.

    Parameters
    ----------
    execute : object
        Any object exposing a ``statusLocation`` attribute holding the URL
        of the status document (the notebook passes the value returned by
        ``WebProcessingService.execute``).

    Returns
    -------
    str or None
        Value of the ``{http://www.w3.org/1999/xlink}href`` attribute, or
        ``None`` when the selected element does not carry that attribute.

    Raises
    ------
    requests.HTTPError
        If the status document cannot be fetched (4xx/5xx response).
    IndexError
        If the document is not nested at least three levels deep, e.g.
        because it does not contain any output yet.
    """
    response = requests.get(execute.statusLocation)
    # Stop here on an HTTP error rather than trying to parse an error page.
    response.raise_for_status()
    root = etree.fromstring(response.content)

    # Plain element indexing replaces the deprecated ``getchildren()`` calls:
    # root -> last child -> last child -> last child.
    reference_node = root[-1][-1][-1]
    return reference_node.get('{http://www.w3.org/1999/xlink}href')

In [3]:
# Endpoint of the catalogue WPS.
wpsURL = 'https://pavics.ouranos.ca/twitcher/ows/proxy/catalog/pywps'

# Client object used by the following cells to list, describe and run
# the catalogue processes.
wpsCatalogue = WebProcessingService(url=wpsURL)

In [4]:
# One "identifier : abstract" line per process offered by the catalogue WPS.
line_template = '%s \t : %s \n'
for process in wpsCatalogue.processes:
    print(line_template % (process.identifier, process.abstract))

getpoint 	 : Return a single value from a NetCDF file at the given grid coordinates. 

ncplotly 	 : Return a dictionary storing the data necessary to create a simple plotly time series. 

pavicrawler 	 : Crawl thredds server and write metadata to SOLR database. 

pavicsearch 	 : Search the PAVICS database and return a catalogue of matches. 

pavicsupdate 	 : Update database entries using key:value pairs and identified by their ids. 

pavicsvalidate 	 : Query database entries for missing required facets. 

period2indices 	 : The final index is inclusive. 

pavicstestdocs 	 : Add test documents to Solr index. 



In [5]:
# Second service: the Flyingpigeon WPS. Note that wpsURL is re-pointed to
# this endpoint from here on.
wpsURL = 'https://pavics.ouranos.ca/twitcher/ows/proxy/flyingpigeon/wps'
wpsFP = WebProcessingService(url=wpsURL)

# Show the title the service reports about itself.
print(wpsFP.identification.title)

Flyingpigeon 1.1_dev


In [6]:
# One "identifier : abstract" line per process offered by Flyingpigeon.
line_template = '%s \t : %s \n'
for process in wpsFP.processes:
    print(line_template % (process.identifier, process.abstract))

subset_countries 	 : Return the data whose grid cells intersect the selected countries for each input dataset. 

subset_continents 	 : Return the data whose grid cells intersect the selected continents for each input dataset. 

subset_regionseurope 	 : Return the data whose grid cells inteserct the selected regions for each input dataset. 

pointinspection 	 : Extract the timeseries at the given coordinates. 

landseamask 	 : Mask grid cells according to their land area fraction. This process uses the ESGF datastore to access an appropriate land/sea mask. 

fetch_resources 	 : Fetch data resources (limited to 50GB) to the local filesystem of the birdhouse compute provider. 

indices_percentiledays 	 : Climatological percentile for each day of the year computed over the entire dataset. 

indices_single 	 : Climate index calculated from one daily input variable. 

sdm_gbiffetch 	 : Species occurence search in Global Biodiversity              Infrastructure Facillity (GBIF) 

sdm_getindic

In [7]:
# Query the catalogue for every file whose 'variable' facet is 'tasmax'.
proc_name = 'pavicsearch'
constraintString = 'variable:tasmax'
maxfiles = '1000000'  # upper bound on returned documents, sent as a string
myinputs = [
    ('constraints', constraintString),
    ('type', 'File'),
    ('limit', maxfiles),
]
execution = wpsCatalogue.execute(identifier=proc_name, inputs=myinputs)

# execute() can return while the process is still running; poll the status
# document until the job is complete so that processOutputs is populated
# before it is indexed below. The loop is skipped when already complete.
while not execution.isComplete():
    execution.checkStatus(sleepSecs=3)

print(execution.status)
print(execution.processOutputs[-1].reference)

ProcessSucceeded
https://pavics.ouranos.ca/wpsoutputs/catalog/f990ae8e-3c6b-11e9-988e-0242ac120008/list_result_2019-03-01T21:50:09Z__2I6iQW.json


In [8]:
# Fetch the description of the catalogue search process and list its
# inputs first, then its outputs, as "identifier : abstract" lines.
proc_name = 'pavicsearch'
process = wpsCatalogue.describeprocess(identifier=proc_name)
for label, entries in (('inputs :', process.dataInputs),
                       ('outputs :', process.processOutputs)):
    for entry in entries:
        print(label, entry.identifier, ' : ', entry.abstract)

inputs : facets  :  Comma separated list of facets; facets are searchable indexing terms in the database.
inputs : shards  :  Shards to be queried
inputs : offset  :  Where to start in the document count of the database search.
inputs : limit  :  Maximum number of documents to return.
inputs : fields  :  Comme separated list of fields to return.
inputs : format  :  Output format.
inputs : query  :  Direct query to the database.
inputs : distrib  :  Distributed query
inputs : type  :  One of Dataset, File, Aggregate or FileAsAggregate.
inputs : constraints  :  Format is facet1:value1,facet2:value2,...
inputs : esgf  :  Whether to also search ESGF nodes.
inputs : list_type  :  Can be opendap_url, fileserver_url, gridftp_url, globus_url, wms_url
outputs : search_result  :  PAVICS Catalogue Search Result
outputs : list_result  :  List of urls of the search result.


In [9]:
# Describe the Flyingpigeon bounding-box subsetting process.
# proc_name keeps this value for the execution cell further down.
proc_name = 'subset_bbox'
process = wpsFP.describeprocess(identifier=proc_name)

# Header line: process title and abstract.
print(process.title,' : ',process.abstract,'\n')

# Inputs accepted by the process ...
for param in process.dataInputs:
    print('inputs :', param.identifier, ' : ', param.abstract)
# ... followed by the outputs it produces.
for result in process.processOutputs:
    print('outputs :', result.identifier, ' : ', result.abstract)

Subset  :  Return the data for which grid cells intersect the bounding box for each input dataset as well asthe time range selected. 

inputs : resource  :  NetCDF files, can be OPEnDAP urls.
inputs : lon0  :  Minimum longitude.
inputs : lon1  :  Maximum longitude.
inputs : lat0  :  Minimum latitude.
inputs : lat1  :  Maximum latitude.
inputs : initial_datetime  :  Initial datetime for temporal subsetting.
inputs : final_datetime  :  Final datetime for temporal subsetting.
inputs : variable  :  Name of the variable in the NetCDF file.Will be guessed if not provided.
outputs : output  :  JSON file with link to NetCDF outputs.


In [10]:
# Look up the URL of the search output in the execution's status document,
# then download and decode it (the result is used below as a list of URLs).
ref = parseStatus(execution)
r = requests.get(ref)
# Fail with a clear HTTP error instead of an obscure JSON decoding error
# when the output file cannot be retrieved.
r.raise_for_status()
list_nc = r.json()
print('Numer of files found :',len(list_nc), '\n')
# NOTE(review): the slice starts at index 1, so the first URL of the list is
# not displayed -- confirm whether [0:15] was intended.
print("\n".join(list_nc[1:15]),'\n...')

Numer of files found : 13026 

https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/ouranos/climex/QC11d3_CCCma-CanESM2_rcp85/day/historical-r1-r3i1p1/tasmax/tasmax_kdc_198902_se.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/ouranos/climex/QC11d3_CCCma-CanESM2_rcp85/day/historical-r1-r1i1p1/tasmax/tasmax_kda_206005_se.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/ouranos/cb-oura-1.0/HadGEM2-CC/rcp45/day/tasmax/tasmax_day_HadGEM2-CC_rcp45_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2043.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/ouranos/climex/QC11d3_CCCma-CanESM2_rcp85/day/historical-r1-r2i1p1/tasmax/tasmax_kdb_202907_se.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/ouranos/climex/QC11d3_CCCma-CanESM2_rcp85/day/historical-r1-r1i1p1/tasmax/tasmax_kda_200310_se.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/ouranos/climex/QC11d3_CCCma-CanESM2_r

In [11]:
# Keep the URLs that contain 'nrcan' and at least one of the three years.
# The match is a plain substring test on the whole URL.
wanted_years = ('1991', '1992', '1993')
nrcan_nc = sorted(
    url for url in list_nc
    if 'nrcan' in url and any(year in url for year in wanted_years)
)

# Summary: number of matches, then one URL per line.
count_text = "%s\n" % len(nrcan_nc)
print('Number of files :', count_text, "\n".join(nrcan_nc))

Number of files : 3
 https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/nrcan/nrcan_canada_daily/tasmax/nrcan_canada_daily_tasmax_1991.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/nrcan/nrcan_canada_daily/tasmax/nrcan_canada_daily_tasmax_1992.nc
https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/nrcan/nrcan_canada_daily/tasmax/nrcan_canada_daily_tasmax_1993.nc


In [12]:
# Open the first file of the sorted selection directly from its URL and
# print the dataset summary (attributes, dimensions, variables).
first_nrcan_url = nrcan_nc[0]
nc_test = Dataset(first_nrcan_url)
print(nc_test)

<type 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format DAP2):
    Conventions: CF-1.5
    title: NRCAN 10km Gridded Climate Dataset
    history: 2012-10-22T11:26:06: Convert from original format to NetCDF
    institution: NRCAN
    source: ANUSPLIN
    redistribution: Redistribution policy unknown. For internal use only.
    DODS_EXTRA.Unlimited_Dimension: time
    dimensions(sizes): time(365), lat(510), lon(1068), ts(3)
    variables(dimensions): float32 lon(lon), float32 lat(lat), int16 ts(ts), int16 time(time), int16 time_vectors(time,ts), float32 tasmax(time,lat,lon)
    groups: 



In [13]:
# Each selected file is passed as its own 'resource' input.
myinputs = [('resource', url) for url in nrcan_nc]

# Bounding box sent with the request, as strings. Only these spatial limits
# are set here; no initial_datetime / final_datetime input is added, so no
# temporal subsetting is requested by this cell.
myinputs += [
    ('lon0', '-80.0'),
    ('lon1', '-70.0'),
    ('lat0', '44.0'),
    ('lat1', '50'),
]
print(myinputs)

[('resource', 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/nrcan/nrcan_canada_daily/tasmax/nrcan_canada_daily_tasmax_1991.nc'), ('resource', 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/nrcan/nrcan_canada_daily/tasmax/nrcan_canada_daily_tasmax_1992.nc'), ('resource', 'https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/nrcan/nrcan_canada_daily/tasmax/nrcan_canada_daily_tasmax_1993.nc'), ('lon0', '-80.0'), ('lon1', '-70.0'), ('lat0', '44.0'), ('lat1', '50')]


In [14]:
# Run the subsetting process on Flyingpigeon. proc_name still holds
# 'subset_bbox' from the describeprocess cell, and myinputs comes from the
# cell just above.
execution = wpsFP.execute(identifier=proc_name, inputs=myinputs)

# execute() can return while the process is still running; poll the status
# document until the job is complete so that processOutputs is populated
# before it is indexed below. The loop is skipped when already complete.
while not execution.isComplete():
    execution.checkStatus(sleepSecs=3)

print(execution.status)
print(execution.processOutputs[-1].reference)
print(execution.statusLocation)

ProcessSucceeded
https://pavics.ouranos.ca:443/wpsoutputs/flyingpigeon/0fc900f2-3c6c-11e9-9291-0242ac120010/result_2019-03-01T21:50:56Z__pCYZio.json
https://pavics.ouranos.ca:443/wpsoutputs/flyingpigeon/0fc900f2-3c6c-11e9-9291-0242ac120010.xml
