From a0327f9e403935849670783b18aab6ab9df8c47a Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 21 Oct 2016 15:41:26 -0400 Subject: [PATCH 1/7] Processor for Hadley Centre gridded daily temperature datasets 1950-2014 --- dataqs/hadghcnd/__init__.py | 0 dataqs/hadghcnd/hadghcnd.py | 212 ++++++++++++++++++ .../HadGHCND_TXTN_anoms_1950-1960_15052015.nc | Bin 0 -> 170328 bytes .../hadghcnd/resources/HadGHCND_anomalies.sld | 28 +++ .../resources/HadGHCND_temperatures.sld | 28 +++ dataqs/hadghcnd/tasks.py | 29 +++ dataqs/hadghcnd/tests.py | 103 +++++++++ dataqs/helpers.py | 109 +++++++-- dataqs/processor_base.py | 16 +- 9 files changed, 496 insertions(+), 29 deletions(-) create mode 100644 dataqs/hadghcnd/__init__.py create mode 100644 dataqs/hadghcnd/hadghcnd.py create mode 100644 dataqs/hadghcnd/resources/HadGHCND_TXTN_anoms_1950-1960_15052015.nc create mode 100644 dataqs/hadghcnd/resources/HadGHCND_anomalies.sld create mode 100644 dataqs/hadghcnd/resources/HadGHCND_temperatures.sld create mode 100644 dataqs/hadghcnd/tasks.py create mode 100644 dataqs/hadghcnd/tests.py diff --git a/dataqs/hadghcnd/__init__.py b/dataqs/hadghcnd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataqs/hadghcnd/hadghcnd.py b/dataqs/hadghcnd/hadghcnd.py new file mode 100644 index 0000000..b4596a3 --- /dev/null +++ b/dataqs/hadghcnd/hadghcnd.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# Copyright Kitware Inc. and Epidemico Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### + +from __future__ import absolute_import +import logging +import os +import re +import shutil +from datetime import date +from time import sleep +import gdal +from dateutil.relativedelta import relativedelta +from dataqs.processor_base import GeoDataMosaicProcessor, GS_DATA_DIR, \ + GS_TMP_DIR, RSYNC_WAIT_TIME +from dataqs.helpers import gdal_translate, style_exists, create_band_vrt, untar + +logger = logging.getLogger("dataqs.processors") +script_dir = os.path.dirname(os.path.realpath(__file__)) + + +class HadGHCNDProcessor(GeoDataMosaicProcessor): + """ + Hadley Centre gridded daily temperature dataset based upon near-surface + maximum (TX) and minimum (TN) temperature observations. + It is designed primarily for the analysis of climate extremes and also for + climate model evaluation. It spans the years 1950 to present and is + available on a 2.5° latitude by 3.75° longitude grid. This dataset has + been developed in collaboration with the United States National Centers for + Environmental Information (NCEI), formerly the National Climatic Data Center + (NCDC). + More info at http://www.metoffice.gov.uk/hadobs/hadghcnd/index.html + """ + prefix = "HadGHCND" + tmp_dir = os.path.join(GS_TMP_DIR, prefix) + base_url = "http://www.metoffice.gov.uk/hadobs/hadghcnd/data/" + + layers = { + # 'HadGHCND_TXTN_anoms_1950-2014_15052015.nc.tgz': { + # 'title': 'HadGHCND Temperature Anomalies - {measure}, {interval}', + # 'name': '{prefix}_anomalies_{measure}_{interval}' + # }, + 'HadGHCND_TXTN_acts_1950-2014_15102015.nc.tgz': { + 'title': 'HadGHCND Actual Temperatures - {measure}, {interval}', + 'name': '{prefix}_temperatures_{measure}_{interval}' + } + + } + abstract = ( + u"HadGHCND is a gridded daily temperature dataset based upon near-" + u"surface maximum (TX) and minimum (TN) temperature observations. It is" + u" designed primarily for the analysis of climate extremes and also for" + u" climate model evaluation. It spans the years 1950 to present and is " + u"available on a 2.5° latitude by 3.75° longitude grid. This dataset " + u"has been developed in collaboration with the United States National " + u"Centers for Environmental Information (NCEI), formerly the National " + u"Climatic Data Center (NCDC).\n\nHadGHCND has been created using daily" + u" station observations in NCEI's GHCN (Global Historical Climatology " + u"Network)-Daily database. This consists of over 27,000 stations with " + u"temperature observations, though many of these have quite short " + u"records, or gaps in the record. Quality control has been carried out " + u"to indicate potentially spurious values. We have filtered down these " + u"stations to obtain those for which we can adequately calculate a 1961" + u"-90 daily climatology. The dataset represents anomalies from the 1961" + u"-1990 climatology.\n\nAn angular-distance weighting approach was used" + u" to interpolate the station data onto the required grid. We chose to " + u"grid the station anomalies to overcome some of the issues associated " + u"with elevation dependence.\n\nThe data are available as gridded " + u"anomalies, relative to the 1961-90 base period, and also as gridded " + u"actual temperatures. The actual temperatures were created by gridding" + u" the daily normals and adding these to the relevant daily anomaly.\n" + u"\nSource: http://www.metoffice.gov.uk/hadobs/hadghcnd/index.html\n\n" + u"Raw data file: {}") + + def extract_band(self, ncfile, band, outname, projection=None): + """ + Extract specified band from NetCDF file and convert to GeoTIFF, + using a VRT file to swap E-W axis + :param ncfile: NetCDF input filename, formatted for use in GDAL + :param band: Band number to process + :param outname: Output GeoTIFF filename + :param projection: Projection to use + :return: Full pathname of output GeoTIFF + """ + temp_vrt = os.path.join(self.tmp_dir, outname + '.vrt') + try: + source_xml = """ + + + {fname} + {band} + + + + + {fname} + {band} + + + + + """.format(fname=ncfile, band=band) + geotransform = ','.join(['-1.818750000000000000e+02', + '3.7500000000000000e+00', + '0.0000000000000000e+00', + '9.1250000000000000e+01', + '0.0000000000000000e+00', + '-2.5000000000000000e+00']) + create_band_vrt(ncfile, temp_vrt, [band], source_xml, + projection=projection, geotransform=geotransform) + gdal_translate(temp_vrt, + os.path.join(self.tmp_dir, outname), + projection='EPSG:4326', + options=['TILED=YES', 'COMPRESS=LZW']) + finally: + if os.path.exists(temp_vrt): + os.remove(temp_vrt) + + return os.path.join(self.tmp_dir, outname) + + def get_date(self, days): + """ + Calculate the date from the NetCDF band time value in days since 0/0/00 + :param days: Days since 00/00/00 + :return: Python date object + """ + start = date(1, 1, 1) + return start + relativedelta(days=days-2) + relativedelta(years=-1) + + def run(self): + """ + Retrieve and process the HadGHCND climate data. + """ + for key in self.layers.keys(): + src = os.path.join(self.base_url, key) + cdf_files = untar(src, self.tmp_dir) + for cdf in cdf_files: + interval = re.findall('\d{4}-\d{4}', + os.path.basename(cdf))[0] + for measure in ('tmin', 'tmax'): + ncds_gdal_name = 'NETCDF:{}:{}'.format(cdf, measure) + ncds = gdal.Open(ncds_gdal_name) + bands = ncds.RasterCount + layer_name = self.layers[key]['name'].format( + prefix=self.prefix, measure=measure, interval=interval + ) + img_list = self.get_mosaic_filenames(layer_name) + files = [] + for band in range(1, bands + 1): + days = int(ncds.GetRasterBand(band) + .GetMetadata()['NETCDF_DIM_time']) + band_date = re.sub('[\-\.]+', '', + self.get_date(days).isoformat()) + img_name = '{}_{}T000000000Z.tif'.format(layer_name, + band_date) + if img_name not in img_list: + band_tif = self.extract_band(ncds_gdal_name, + band, + img_name, + projection='WGS84') + dst_file = self.data_dir.format(gsd=GS_DATA_DIR, + ws=self.workspace, + layer=layer_name, + file=img_name) + dst_dir = os.path.dirname(dst_file) + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + shutil.move(band_tif, dst_file) + files.append(dst_file) + sleep(RSYNC_WAIT_TIME * 2) + for file in files: + self.post_geoserver(file, layer_name, sleeptime=0) + style = '_'.join(layer_name.split('_')[0:2]) + if not style_exists(layer_name): + with open(os.path.join(script_dir, + 'resources/{}.sld'.format( + style))) as sld: + self.set_default_style(layer_name, + layer_name, + sld.read()) + title = self.layers[key]['title'].format( + measure=measure, interval=interval + ) + self.update_geonode(layer_name, + title=title, + description=self.abstract.format(src), + store=layer_name, + bounds=('-180.0', '180.0', + '-90.0', '90.0', + 'EPSG:4326')) + self.truncate_gs_cache(layer_name) + self.cleanup() + + +if __name__ == '__main__': + processor = HadGHCNDProcessor() + processor.run() diff --git a/dataqs/hadghcnd/resources/HadGHCND_TXTN_anoms_1950-1960_15052015.nc b/dataqs/hadghcnd/resources/HadGHCND_TXTN_anoms_1950-1960_15052015.nc new file mode 100644 index 0000000000000000000000000000000000000000..13b2755fb2ae20f8c2b71633cddcd1a3b906da06 GIT binary patch literal 170328 zcmeI5eQaGvdfu-qhp}SD(3Xa=g4M+}yh~VJ#+H^|Wwrje=a>d-)r#XS9mB-BFbbpZ=Z;sl?ASy3A*EGsOIP#2R3hc<$*1`wr$?BecPs;I(l)-&K)~ZWi#5p@Y2g$Ufwd~x^K&on*tW;kOyzIgyyeB+Teb|n zu%&-UgD) ztiMdwS~#tLynox^-h522=qLUa%(VUG?fLtj`IY#&)%l*6)82jGH@2_N$J=)2pV_kA zRD5}NnH$ymEiYr6?@HVLg%`TF4GuorKlpNb0hs1)`Nza3e{uV^p`mR%UVPz|E2quZ z>^x<+52M=?lt#C00%0M#ZQGIQw&AHJ(e35G7ZTAf!o^%J#{b0}$A<`iOZZ;|@*n$o z!d--~5h(K*;6C=R2>k@gKlWRM?-PDFoO_IGANwQX|44j{n0g-jJL30={~IxNK6Z}y zABg`ivFiVS#Q&JfbuEi>U9{Ep3rVi)j^SL_KP64OT`NidGt&2vMux8YNOzE4LmGLy zewp+`qzj~xt*e{#za;$>Y2@tsHPXGL|21i3?%GUxE9pVf$lokpz_*N^hKF7)mC!}GbW(InUP)3IC^ zx_A9aU#{yaKe69-|y2j7vy8b-Lb^UBC*99hBf7O@kIv(Y^{wAN|^VeYTGlDQ1 zBm9&g9DkoMLJ+p!Bm6r8@;*kdee8Jxx;^#;;ZcI@^3Mr*0+>D4O86(K{o;>^|DJ#? zV(O2n({|?CnDVlHM1I7v{*pXlj6i;g;}Ydcv|Hl368R zr2I+bIEg$bk?SPaoFh(KZq_6Uk3l% zm>;enV2c~w4u#In&M0CI{0{n0G@f^b;VHWoz`P*L+mK-+?H}U!^yp8bOP!tZE!g3X zfq{6{sy{AeHXDKNer ztXG2ZJp}j|e}`Du4`Y`fV2|G={RgDq;&_Dcn;dT>=9=gnfp*l-Gx>^?5eJhV+82KB z6MqWUE5I6im%d7%jZz=!ZN%VH`gKAt$1&kf^g7Emd%<#|v-3ULlKfZd7ako`=_`1^ zt)zQkpqS&KHf17?vuoxXTWz+F z93Vzlhv9NO)Y)0Q)Y*xsxu_LeP)7bKrVaOntza+S3;ToEOH@$=b^w>n+E_9(ThnL8g4XD$4AZHQW3kmYIUyb9V=aIMc z6@qM}>z84R%bb_3bna1tuxKM6%+;TZE0Jv@c0f+ek>V!_uYmDyga4c4Dd2-*aEie% z?&172_^WRfMyWT44JaocBwkp?@6q-Fj(>n$Kjc{9-y^s3>2sxS0&?2tb37d`1?sLo zCm&TDzBa$QT&?tUKB{yF@(BAU2;N^ZIJAqeekhFquUQ75g`3m<^512bBU&rJpX9$!p z4h#$kw_38sZM89}Rd%{O`yFg;8yrU-+A19({Uc<>R+e2nV~o%kP+d>7zT8i{ku#Ye z*RwjKp{whc56I3X9kl;Ga@|f4W*V~;gq64te}tFE*kk3g^0Dpc(?j`P=psJ#qf000 zFA*;%P)7FhbKqM@w{ebkoqvk_f!f&n{sCKO@+~B`O!zYM|F4j@hzvg?W^8f)eVB9V zmtf#I!=WjeqA~g>b`o|vC%(*A7LCt^vJHzD&^-^fVCViSe91FD!hbawsUOIPfLX$I znrnI9sWf%TCZ!zb*HI3?VJ>Yf+eG*;qnxn+kZ_u7!BctmaVw}#GK>@UPVpeqt={)9 z_Vw~ZaH#Yji2pNk3xD+Y{0BMg3y(0j`W#<~e3B(XcFE%TLd(E_=GMY4lXJFl#I=P> z={!N#8~=86?*oT@;B*4K#MiGNn|Lq$*C1n_&RIbpa%#*MhviGflcbg7BgmFZu^0DsZM+Qp(ZkPDNJk@6GU}RI_0J}+NL}(_?ne`} z+eT;y2g%ZVH4iD86VFwz?5_S3ktgg6)S*0*xgu$g4QHwA0=$=>C*L@K#<}xge6*@= zu<-pVb*O&XcQ*1bq)alD_J5kn`8m}-^nnAKGez*jeuwcdYa>EN`<(-f!EICd+4=dz zG<`+SY%EE@z;^{)gW1|B2>V8A_xU!*aAWr%Ia}X{~%uddvhc>&fZf`#N z%;g&QwT<|Zc5p>z-AnskoUtkfmzcgAw-UfXe#m9|unl1@?bIq;pk3jl=R~#6C$ujf z7e~NSF*xdeP}k_*M%Q=@;k8t0=ya7Dl9EPQX_ zUIQN!FDIYA;Ct~rdNLlw*u>8+5LT6WwT^WkZyUgkBDlW`&f>eu+V2c= zp0xcHv25UHe_sHewd9{EPu%i2+f96;%>T7uEzI#fdRBYonc-VA<26%z+4;gqilh97 zc&#`)S1Xo3Q6CVmHO|Tg@--fFv8BiHR_eOIaXG(MJCYhMH@=*DEg{F8st*^}*|vo$j2TL5cseA>#S@=5%G7 zJwNK1n$LM|e+)aIl*fe;jyYdX9mIWcxSn+`^|~VN9sPVKpW@y~eZ$!slkiW3lzteN}{CuJt@#<6fiW(VV^3ZwifSuUYH$GdFyJ#$w4>eP817lKgth zpUo%hUGGw?Pw#FR=V9UVM0 z^ZdOvJP%wbuCu-VU7q*q_fYdc&u@P{^aJb>VVBv~5z5USEB)50=id4)RPQ4he=yJ9 z@78Z2!4J(H*V%H>?|JZht85_r{Z0%0z`sEa60gJ$wS{qDws_99TyzGm^G=y?7WRIR z2*0D>S^XY@-bcV5bFFtV%S*nc+DH8zLm&9#bM*VOz7_U+UY8h)FEAF%4hNf;|CYap zajGqqcQNf=kGAv;PqmNtcQmbVO1=l*RF8HVQHQ?c*Ei&PPr~mZ!t?_&=r}*W8Fe)GoHW>Ves&GG z1>6D$0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I z0tW&I0tW&I0tW&InwtZ0C+nHAE_$%j;$h%*tl#Hd;ZwY?s&Jb3nibAA?|bD-l^^1RMur536Y}abj`yU{y z*TzS_q(Z-1<*V}|6pi-6wG#x^cdb4@o3!>{m`%B)t-Nl19(g-C&O!&@RJ8JK1-N@{ zVz4ewa6E!N&`sYic>k^su?1`G$E3a20kV6Ksjb8>u@>#Fx%FA^BXNwjCu?IXoxGof z_Vmym5%;i0{wm_Tru5aGJ>Cld3|(Izx(%V*for*LL9+`QHI^+VS2^nH^Z?+`8#n%8M&h=kH^$)ZYnzMz6Mk_j#jUtrZthUhAjp z`?lceG!R_vblFd;%Si)0iVqavWSf!bV{1 zfNt8SL2G;K`!9cY_YT-ncf0|7_Y$(b@3gdkTkS4B1+cP5geG zu!G|_iIK(o1NCwKdD2f4$LPl%BbNU(GE1(F^wDwf1n)}x^j(f*t<2N0`ndJry6SUE zN5{~S`f8ndV-nlmqWm;6S^!1@gJR z2z}R6M!2X9n5%#5J7D*NPr#UZ;yqw2?(gRq+~V&ME64_l2WsmQji(%RA4_OUvTmd; z?ePU4NON6@{(5Qo;acb2Z^e(N zknus}6~E!D_b)=uD|QffzC}6pXW2qFQC=(9f~T+fjPk3|aRcEQ>QevD>I5f^6Z#I- zYmeUw-qh!P@Yqi*lP!j?+3|F-zYCHLsL%UwyiL20)7}U{`2PuV9Y)SSB&IKw_L9f= zV*iA{^?3W%wfL$%_t9MKbJNK)-+b@+C_II|u$6DTh-=o;N{lDoZ}3OR z_vak{6dBmpv!wmNq$_>eKJpNHwOrd?)B#73?~zGti_EiWbGkCBtMXjsI`s0@o}W?1 z7|N%#A?tEtaJK(G4W?sYH%!2%YF{Id13i>`iE?`h&ypvsZ==3jDE}!oJH_=17dWT- z?xc-Y@+S#VZEk7*kC-|t<(=**WHhMImU0;$LN|>To2aXW9@COIh>YvpO6jIM=o>zed-#Y(l^toUf5^# znICV^Za?85@@Y(Xp3sV1q!X|xTw)Fb))BhNC+b?wX|(s0zB!e@_nu8}&D{q2=3nET z@$RLb9@-ot41f)odSANxz;ZqEJxweuC7*ouqZ|wWvy|^6Miz&Cq(7xhJLkK~cG5V| zK_30u{pK=leMGzOQ&!<3b<;0AUduk8a}K#OoEjNR4(~}Ry~YSnB9CN4UXMk>A8b5_ zM<(xu0e76Y68mhV9QY()H(xo`PLc=1Tp35TEliIg&+TBg27KffY0Kk4#PL_aS6GdM zdp=#G+WwF-iNa~{#9w%?!uu(!a8HV{IsP$vo}mrOsSg?dW#kF_3)K0}BH@3O_H)S3 zf99F6lw4q2oB(@xs6DBKt^MCuu|b|bFvPk2Q+;$gpE(uq==7H_CEqy$1EC0y1kIANR|H*scZKS7A5xqkBoOAYP6g#CJkv-M%+C1@;8x z%ePjZZ^W^9uJ4S!)&Un{?ndGZ??12w+J=ol&B|AlW1e{_Ex zZz9ATx54>g^sJMd@2OqtoJl|V6Jcxrp3p~I>9eCLr{#CVbH$GmY|}jaKrh*0rn;Nq zyyW-zdLEh2aeRWn7@)n);~j9GG1+~hC0!q?UNXyXcz&h!W!o8SVXU#4?m3DRgib<^ zzCb;SJy#xpKO7tXk=b*#0x3P6@irr;X{Z z(S4lylLNR;S5EoHnlc`87A}X1KIUAN*wjzZ!W_9mc`?yf19Q6-6$9? zL=XS|pgwTwTKp@Y!!4wZ>T4Maya&1NeXGyiY-#_t)yLS#d_uabKU886wWiMtD)Sb4 zoZ#z{cJ6sQuWoCh^)A)xWdqI4JwFGRM(dGa2c%_SK*s_9#lb(vpMJmhv&Q}W_QA8j!T{eU1sgQ7~vfy`bNONF&LQt9bhc~ z56}HhSZDe!i+xcyze6RBX}exFEh3-ZNvl_Vi&>9<>(0afVcu&bPQcsm-peM~!+Yb= z4;E9VxwNJ6NxuOUFYq1|VJ_D;Ls`#tTETi`%E$YAntl8p+JZmoYi{oAolDQ%7`uwt zq8Tu~nPkw|Ro?zeZ`9R$@OiEzonpE?Lp?X9oaj;>-$E;THlvF1sr&v zJMLcmcZrjaS(}}8x3xckLbX2LD}uGTy+6VrYhw3JkD=C3&#u=y^x9KI-*S4r&rYzI z=l2Yyj@iB6?tMjAKURBTSZ**aCfGj*{%~G4&^qGYyG!^#1Lm6v{p3AQ{4{x8*yLf3 zA1cS##P(5H>dW+-O?!AU^4DPl3sqG(ObIf8xyT}o0XeutUVOGM-JFlUVDW%+N%U!drzsa!zZvVwGp$< zf3~lSu<+W@2^h-;tH4w1ENiW6VT~_}JIZ-nZyV{{WeiLDU{65Ml9TDG!P0y$6 z)_QLiu*u}}_f23F(XPI6^7j%W;BpCks6XjNHuUjc5v#!;8N4?W9PpkKouuz3ETjB~ zls`fFu_^t%M*{U_?I~7$$v9=Z)3t&1UE6f~2~}P<)3NvExB#ZeQfdWjbh?7|T8$I29*L$jL;}}kBPY>^ZAsybK@8&Dk>PAL?gLDv#j$#|+k0$x<=p44W4QweJQ?J+H z-^KB39KQno=;Z#OJ_0^*ZVHo1J-jd81a+(a_1It>OsLP_I$mc6mp9n9mt6PDCNs`Q99yS^1n`i zW2IL~3xC;0*XX?V?@?cQh&0!F-rVUvD7KZ<3~;l{O*QGvsX~z^%$Uvt!A+SnH)G z*s~;gJCLWlEVD3_J*2}*xO|xacF8&PXr-L`hRSXrR#;ECudMUBavjPCS96g1y`R-# z>ez@K#_7M*m+4y>NAJ|muPiguvFAGSFW)8nlrV-I2Z`TA_ScAigYfIfi7fJ;)%6u+ zOJw#pkfT$)s4h2~w8q5BavHa?w8l)2Ri}{U9`F+8!ctr=<7e#QdV)Mf{xdg@a@`q@ z;h+0Cec1c&C|-q5>NjoFaXW$QRL1pu3jU|Lb{d_u?~umXOumK0-WN{&^KX&m?~vmi zj{g$fexEQ%p2jb*5I;NyRLB%{BS#;7EMi`RjTZ9#V?SGN zk3Tkk;;Z=kIPwYqc5nl0`)TA7MBJ!7xqZgbmokX{T#4nMz5gqBX-gA+2NgHDb`l$}>Ng^NU z{Yt<f67PXYp$n!-Gd~1!Sqn7hw!(* zxrMy*W$aIJ?lS4m%XUy%>Ji`Os>giFmu^Fj1GL|ZZhwwme@g&s=_^inT@dtF-QF9Z zi}^rHY6tN@Yg2wBJDzJ?oYIzO8r2xKDC#97-ge8TsTKw zXKELHA8sG?7P{#8IC|v>!e3lRr^>pMc5tW8gnE=OKUjIb5y#?|?`x4c>4y88;fTgM z<`5cZ_3TRf-(_vb$nX1yQohfaUAjL0#RN<*U@ywqU&3$A$?gBw z<1l$bMjCLjbXZ#iSr+YqAS@1S-lx&1t_x)r&aWs3-e9la7mn*}x#@Q?M$-2n^}JDb@Ncmk&$fPV9@{z`$K8q?#?)O8zPw8(@kw3%O-wW(d?IE8OHK|^H z@2@BQonG%TFV^qG8BRBQtl!J^{vY(Pue{Ol&^PAIu9N)cdvL$m<-Ael`y#$sxKXhT zdd+_RAio>%nmt|ttAJJDK;S^&K;S^&K;S^&K;S^&K;S^&K;S^&K;S^&K;S^&K;S^& zK;S^&K;S^&K;S^&K;S^&K;S^&K;S^&K;S?j>p$*Z=kXN`{r*$<~^nT2szz6u$%hTGx?ud@q4D#=Vyg*Lcsb z^^0#0ufNTDtJ;qsle;qZn%75Jb9s#K5f1Zh$Q;)I_1gTx;1J=!{Pfd4OqDWCcWha~ zSbN+gNBD+fU-~^!aWq{UH#&$NSkuva>Z~qf-%a{q`a>4hk(OQ7U=#Le@jiv@^^olk zHeK7b^7Tzk_FZ4C^UCwS9(S?sd=}7Edje$TwZDhI$r(ha-tzj_iorB1lf^~q%Z|N< z=n(pjz&VBe*Ot%9&J=6U7O#_k>gqbYX9akAkGPBQ<8#)|$G>}h|I5gzaGreH*BZOt zC*y5+K)HAsvRwjS?d74618??&(OSRqOK}Hz>#)TWD&t5zki2% z2ML_ly6~AU-Z$YOd9O9J2ozL60mWpo%G!tFGuc=DK|kF zBCIFf%Jt`_c$lrjoVBO8NYJ|DdMDj$)E`4G^sOv2pJVTt(}HZ^g}!K$>9 zz#7%1Jc0d!N)hpsoZmqHL&Ti7tq$^C-DJAny}obsn%C&%^-lLA^Ed$=y~l_80N2M` z!F#wI`pbE;!D^0?r*aR9O5MC(^J%_aqD_56rf-DQ57jrtZ{z+j$KA-gky!FdZt5)| zllBO6|3!A;TD?~7yOXu|e97KN*LQ`L@?^*6Nl)!_S()nC+~`0?VXJ^|@P1OO$P?~& zktchIOI-wTDh(0uEyvr4pDkafGIw&_9rT;a;DMY;TW6=fVbGe7UjO$D_cn)`xuqYdem)mIsdD~t85 zm-Z(r<&kS0vOWZU!fXTbDTbrwFlCZ)_@ZzwmEC2Z8Mvjg1BCtQHL{C+ zlkBFjDlIGD>X7657V_b!@u&Y>+3WRSZHzOYp2@e6*nQylk?|bi{~*KvB>oHH*9Z?H zqxw6()&8;j>isl5-lK=t?w`a)y;GRYRVK}gWJ_J=^+!0?JfS#&Uv3AJx54O3$R?SD zqxgzH@cw*?#gX^RW0lHxkRBp^6h5CO$PO*!pGOx3;S6VzQ)%7yZL-B}1lfXP?+y0h zwSHar&sD$cl`ri_A7n5658}V4{l6i89X;YMs+Pk>+Ne*AAw3l#kSY9EsA0sF!x^Glji;MmO1~c80Xbjc( z<}pqfc5wX==g`sLQje!~d91jMUCwgu7-i7G``QZsW#nBbmw)HleqT$6h30wgIRfVL z|9^)q6t6_SgUFHx;{nES&PV6Lo&FzhWIXC(9?()=v!WBd$y6$TGo85cKP>*;~R2{>F!_+fc#{3gv>hyJB?0)b$ z@g>@Yi%B=@Z^#Ddw@_?nbG?15?0x_q{20A{4;|Ma^V`TU0QSfdqqpjc`v_YJcheU> z;~Y9>ZHTwo@myp38R11+vZHM;Y~W_~9KdtWeA*A~SCC0_VU0w!bjKCfPZ~2^h(@`}hESC#`w=qmyuc0Q@*sf7JX% zeOGHK$tP9TG2OB59ka1$x-#|7w{N81+0WodBV=~oapqyj?)&i_^a18Y-V2OAA2Tl3 zV@(i`vjU<6&qIXTbavG9jDB`$VCgG+5~x;U%oq2RvrC(T~Pc+|?(fSFO5hYmoRQZ1Fs?d=GtNzU@_BCx0zpQJGik z*y91X3unohjj<}5Va(~Ao{!ky=Tq*;UbX6LwmkV=`Eos<^cZ_T9KVb3FnB+Wzh{qZ zb02QWHuaKUYu(9jhIcPyz7Y}n2-sG>{Cf2@%ef-5 zM=jue3-}|ypDnHC7(L>>#Dn1f_0&G{PxX{r94#b-OVp1H*Q=YN=DXze7z@U_XONFE z{^R(i_4I+ErGa~;uDb7xm*aCT6LU}FXRfb;zrqvvqz~yM=wErwLXR`w?dPTK*T;Uk zwj_7fcjWgqXPd5Uv*&B9)$c>z|Bm^!eLsEJ;{v)xef7ovJ$NkN)9kn`rhNXLY7EZP zkM-=Yk!@>!CH`h@Wb^JUeY3`quxFkV=j+S&>*1+!SmW#U%G@mH^jt^dtnPiQb&gsa zgHbWZoDpBAK2hD~&6bY9BI;mHRGvp@e5}Mka}UkkEAtk5thu|!&wzjGbM(7$)RWE$ z3rXYske^>pEWLT-LU}iv>kIIGF>E0I&qkh`u}tMW8gE7m&|`fW7)9&`0w99R}zE0-ow^6hw?r8Ew)*8lkC~=0M+xiW|g^8F8LSpXJiUv>7#ES{0<}d zzl)BQ*EG|y-o2E3!hR{@-%MD}M;5(*sd=(EG@t96`5L|F-pukzHqF^`{589rLB3$4 zz=8U4p!&PzfNlM-4Oj*&0|x>J0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I0tW&I z0tW&I0tW&I0tW&I7QzAjey_iU)Df-?_yrCG4g?Ma4g?M~KL@mad%f3_4|c!4-J>xu zPYCCDr~D#o3~Ieo)}7aWJ2yIZ#R1m0mEMDVV?t=OzLjfk`&%xx-Ag#aH%1?`wxDod z3I1ANJkK%j+Fl9wExtXx#`1yk`pJ8*{ocdt`u3F9aUbU0c*;(PVdXikOJ9U@`o2Ve z`j%(DYxrsH`03iL%=fz8ypQd5#A#3K5$k&jeXF3g9i!v)fy-duh8?udqp-i1n03V+ z*zd%79oUBbHXL}*I9B?rggN$?!U*5d4D+t?#_4(IJqnH@E3$d-4#{1CaWc-@vJ>Ec zf_w$O+i{v}Ds|Vf_B-?56UeN!rNw=q_je}ZVs-i1rnQ%d_XL2ymDhM*80|{}Z@r%Z z_&EkoulIiunIvx;-`F8v)I%ST4L-pZ)TupnqEE5SIgZbiZLAG#pZnV}@(xjEFV}P;17);ka*n^^2v{jlkN4z2 zH}8{jmiR+tR{AsaS;oEq}lJ3>3~bA zoA4ddJ;=s-z1|~ZJ!#gB_1-*Dd5;p6WBqFF@!~x$kW1ga2m`GgR910{XurE|}<@T3tI8s}enEGPXcY$!6x@jkB2Yd3p@692w zPmm3680*!@rhbfk-h+z%=KhV$)8V4;)T+-FsZ-pq&Yw*>0*6}V%nNMhJ%WA*9Dhjo zO+r7ib`uMGe2(^k%>0!0mhxUsvdvz?8wBmU^(uO?r-W@LeTByf>~Vn5k$(T^K0%)F zb6=3(xp&IY$f5q)nQ%=5ITw%$S3kC8_{Q*vaySw}iX)+flr z?v-_B$MRj`ah7gmY(IAydBo|h$RzyVBK`qkkUZf}-!z~5u>A#e%%NjM7^4iwwhunU zc97j(=eoXf{+GZ4oV4GGebf>9!}{s>htPM!3)RGE z-%+=vepcV|%C~5Yv#!|5eY%VE8uYsh%rA5PbIK7^Cfg${lc6&99IA6+`eyY>Ui+Fs z+C|P1a+*uOP5cr;*!L1k=hevg;glV`PW=k}8}i#1?ycQVvbL@ldu~3GVs8DAL*Jkm zz|7x=j)M_8d(W=R$RqzKpS}l~hmcJ&io?jDxt{D7_0Zp2sADvp0~D#t{eiUnllQ&5 zL>|YQbGVP(M%+eyT<8A4xoWvJW>@DeBrTs=+DDr|BGx#jxx$+q|1L7_B{24Q-%;sz zG}T4zd0sq#Z^S2fE{82O9?ex2V=v5TD~if&mI3vCUX{? z{mnm^%O@4U!S@id!8zt9!mNz|U%WRba(H}|?*Xf%1->JLe53mhw($4tx_&hMw%z-~ zoB;2Su%$45hgdea2=3@%-*c9_R0d9E@-;HH-PeNi3S`jSKznlj8L@oO2IL+A59F5& z!rJ?=?PYF&e&T;7TQoL*d4;zzr;P&b$&YxBEW8O>8;acr-cR$qpPlmHgXVR*&i6RT zt9xF58$OZNq303`9%DZy?-aOG{;IlV6Y3WJ%2&UuJl}|8>u^4`z4?SprN4$-2f!bW z3y;eCZtq96hdlay++H8~G%wC{%kgL3g_ZEnS7C%76mM0=SRtoyeS~1!A+PTFyp{%b zD5_k?6i>2tYQ+h7t3T8_*Qh-8!xH*i7kFJl|I+v918?G!$S0ZZ{Wu8b+u(g4>6Q2* z<_imzN&Cv^{(ZXVLY}*HBijIB2wRMSrThi_)V+(^*Zio+dHE~*FJk+AV*4NXKU+D~ z-fYX(tDJE^2ky(z4_)LtHBRZ?UVYkQZ9ljRd&T$T_d6Djz2w)pt7oG#=^|h3d-dZr z@Nd6*&$|SB7Dov50gbsc)!hu|?F;4idcgy}6jl<0eZ;Z%VS5G4ABO*T5u;zdWY+x0 z{+jmQot$~yHT(V}$eE3s*hS;AnCfybgos8`!w=vz9Ag+oIY*? zd+x{l><^scd+<4*5Z^uf{#>iA>OA!~Ozr1^MeJRDzA~-xFzG4#A=viNP5_>bXtxHs^#*KZMb!+UZ6mYIFUT=est2h2S;Z<*h?Cm-o& zdXwe72(iOlWN4*ul^J z+8T?E`dcMlnm<;iZ`R|8J}u76=iICqe35!80sVfd-;#;^97^vH&E@(~ zcK&?R@7{WUQP0=?J1o!N9|`y`+VfDoJ1y?}-68!ZJ;HDMdiI;l-=?8I%#59%vy1m| zU420BVEFe9`0w|+hST3d0+uslIUkJlPO;y^i}3x}LGSwNJvhCKqjy>SZbj$=4f8|( zKJgSbfcJVI#_z%q>%G$=xa)n80`YuoyI9w_4)@(z~<9AC453pbgCb55XSXGyQY?X&lhIqF_US8jfHM@)v{j+dsY5M+V$96XI$^Xx$Tqx80eQ7D# zC>joUE+u;fSp#1K2LcBI2LcBI2LcBI2LcBI2LcBI2LcBI2LcBI2LcBI2LcBI2LcBI z2LcBI2LcBI2LcBI2LcBI2LcBI2W|`wcrTEOcWr}R8*f+dy>f{6R9W9CI?dW@AM>vN zMb^0Ez14t!eKFAbtQn?ymtSkM@)~WzZnFmP5Py%*%eOiEru3VQj1hQPcdpa=db7Evx{US)sm`mF z_BzU7?)7}fk^2DhcJmFzFz;uRAEB$(6!hAr!k>L5yv}bcHo23qiqM8_?!YE3^p{I% zd3_t;eRQa^R=d;X>3gOuOjjnMPRo6P_%qf$&w({@Rz9K6CS!zAbWx0*944-{gT9^0 zuJgO;1L!ae_Y?+ahhcVoW$gVD&cc&Vudbui!5Zy()@z5y-XBJLGJsvFmHZsCf}hrk z*7qzC<)XI;)Z;xnly{2oHUXXd9ZbS``iF3LA5lM{9`DiiKG&)2c)2{d_}dNi@;7Ad zhs89Q^X}>t{*#s}VCxhhw1gtq{`yiWak>TQg zb6^T@UOx7cm>yCn+Tg4 z*UstRy?1}1U9HovKIgrPhSP6^^sQym0`AD<_2(}lyLdw%^j=%Y=Jn_=Q05$E&VW7F zRN|+#ujP{}^K_hjE0yh;QGMS1g8HlTua`Eiv}L@tFVH5ApC#5FFv49ydw9qePm$M0 zdIz!g@Yu@n^W?7~UIv#=qt8L&qr@i&ibv1|eTtHQ0({5tQElK&oiVnIHxREQUP+t- z%l9dB0BrM=2cufDYpraTzm4*z=o`J*pSs1-q=)to(W(xtnQvkT03ar_3Devfp28EAibSg9+H!#|mHcme2PXeJ^%s2YYbIWE20hs-C_k}+ic1exwf zrftO9_iF^1_myRqk3pVNp1kFpU&gUQEBUgU`o_Z?Bd_<2goo}Q8;HTxed0rSb{3t$ zReeL_nC4BjbeXPS*D5<*p8fensC#2MKoB4ESndWs|EZ#3ar zGO#c^=-cPY`YXR{P&@XM#2%A`;Slm2Ku+0UFR^eG#>{0rP9G&EKj{I_vV89AXGve; z+RP@h7yR>HU!PF^JjWNxb{M-_rc6HjPOi(+3yI}d%!j`~ChT7NPsG}H=a13x6+&;S zo64zgY7UXLr?JcZ34Qd9+Cut*$0gcV-JVZi{|K9UuCTGZroV9EeCclFlkeC>c#R+* zFMpEf+%jZ2hOC{)ioG70M8;8gOkMhh+O}-v{C?^>gUqMF{4CdeitahCzXg3;sE^n_ z37cg4W^rNUS}tbi<`dgb{uKHDJK?_)jw1&$dw)xfbBu2upV7hdn(@>Io)6psM(CwI zZ?txW^qs2=9>-2m&wfH5_`}yIBJL*LTE>sMJ!YLiuCE{y_?q+bmt13ik1WQL>wq%d z!%EmAll+L+){tFDyI&k5ew%9)KBAsm3Aa=4a@vp`#kmis_%>I)=2G7ChM%F=QQH21 z@Rx+|Ba8gcJ#Ya%lJ@lbUd_K^_!iUm;~`>X_T2aYxS{)8+N~^WoP^am>RAbHv8OP#dpwWJt2Vy@g`<_IM2$(!xHW zp2|9=JNCW6#O%sWSI1oQ)qhLk#V7PRn=Kx0;4_m?f_rUxO{7x_7 z>DpYKk500+@E3O(y-;&w}e`$a@CvBdg@) z@^s&|RyZ&GkJpbQ*ORB7=iWmP;Sj-jKOfJ7_d2jYK$<s2=`&+R?L|Be5LeII>H&a>p9^p4r zxtr;{?@8&u@p?GVSldY5iwV51uxaHyw2|#Ld);~l;NRxf)Yso!Pj8RKj0M*#bF-W) zOyKL~i!NL{e~@h&fkQSHXg<@3`j=9j1^IzmE)LEpgnz=6Plz=6Plz=6Plz=6Plz=6Plg>s@zbyyMrpK(F*!cM?&l!ce-%C=wk!P|4*Zu}l&vtM2y7bM}OyIpazk6D? zg~sA~_PKi26~PnZe~I5Jo17=L^fv!5rth6)=d#+`ccBAdU}tG-ZA^V>YwDA*{+U*JK&AaEdXAaEdXAaEdX vAaEdXAaEdXAaEdXAaEdXAaEdXAaEdXAaEdXAaEdXAaEdXAaJ1hIPm`g*D9 + + gistemp1200_ersstv4 + + gistemp1200_ersstv4 + gistemp1200_ersstv4 + + name + + + 1.0 + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dataqs/hadghcnd/resources/HadGHCND_temperatures.sld b/dataqs/hadghcnd/resources/HadGHCND_temperatures.sld new file mode 100644 index 0000000..f3d908b --- /dev/null +++ b/dataqs/hadghcnd/resources/HadGHCND_temperatures.sld @@ -0,0 +1,28 @@ + + + gistemp1200_ersstv4 + + gistemp1200_ersstv4 + gistemp1200_ersstv4 + + name + + + 1.0 + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dataqs/hadghcnd/tasks.py b/dataqs/hadghcnd/tasks.py new file mode 100644 index 0000000..928f574 --- /dev/null +++ b/dataqs/hadghcnd/tasks.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# Copyright Kitware Inc. and Epidemico Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### + +from __future__ import absolute_import + +from celery import shared_task +from dataqs.hadghcnd.hadghcnd import HadGHCNDProcessor + + +@shared_task +def hadghcnd_task(): + processor = HadGHCNDProcessor() + processor.run() diff --git a/dataqs/hadghcnd/tests.py b/dataqs/hadghcnd/tests.py new file mode 100644 index 0000000..96acc1e --- /dev/null +++ b/dataqs/hadghcnd/tests.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +############################################################################### +# Copyright Kitware Inc. and Epidemico Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### + +import glob +import os +from datetime import date + +import gdal +import httpretty +from dataqs.hadghcnd.hadghcnd import HadGHCNDProcessor +from django.test import TestCase + +script_dir = os.path.dirname(os.path.realpath(__file__)) + + +def get_mock_image(): + """ + Return a canned test image (1 band of original NetCDF raster) + """ + nc = os.path.join(script_dir, + 'resources/HadGHCND_TXTN_anoms_1950-1960_15052015.nc') + with open(nc, 'rb') as ncfile: + return ncfile.read() + + +class HadGHCNDTest(TestCase): + """ + Tests the dataqs.hadghcnd module. Since each processor is highly + dependent on a running GeoNode instance for most functions, only + independent functions are tested here. + """ + + def setUp(self): + self.processor = HadGHCNDProcessor() + httpretty.enable() + + def tearDown(self): + httpretty.disable() + self.processor.cleanup() + + def test_download(self): + """ + Verify that a file is downloaded + """ + httpretty.register_uri(httpretty.GET, + self.processor.base_url, + body=get_mock_image()) + layer = self.processor.layers.keys()[0] + imgfile = self.processor.download( + self.processor.base_url, layer) + self.assertTrue(os.path.exists( + os.path.join(self.processor.tmp_dir, imgfile))) + + def test_extract_band(self): + httpretty.register_uri(httpretty.GET, + self.processor.base_url, + body=get_mock_image()) + layer = self.processor.layers.keys()[0] + imgfile = self.processor.download( + self.processor.base_url, layer.rstrip('.tgz')) + ncds_gdal_name = 'NETCDF:{}:tmin'.format( + os.path.join(self.processor.tmp_dir, imgfile)) + bandout = os.path.join(self.processor.tmp_dir, + '{}test'.format(self.processor.prefix)) + self.processor.extract_band(ncds_gdal_name, 1, bandout) + self.assertTrue(os.path.exists(os.path.join(bandout))) + img = gdal.Open(bandout) + try: + self.assertEquals(1, img.RasterCount) + finally: + del img + + def test_date(self): + self.assertEquals(self.processor.get_date(712224), date(1950, 1, 1)) + self.assertEquals(self.processor.get_date(735964), date(2014, 12, 31)) + + def test_cleanup(self): + httpretty.register_uri(httpretty.GET, + self.processor.base_url, + body=get_mock_image()) + layer = self.processor.layers.keys()[0] + self.processor.download(self.processor.base_url, layer) + self.assertNotEqual([], glob.glob(os.path.join( + self.processor.tmp_dir, self.processor.prefix + '*'))) + self.processor.cleanup() + self.assertEquals([], glob.glob(os.path.join( + self.processor.tmp_dir, self.processor.prefix + '*'))) diff --git a/dataqs/helpers.py b/dataqs/helpers.py index e8d4f05..6106361 100755 --- a/dataqs/helpers.py +++ b/dataqs/helpers.py @@ -21,22 +21,24 @@ import gzip import logging import shutil +import tarfile import traceback import os import subprocess import requests -from geoserver.catalog import Catalog, FailedRequestError import psycopg2 import re import sys -from StringIO import StringIO +import unicodedata +import ogr2ogr import rasterio from osgeo import gdal, ogr +import xml.etree.ElementTree as ET +from StringIO import StringIO from rasterio.warp import RESAMPLING from rasterio.warp import calculate_default_transform, reproject -import unicodedata from geonode.geoserver.helpers import ogc_server_settings -import ogr2ogr +from geoserver.catalog import Catalog, FailedRequestError logger = logging.getLogger("dataqs.helpers") @@ -79,10 +81,39 @@ def get_band_count(raster_file): :return: number of bands """ datafile = gdal.Open(raster_file) - return datafile.RasterCount - - -def gdal_translate(src_filename, dst_filename, dst_format="GTiff", bands=None, + count = datafile.RasterCount + del datafile + return count + + +def create_band_vrt(src, dst, bands, source_str, nodata=None, projection=None, + geotransform=None): + """ + Create a VRT file for an image by band. + :param src: Source raster filepath + :param dst: Destination VRT filepath + :param bands: list of band numbers + :param source_str: String version of 'Source' XML elements + :param nodata: NoData value for output + :param projection: Projection of output + :param geotransform: Geotransform string for output + """ + gdal_translate(src, dst, + bands=bands, nodata=nodata, of='vrt', projection=projection) + tree = ET.parse(dst) + root = tree.getroot() + if geotransform: + gt = root.find('GeoTransform') + gt.text = geotransform + sources = ET.fromstring(source_str).getchildren() + bands = root.findall('VRTRasterBand') + for band in bands: + for source in sources: + band.append(source) + tree.write(dst, encoding='utf-8') + + +def gdal_translate(src_filename, dst_filename, of="GTiff", bands=None, nodata=None, projection=None, options=None): """ Convert a raster image with the specified arguments @@ -95,44 +126,78 @@ def gdal_translate(src_filename, dst_filename, dst_format="GTiff", bands=None, options = [] # Open existing dataset, subsetting bands if necessary - if bands: - tmp_file = src_filename + ".sub" - gdal_band_subset(src_filename, bands, tmp_file) - src_ds = gdal.Open(tmp_file) - else: - src_ds = gdal.Open(src_filename) + + src_ds = gdal.Open(src_filename) try: # Open output format driver, see gdal_translate --formats for list - driver = gdal.GetDriverByName(dst_format) + driver = gdal.GetDriverByName(of) # Output to new format - dst_ds = driver.CreateCopy(dst_filename, src_ds, 0, options) + if bands: + dst_ds = driver.Create(dst_filename, src_ds.RasterXSize, + src_ds.RasterYSize, len(bands), + src_ds.GetRasterBand(bands[0]).DataType) + + dst_ds.SetMetadata(src_ds.GetMetadata()) + for idx, band_num in enumerate(bands): + inband = src_ds.GetRasterBand(band_num).ReadAsArray() + outBand = dst_ds.GetRasterBand(idx + 1) + outBand.WriteArray(inband) + outBand.SetMetadata(outBand.GetMetadata()) + if nodata is not None: + outBand.SetNoDataValue(nodata) + else: + outBand.SetNoDataValue( + src_ds.GetRasterBand(band_num).GetNoDataValue()) + inband = None + outBand = None + else: + dst_ds = driver.CreateCopy(dst_filename, src_ds, 0, options) + if nodata is not None: + band = dst_ds.GetRasterBand(1) + band.SetNoDataValue(nodata) + + dst_ds.SetGeoTransform(src_ds.GetGeoTransform()) if projection: srs = SpatialReference() srs.SetWellKnownGeogCS(projection) dst_ds.SetProjection(srs.ExportToWkt()) - if nodata is not None: - band = dst_ds.GetRasterBand(1) - band.SetNoDataValue(nodata) - finally: # Properly close the datasets to flush to disk dst_ds = None src_ds = None band = None - if bands and tmp_file: - os.remove(tmp_file) def gunzip(filepath): + """ + Gunzip a file. + :param filepath: Filepath of gzipped file + :return: Name of output file + """ outfile = filepath.rstrip('.gz') with gzip.open(filepath) as gfile, open(outfile, 'wb') as ucfile: shutil.copyfileobj(gfile, ucfile) return outfile +def untar(filepath, outpath=''): + """ + Extract contents of a tar file to a specified directory + :param filepath: Filepath of tar file + :param outpath: Output directory + :return: + """ + files = [] + tf = tarfile.open(filepath) + for item in tf: + tf.extract(item, outpath) + files.append(os.path.join(outpath, item.name)) + return files + + def nc_convert(filename): """ Transform a NETCDF4 file to classic-model format. diff --git a/dataqs/processor_base.py b/dataqs/processor_base.py index 667d5dd..bd5697a 100755 --- a/dataqs/processor_base.py +++ b/dataqs/processor_base.py @@ -91,17 +91,19 @@ class GeoDataProcessor(object): for import into GeoNode/GeoServer """ + tmp_dir = GS_TMP_DIR base_url = "http://{}:8080/geoserver/rest/workspaces/" gs_url = base_url + "{}/coveragestores/{}/file.geotiff" gs_vec_url = base_url + "{}/datastores/{}/featuretypes" gs_style_url = "http://{}:8080/geoserver/rest/styles/" - def __init__(self, workspace=DEFAULT_WORKSPACE, tmp_dir=GS_TMP_DIR, + def __init__(self, workspace=DEFAULT_WORKSPACE, tmp_dir=None, **kwargs): self.workspace = workspace - self.tmp_dir = tmp_dir - if not os.path.exists(tmp_dir): - os.makedirs(tmp_dir) + if tmp_dir: + self.tmp_dir = tmp_dir + if not os.path.exists(self.tmp_dir): + os.makedirs(self.tmp_dir) if 'days' in kwargs.keys(): self.days = kwargs['days'] @@ -337,7 +339,7 @@ def cleanup(self): Remove any files in the temp directory matching the processor class prefix """ - filelist = glob.glob("{}*.*".format( + filelist = glob.glob("{}*".format( os.path.join(self.tmp_dir, self.prefix))) for f in filelist: os.remove(f) @@ -374,13 +376,13 @@ def del_mosaic_image(self, url): r.raise_for_status() return r.status_code, r.content - def post_geoserver(self, filepath, layer_name): + def post_geoserver(self, filepath, layer_name, sleeptime=RSYNC_WAIT_TIME): """ Add another image to a mosaic datastore :param filepath: Full path&name of GeoTIFF to import :param layer_name: Name of the layer & store (assumed to be same) """ - sleep(RSYNC_WAIT_TIME) + sleep(sleeptime) gs_url = self.gs_url.format(ogc_server_settings.hostname, self.workspace, layer_name) data = "file://{}".format(filepath) From 01da4a86c4c266111f1994c738db5204271b0763 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 21 Oct 2016 16:43:41 -0400 Subject: [PATCH 2/7] Create 4 layers, combining decades --- dataqs/hadghcnd/hadghcnd.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/dataqs/hadghcnd/hadghcnd.py b/dataqs/hadghcnd/hadghcnd.py index b4596a3..fde6519 100644 --- a/dataqs/hadghcnd/hadghcnd.py +++ b/dataqs/hadghcnd/hadghcnd.py @@ -51,13 +51,13 @@ class HadGHCNDProcessor(GeoDataMosaicProcessor): base_url = "http://www.metoffice.gov.uk/hadobs/hadghcnd/data/" layers = { - # 'HadGHCND_TXTN_anoms_1950-2014_15052015.nc.tgz': { - # 'title': 'HadGHCND Temperature Anomalies - {measure}, {interval}', - # 'name': '{prefix}_anomalies_{measure}_{interval}' - # }, + 'HadGHCND_TXTN_anoms_1950-2014_15052015.nc.tgz': { + 'title': 'HadGHCND Temperature Anomalies - {measure}, 1950-2014', + 'name': '{prefix}_anomalies_{measure}' + }, 'HadGHCND_TXTN_acts_1950-2014_15102015.nc.tgz': { - 'title': 'HadGHCND Actual Temperatures - {measure}, {interval}', - 'name': '{prefix}_temperatures_{measure}_{interval}' + 'title': 'HadGHCND Actual Temperatures - {measure}, 1950-2014', + 'name': '{prefix}_temperatures_{measure}' } } @@ -148,7 +148,8 @@ def run(self): """ for key in self.layers.keys(): src = os.path.join(self.base_url, key) - cdf_files = untar(src, self.tmp_dir) + tarfile = self.download(src) + cdf_files = untar(os.path.join(self.tmp_dir, tarfile), self.tmp_dir) for cdf in cdf_files: interval = re.findall('\d{4}-\d{4}', os.path.basename(cdf))[0] @@ -157,11 +158,11 @@ def run(self): ncds = gdal.Open(ncds_gdal_name) bands = ncds.RasterCount layer_name = self.layers[key]['name'].format( - prefix=self.prefix, measure=measure, interval=interval + prefix=self.prefix, measure=measure ) img_list = self.get_mosaic_filenames(layer_name) files = [] - for band in range(1, bands + 1): + for band in range(1, min(11, bands + 1)): days = int(ncds.GetRasterBand(band) .GetMetadata()['NETCDF_DIM_time']) band_date = re.sub('[\-\.]+', '', @@ -193,9 +194,7 @@ def run(self): self.set_default_style(layer_name, layer_name, sld.read()) - title = self.layers[key]['title'].format( - measure=measure, interval=interval - ) + title = self.layers[key]['title'].format(measure=measure) self.update_geonode(layer_name, title=title, description=self.abstract.format(src), @@ -204,7 +203,7 @@ def run(self): '-90.0', '90.0', 'EPSG:4326')) self.truncate_gs_cache(layer_name) - self.cleanup() + self.cleanup() if __name__ == '__main__': From 20a173f10d1380279f6b28b23a57a476dfe38295 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 21 Oct 2016 17:16:43 -0400 Subject: [PATCH 3/7] Removed unused variable --- dataqs/hadghcnd/hadghcnd.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dataqs/hadghcnd/hadghcnd.py b/dataqs/hadghcnd/hadghcnd.py index fde6519..582e3e2 100644 --- a/dataqs/hadghcnd/hadghcnd.py +++ b/dataqs/hadghcnd/hadghcnd.py @@ -151,8 +151,6 @@ def run(self): tarfile = self.download(src) cdf_files = untar(os.path.join(self.tmp_dir, tarfile), self.tmp_dir) for cdf in cdf_files: - interval = re.findall('\d{4}-\d{4}', - os.path.basename(cdf))[0] for measure in ('tmin', 'tmax'): ncds_gdal_name = 'NETCDF:{}:{}'.format(cdf, measure) ncds = gdal.Open(ncds_gdal_name) From 1ee8a0d64ee9a1666ee8259ba23e98890e656786 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Sat, 22 Oct 2016 11:56:49 -0400 Subject: [PATCH 4/7] Fix test --- dataqs/hadghcnd/tests.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dataqs/hadghcnd/tests.py b/dataqs/hadghcnd/tests.py index 96acc1e..b637269 100644 --- a/dataqs/hadghcnd/tests.py +++ b/dataqs/hadghcnd/tests.py @@ -76,11 +76,11 @@ def test_extract_band(self): self.processor.base_url, layer.rstrip('.tgz')) ncds_gdal_name = 'NETCDF:{}:tmin'.format( os.path.join(self.processor.tmp_dir, imgfile)) - bandout = os.path.join(self.processor.tmp_dir, - '{}test'.format(self.processor.prefix)) - self.processor.extract_band(ncds_gdal_name, 1, bandout) - self.assertTrue(os.path.exists(os.path.join(bandout))) - img = gdal.Open(bandout) + bandout = '{}_test.tif'.format(self.processor.prefix) + outpath = self.processor.extract_band(ncds_gdal_name, 1, bandout, + projection='WGS84') + self.assertTrue(os.path.exists(outpath)) + img = gdal.Open(outpath) try: self.assertEquals(1, img.RasterCount) finally: From 51ebb683f2d3022a3dd5e1ede447e7876562f391 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Sat, 22 Oct 2016 17:34:51 -0400 Subject: [PATCH 5/7] Upgrade gdal to 1.11.2 --- .travis.yml | 11 ++++------- local_settings.py.template | 5 +++++ setup.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 local_settings.py.template diff --git a/.travis.yml b/.travis.yml index eaaecdb..c307e51 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,15 @@ language: python sudo: required - -addons: - apt: - packages: - - python-dev - - libgdal1-dev +dist: trusty python: - "2.7" before_install: + - sudo add-apt-repository ppa:ubuntugis/ppa -y - sudo apt-get -qq update + - sudo apt-get install python-dev libgdal1-dev gdal-bin install: - export CPLUS_INCLUDE_PATH=/usr/include/gdal @@ -20,7 +17,7 @@ install: - pip install -r dev-requirements.txt - pip install -e . - git clone -b 2.4.x https://github.com/GeoNode/geonode.git - - cp local_settings.py geonode/geonode/. + - cp local_settings.py.template geonode/geonode/local_settings.py - pip install -e geonode script: diff --git a/local_settings.py.template b/local_settings.py.template new file mode 100644 index 0000000..41a7e88 --- /dev/null +++ b/local_settings.py.template @@ -0,0 +1,5 @@ +from django.conf import settings + +INSTALLED_APPS = settings.INSTALLED_APPS + ( + 'dataqs', +) diff --git a/setup.py b/setup.py index 1eb6a61..8fdf437 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,6 @@ 'pymongo', 'numpy', 'rasterio==0.31.0', - 'gdal==1.10' + 'gdal==1.11.2' ] ) From fa8303ab6ae1f3da47206edbaf0fa14a873edaea Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Sun, 23 Oct 2016 17:21:51 -0400 Subject: [PATCH 6/7] Add UbuntuGIS repository in ansible script so that GDAL 1.11 can be installed. --- ansible/roles/common/tasks/main.yml | 4 ++++ local_settings.py | 16 ---------------- 2 files changed, 4 insertions(+), 16 deletions(-) delete mode 100644 local_settings.py diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml index 500e559..ec364cd 100644 --- a/ansible/roles/common/tasks/main.yml +++ b/ansible/roles/common/tasks/main.yml @@ -1,5 +1,9 @@ --- +- name: get UbuntuGIS repository + apt_repository: repo='ppa:ubuntugis/ppa' + sudo: yes + - name: ensure apt cache is up to date apt: update_cache=yes sudo: yes diff --git a/local_settings.py b/local_settings.py deleted file mode 100644 index a777f70..0000000 --- a/local_settings.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.conf import settings - -INSTALLED_APPS = settings.INSTALLED_APPS + ( - 'dataqs', - 'dataqs.aqicn', - 'dataqs.airnow', - 'dataqs.forecastio', - 'dataqs.gfms', - 'dataqs.gdacs', - 'dataqs.hifld', - 'dataqs.nasa_gpm', - 'dataqs.spei', - 'dataqs.usgs_quakes', - 'dataqs.gistemp', - 'dataqs.worldclim', -) From a7dfc55e24cf6236bad9dfe33d5505cc1f366172 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Fri, 10 Feb 2017 10:55:42 -0500 Subject: [PATCH 7/7] Upgrade GDAL to 2.1.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8fdf437..a286c42 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,6 @@ 'pymongo', 'numpy', 'rasterio==0.31.0', - 'gdal==1.11.2' + 'gdal==2.1.0' ] )