Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Commit

Permalink
fix ESI to only download and process new data and fixed hdf permissions on creation
Browse files Browse the repository at this point in the history
  • Loading branch information
billyz313 committed Jan 31, 2019
1 parent 9f58504 commit 9700216
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def __openFileForWriting__(self,dataType, year):
if (os.path.isfile(outputfile) != True) :
indexLastDay = dit.convertEpochToJulianDay(dit.convertDayMonthYearToEpoch(31, 12, year))
self.f = h5py.File(outputfile,'a')
try:
os.chmod(outputfile, 0o777)
except:
pass
return self.f.create_dataset("data", (indexLastDay,size[1],size[0]), dtype='float32', compression="lzf", fillvalue=params.getFillValue(dataType))
else:
print 'file exists'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import requests
from bs4 import BeautifulSoup as bs
import CHIRPS.utils.configuration.parameters as params
import json
from datetime import datetime

validFile = re.compile(r"\.tif")
gzFilePattern = re.compile(r"\.tif\.gz$")
Expand Down Expand Up @@ -66,20 +68,24 @@ def getFilesForYear(files_urls,yearToGet):
fileToWriteTo = open(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess, 'wb')
fileToWriteTo.write(res.read())
fileToWriteTo.close()
os.chmod(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess, 0o777)
if (gzFilePattern.search(fileToProcess)):
try :
print "Gunzipping the file: ",fileToProcess
gunzipFile(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess)
os.chmod(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess.replace(".gz", ""), 0o777)
except IOError:
print "************error processing "+fileToProcess
if "12WK" in fileToProcess:
fileToWriteTo = open(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess, 'wb')
fileToWriteTo.write(res.read())
fileToWriteTo.close()
os.chmod(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess, 0o777)
if (gzFilePattern.search(fileToProcess)):
try :
print "Gunzipping the file: ",fileToProcess
gunzipFile(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess)
os.chmod(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess.replace(".gz", ""), 0o777)
except IOError:
print "************error processing "+fileToProcess

Expand All @@ -102,23 +108,34 @@ def createEndDirectory(year):
os.makedirs(fullPath)
else:
print "Directory already exists "+fullPath


def getESIDate(item):
    """Filter predicate: return *item* if it is the 'esi4week' stats entry, else None.

    Used with filter() to pick the ESI 4-week record out of stats.json's
    'items' list; the caller then reads its 'Latest' date string.
    """
    if item['name'] == 'esi4week':
        return item
    # Explicit None instead of an implicit fall-through return.
    return None
def getDatePattern(url):
    """Return the date token of *url*: the third underscore-separated field,
    truncated at the first dot (e.g. 'DFPPM_4WK_2019031.tif.gz' -> '2019031')."""
    date_field = url.split('_')[2]
    return date_field.split('.', 1)[0]
def processYear(yearToGet):
'''
:param yearToGet:
'''

filePattern = None
print "-------------------------------Working on ",yearToGet,"------------------------------------"
with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
data = json.load(f)
theDate = filter(getESIDate, data['items'])[0]['Latest']
filePattern = theDate.split(' ')[2] + str("%03d" % ((datetime.strptime(theDate, '%d %M %Y') - datetime(2019,1,1)).days + 1,))
response = requests.get('https://geo.nsstc.nasa.gov/SPoRT/outgoing/crh/4servir/')
soup = bs(response.text,"html.parser")
urls = []
names = []
for i, link in enumerate(soup.findAll('a')):
_FULLURL = "https://geo.nsstc.nasa.gov/SPoRT/outgoing/crh/4servir/" + link.get('href')
if _FULLURL.endswith('.tif.gz'):
urls.append(_FULLURL)
names.append(soup.select('a')[i].attrs['href'])
#check if datepattern is greater than filePattern
if int(getDatePattern(link.get('href'))) > int(filePattern):
urls.append(_FULLURL)
names.append(soup.select('a')[i].attrs['href'])
names_urls = zip(names, urls)
getFilesForYear(names_urls, yearToGet)
print "-----------------------------Done working on ",yearToGet,"---------------------------------"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
import json
import datetime

def getESIDate(item):
    """Filter predicate: return *item* if it is the 'esi4week' stats entry, else None.

    Mirrors the helper of the same name in the ESI download module; used with
    filter() to select the 4-week ESI record from stats.json's 'items' list.
    """
    if item['name'] == 'esi4week':
        return item
    # Explicit None instead of an implicit fall-through return.
    return None

def getDatePattern(url):
    """Return the embedded date token: third '_'-separated field of *url*,
    cut at the first '.' (e.g. 'DFPPM_4WK_2019031.tif' -> '2019031')."""
    token = url.split('_')[2]
    return token.partition('.')[0]

def processYearByDirectory(dataType,year, inputdir):
'''
Expand All @@ -20,11 +27,17 @@ def processYearByDirectory(dataType,year, inputdir):
:param inputdir:
'''
###Process the incoming data


filePattern = None
with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
data = json.load(f)
theDate = filter(getESIDate, data['items'])[0]['Latest']
filePattern = theDate.split(' ')[2] + str("%03d" % ((datetime.datetime.strptime(theDate, '%d %M %Y') - datetime.datetime(2019,1,1)).days + 1,))

dataStore = dataS.datastorage(dataType, year, forWriting=True)
indexer = params.dataTypes[dataType]['indexer']
for filename in os.listdir(inputdir):
if filename.endswith(".tif"):
if filename.endswith(".tif") and int(getDatePattern(filename)) > int(filePattern):

fileToProcess = inputdir+"/"+filename
print "Processing "+fileToProcess
Expand Down Expand Up @@ -73,7 +86,7 @@ def processYearByDirectory(dataType,year, inputdir):
dataStore.putData(index, img)

dataStore.close()
dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)
#dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)


if __name__ == '__main__':
Expand Down

0 comments on commit 9700216

Please sign in to comment.