Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Commit

Permalink
fix ESI to only download and process new data and fixed hdf permissions on creation
Browse files Browse the repository at this point in the history
  • Loading branch information
billyz313 committed Jan 31, 2019
1 parent 9f58504 commit 9700216
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def __openFileForWriting__(self,dataType, year):
if (os.path.isfile(outputfile) != True) :
indexLastDay = dit.convertEpochToJulianDay(dit.convertDayMonthYearToEpoch(31, 12, year))
self.f = h5py.File(outputfile,'a')
try:
os.chmod(outputfile, 0o777)
except:
pass
return self.f.create_dataset("data", (indexLastDay,size[1],size[0]), dtype='float32', compression="lzf", fillvalue=params.getFillValue(dataType))
else:
print 'file exists'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import requests
from bs4 import BeautifulSoup as bs
import CHIRPS.utils.configuration.parameters as params
import json
from datetime import datetime

validFile = re.compile(r"\.tif")
gzFilePattern = re.compile(r"\.tif\.gz$")
Expand Down Expand Up @@ -66,20 +68,24 @@ def getFilesForYear(files_urls,yearToGet):
fileToWriteTo = open(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess, 'wb')
fileToWriteTo.write(res.read())
fileToWriteTo.close()
os.chmod(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess, 0o777)
if (gzFilePattern.search(fileToProcess)):
try :
print "Gunzipping the file: ",fileToProcess
gunzipFile(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess)
os.chmod(rootoutputdir4WK+str(yearToGet)+"/"+fileToProcess.replace(".gz", ""), 0o777)
except IOError:
print "************error processing "+fileToProcess
if "12WK" in fileToProcess:
fileToWriteTo = open(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess, 'wb')
fileToWriteTo.write(res.read())
fileToWriteTo.close()
os.chmod(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess, 0o777)
if (gzFilePattern.search(fileToProcess)):
try :
print "Gunzipping the file: ",fileToProcess
gunzipFile(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess)
os.chmod(rootoutputdir12WK+str(yearToGet)+"/"+fileToProcess.replace(".gz", ""), 0o777)
except IOError:
print "************error processing "+fileToProcess

Expand All @@ -102,23 +108,34 @@ def createEndDirectory(year):
os.makedirs(fullPath)
else:
print "Directory already exists "+fullPath


def getESIDate(item):
    """Filter predicate: return *item* if it is the 'esi4week' stats entry, else None.

    Used with filter() to pick the ESI 4-week record out of stats.json's
    'items' list; the caller then reads its 'Latest' date string.
    """
    if item['name'] == 'esi4week':
        return item
    # Explicit None instead of an implicit fall-through return.
    return None
def getDatePattern(url):
    """Return the date token of *url*: the third underscore-separated field,
    truncated at the first dot (e.g. 'DFPPM_4WK_2019031.tif.gz' -> '2019031')."""
    date_field = url.split('_')[2]
    return date_field.split('.', 1)[0]
def processYear(yearToGet):
'''
:param yearToGet:
'''

filePattern = None
print "-------------------------------Working on ",yearToGet,"------------------------------------"
with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
data = json.load(f)
theDate = filter(getESIDate, data['items'])[0]['Latest']
filePattern = theDate.split(' ')[2] + str("%03d" % ((datetime.strptime(theDate, '%d %M %Y') - datetime(2019,1,1)).days + 1,))
response = requests.get('https://geo.nsstc.nasa.gov/SPoRT/outgoing/crh/4servir/')
soup = bs(response.text,"html.parser")
urls = []
names = []
for i, link in enumerate(soup.findAll('a')):
_FULLURL = "https://geo.nsstc.nasa.gov/SPoRT/outgoing/crh/4servir/" + link.get('href')
if _FULLURL.endswith('.tif.gz'):
urls.append(_FULLURL)
names.append(soup.select('a')[i].attrs['href'])
#check if datepattern is greater than filePattern
if int(getDatePattern(link.get('href'))) > int(filePattern):
urls.append(_FULLURL)
names.append(soup.select('a')[i].attrs['href'])
names_urls = zip(names, urls)
getFilesForYear(names_urls, yearToGet)
print "-----------------------------Done working on ",yearToGet,"---------------------------------"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
import json
import datetime

def getESIDate(item):
    """Filter predicate: return *item* if it is the 'esi4week' stats entry, else None.

    Mirrors the helper of the same name in the ESI download module; used with
    filter() to select the 4-week ESI record from stats.json's 'items' list.
    """
    if item['name'] == 'esi4week':
        return item
    # Explicit None instead of an implicit fall-through return.
    return None

def getDatePattern(url):
    """Return the embedded date token: third '_'-separated field of *url*,
    cut at the first '.' (e.g. 'DFPPM_4WK_2019031.tif' -> '2019031')."""
    token = url.split('_')[2]
    return token.partition('.')[0]

def processYearByDirectory(dataType,year, inputdir):
'''
Expand All @@ -20,11 +27,17 @@ def processYearByDirectory(dataType,year, inputdir):
:param inputdir:
'''
###Process the incoming data


filePattern = None
with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
data = json.load(f)
theDate = filter(getESIDate, data['items'])[0]['Latest']
filePattern = theDate.split(' ')[2] + str("%03d" % ((datetime.datetime.strptime(theDate, '%d %M %Y') - datetime.datetime(2019,1,1)).days + 1,))

dataStore = dataS.datastorage(dataType, year, forWriting=True)
indexer = params.dataTypes[dataType]['indexer']
for filename in os.listdir(inputdir):
if filename.endswith(".tif"):
if filename.endswith(".tif") and int(getDatePattern(filename)) > int(filePattern):

fileToProcess = inputdir+"/"+filename
print "Processing "+fileToProcess
Expand Down Expand Up @@ -73,7 +86,7 @@ def processYearByDirectory(dataType,year, inputdir):
dataStore.putData(index, img)

dataStore.close()
dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)
#dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)


if __name__ == '__main__':
Expand Down

0 comments on commit 9700216

Please sign in to comment.