
# FINN Preprocessor: Loads Local MODIS raster

## 1. Setting Environments

### Systems settings

Most likely these do not need to be edited.

In [None]:
# python libraries
import sys
import os
import re
import glob
import datetime
import subprocess
import shlex
from urllib.parse import urlparse
from importlib import reload
import gdal
import matplotlib.pylab as plt


# finn preproc codes
sys.path = sys.path + ['../code_anaconda']
import downloader
import af_import
import rst_import
import polygon_import
import run_step1
import run_step2
import export_shp
import plotter

In [None]:
# database settings
# PostgreSQL connection parameters, exported as environment variables so that
# psql (and any other client started from this notebook) picks them up
os.environ.update({
    'PGDATABASE': 'finn',
    'PGUSER': 'finn',
    'PGPASSWORD': 'finn',
    'PGHOST': 'localhost',
    'PGPORT': '5432',
})

Make sure that the PostGIS database is ready.

In [None]:
# show info for the database
# (IPython shell escapes, these lines only work inside a notebook/IPython)
# print the PostgreSQL server version, connecting to the "postgres" database
!psql postgres -c 'SELECT version();'
# list the PostgreSQL clusters known on this machine
!pg_lsclusters

In [None]:
# TODO i want to move this to Dockerfile somehow
# create plpython, needed only once for the database
# The command is passed as an argument list, so the database name is never
# re-parsed by a shell-style splitter.
try:
    subprocess.run(
        ['psql', '-d', os.environ['PGDATABASE'], '-c', 'CREATE LANGUAGE plpython3u;'],
        check=True, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
    message = e.stderr.decode().strip()
    if 'already exists' in message:
        # the language was created on an earlier run: report it as a success
        print(message.replace('ERROR', 'OK'))
    else:
        # any other failure (bad credentials, server down, extension missing)
        # must not be hidden, so show psql's message and stop here
        print(message, file=sys.stderr)
        raise

### Settings for Land Surface Datasets (land cover, vegetation continuous field)

MODIS land cover types, MODIS vegetation continuous field will be downloaded, if needed, for the region of AF input
1. `year_rst`: MODIS raster data year to be used for the analysis
2. either  
  `four_corners`: (LowerLeft_lon, LowerLeft_Lat, UpperRight_lon, UpperRight_lat) or   
  `extent_shp`:  shape file (could be polygon of area of interest, points of fires)

Other parameters such as `tag_lct`, `tag_vcf`, `tag_regnum` (identify landcover, vcf and region number dataset) are set automatically for MODIS dataset.  
The datasets are imported into database schema "raster", with table names "rst_<i>tag_lct</i>", or "rst_modlct_2017", for example.  
An overview raster "o_32_rst_modlct_2017" is created as well, as the full-resolution dataset is difficult to handle for QA.

In [None]:
# MODIS raster datasets' year
# (used further down to build the dataset tags, the download URLs and the
# hdf file search patterns)
year_rst = 2017

In [None]:
# Geographic extent of download
# specify either one of below (comment out one line with #)
# if both are defined, four_corners is the one used when the polygon is built

four_corners = (-140, 20, 80, 60) # (LL corner lon, LL corner lat, UR corner lon, UR corner lat)
#extent_shp = './north_central_america.shp'  # shape file of North and Central America (i can create this from AllRegion polygon)

In [None]:
# dataset tags follow the raster year: modlct_YYYY and modvcf_YYYY
tag_lct, tag_vcf = (fmt % year_rst for fmt in ('modlct_%d', 'modvcf_%d'))

# the region number polygons get a fixed tag
tag_regnum = 'regnum'

# one entry per dataset, describing its tag, its kind and its variable name(s)
rasters = [
    dict(tag=tag_lct, kind='thematic', variable='lct'),
    dict(tag=tag_vcf, kind='continuous', variables=['tree', 'herb', 'bare']),
    dict(tag=tag_regnum, kind='polygons', variable_in='region_num', variable='regnum'),
]

---
## 2. Download raster datasets

Raster files URL and directories to save data

In [None]:
# all downloads are stored in following dir
# (relative path; the per-dataset download directories are built underneath it)
download_rootdir = '../downloads'

In [None]:
# earthdata's URL for landcover and VCF
# The VCF directory is dated March 5 in leap years and March 6 otherwise, so
# the full Gregorian rule is needed: divisible by 4, except century years that
# are not divisible by 400 (a plain "% 4" test gets e.g. 2100 wrong).
is_leap = (year_rst % 4 == 0) and (year_rst % 100 != 0 or year_rst % 400 == 0)
url_lct = 'https://e4ftl01.cr.usgs.gov/MOTA/MCD12Q1.006/%d.01.01/' % year_rst
url_vcf = 'https://e4ftl01.cr.usgs.gov/MOLT/MOD44B.006/%d.03.%02d/' % (year_rst, 5 if is_leap else 6)

# download directory = root dir + host name + path of the URL
# (elements 1 and 2 of the urlparse() result are netloc and path)
ddir_lct = download_rootdir +'/'+ ''.join(urlparse(url_lct)[1:3])
ddir_vcf = download_rootdir +'/'+ ''.join(urlparse(url_vcf)[1:3])

print('LCT downloads goes to %s' % ddir_lct)
print('VCF downloads goes to %s' % ddir_vcf)

Download land cover type raster, <b>only for the tiles needed for the active fire file</b>

In [None]:
# Region of interest for the downloads: a WKT polygon built from the four
# corners when they are given, otherwise the shape file name.
if locals().get('four_corners') is not None:
    # closed ring LL -> UL -> UR -> LR -> LL, written as (lon, lat) index
    # pairs into four_corners
    poly = "POLYGON((%f %f, %f %f, %f %f, %f %f, %f %f))" % tuple(
        four_corners[idx] for idx in (0, 1, 0, 3, 2, 3, 2, 1, 0, 1))
elif locals().get('extent_shp') is not None:
    # use shape file
    poly = extent_shp
else:
    raise RuntimeError('Specify region of interest!')
        

In [None]:
# pick up any edits made to the downloader module since it was imported
reload(downloader)
# fetch land cover files from url_lct into download_rootdir for the region in `poly`
# (presumably only the tiles overlapping it -- confirm in downloader.download_only_needed)
downloader.download_only_needed(url = url_lct, droot = download_rootdir, pnts=poly)

Verify LCT files' checksum.  If a file is corrupted, the file is downloaded again.

In [None]:
# check the files in the LCT download directory against their source URL
# (presumably removes/re-fetches corrupted ones -- confirm in downloader.purge_corrupted)
downloader.purge_corrupted(ddir = ddir_lct, url=url_lct)

Do the same for the vegetation continuous field data

In [None]:
# same call as for land cover, now with the VCF URL and the same region `poly`
downloader.download_only_needed(url = url_vcf, droot = download_rootdir, pnts=poly)

In [None]:
# check the files in the VCF download directory against their source URL;
# the directory is passed by keyword, like in the land cover call
downloader.purge_corrupted(ddir = ddir_vcf, url=url_vcf)

## 3. Import raster datasets

Downloaded files need preprocessing, which extracts only the raster band needed and converts the coordinate system to WGS84.  Intermediate files are created in the following directories.

In [None]:
# one working directory per dataset, named after the dataset's tag
workdir_lct, workdir_vcf, workdir_regnum = (
    '../proc_rst_%s' % tag for tag in (tag_lct, tag_vcf, tag_regnum)
)

# tell the user where the intermediate files will go
print('LCT preprocessing occurs in %s' % workdir_lct)
print('VCF preprocessing occurs in %s' % workdir_vcf)
print('RegNum preprocessing occurs in %s' % workdir_regnum)

### Import land cover type

First grab hdf file names from the download directory

In [None]:
# glob pattern for the downloaded land cover hdf files of the chosen year
# (each "?" matches exactly one character, "*" matches any run of characters)
search_string = "%(ddir_lct)s/MCD12Q1.A%(year_rst)s001.h??v??.006.*.hdf" % {
    'ddir_lct': ddir_lct,
    'year_rst': year_rst,
}
fnames_lct = glob.glob(search_string)
fnames_lct.sort()
print('found %d hdf files' % len(fnames_lct) )
# nothing to import without input files, so stop here
if not fnames_lct:
    raise RuntimeError("check if downloads are successful and search string to be correct: %s" % search_string)

The next command performs three tasks, "merge", "resample" and "import".  The first two tasks create intermediate GeoTiff files in <i>workdir</i>.  The last task actually imports the data into the database's <i>raster</i> schema.

You can run only selected tasks by setting the other run_XXX flags to `False`, for example when processing failed in the middle and you have resolved the issue.

In [None]:
# pick up any edits made to the rst_import module since it was imported
reload(rst_import)

# process the land cover hdf files under the tag tag_lct, using workdir_lct as
# working directory; all three run_* stages are switched on
rst_import.main(tag_lct, fnames=fnames_lct, workdir = workdir_lct, run_merge=True, run_resample=True, run_import=True)

At this point you should be able to see the raster in the database using QGIS.  
I am also trying to make a quick check here by creating a simple image for QA, but use of a GIS tool is encouraged.

In [None]:
# render figures inside the notebook (IPython magic)
%matplotlib inline
import plotter
# pick up any edits made to the plotter module since it was last imported
reload(plotter)
try:
    # quick-look plot of the land cover overview table in schema "raster";
    # the second argument is the modlct.clr file
    # (presumably a color table -- confirm in plotter.plot)
    plotter.plot('raster.o_32_rst_%s' % tag_lct, '../code_anaconda/modlct.clr')
except Exception as e:
    # best effort only: report the problem and let the notebook carry on
    print("Got this error: " + str(e))
    print("Didn't work, use QGIS!")
    pass

### Import vegetation continuous fields

Analogous steps are repeated for vegetation continuous fields.

In [None]:
# grab hdf file names
search_string = "%(ddir_vcf)s/MOD44B.A%(year)s065.h??v??.006.*.hdf" % dict(
        ddir_vcf = ddir_vcf, year=year_rst)
fnames_vcf = sorted(glob.glob(search_string))
print('found %d hdf files' % len(fnames_vcf) )
if len(fnames_vcf) == 0:
    raise RuntimeError("check if downloads are successfull and search string to be correct: %s" % search_string)

In [None]:
# pick up any edits made to the rst_import module since it was imported
reload(rst_import)
# process the VCF hdf files under the tag tag_vcf, using workdir_vcf as
# working directory; all three run_* stages are switched on
rst_import.main(tag_vcf, fnames=fnames_vcf, workdir = workdir_vcf, run_merge=True, run_resample=True, run_import=True)

In [None]:
# render figures inside the notebook (IPython magic)
%matplotlib inline
import plotter
# pick up any edits made to the plotter module since it was last imported
reload(plotter)
try:
    # quick-look plot of the VCF overview table in schema "raster"
    # (no second argument is passed here, unlike the land cover plot)
    plotter.plot('raster.o_32_rst_%s' % tag_vcf)
except Exception as e:
    # best effort only: report the problem and let the notebook carry on
    print("Got this error: " + str(e))
    print("Didn't work, use QGIS!")
    pass

### Import countries of the world shapefile

This is actually not raster but vector data (polygons).  But since it serves a conceptually similar function to a raster (specifying an attribute for a given geographic location), I treat it as if it were a raster dataset.  

In [None]:
# fetch and unpack the countries shapefile, unless the .shp is already in place
regnum_shp = os.path.join(workdir_regnum, 'All_Countries.shp')
if not os.path.exists(regnum_shp):
    regnum_zip = os.path.join(workdir_regnum, 'All_Countries.zip')
    # wget saves the archive into workdir_regnum (-P), unzip extracts it there (-d)
    fetch_cmd = ['wget', '-P', workdir_regnum,
                 'https://s3-us-west-2.amazonaws.com/earthlab-finn/All_Countries.zip']
    unpack_cmd = ['unzip', regnum_zip, '-d', workdir_regnum]
    # check=True: a failing step raises and the next one is not attempted
    for regnum_cmd in (fetch_cmd, unpack_cmd):
        subprocess.run(regnum_cmd, check=True)

In [None]:
# pick up any edits made to the polygon_import module since it was imported
reload(polygon_import)
# import the countries shapefile under the region-number tag; tag_regnum is
# used instead of a literal so the tag stays in step with workdir_regnum and
# the `rasters` definition if it is ever changed
polygon_import.main(tag_regnum, shpname = os.path.join(workdir_regnum, 'All_Countries.shp'))