# Spatial Features - Run Spatial Features

Spatial features (spfeas) is a landcover classification tool that leverages imagery textures to classify landcover. It has been used at the World Bank both for quantifying informal housing and for estimating poverty.

Running spatial features is a six-step process (steps 0–5):

0. Setting up gbdx for spatial features
1. Finding imagery
2. **Running spfeas**
    - **Checking spfeas results**
3. Downloading results from AWS
4. Stacking results
5. Running classification

### Running Spatial Features

For further reading, see the spfeas GitHub repository: https://github.com/jgrss/spfeas

In [8]:
# Input datasets and processing parameters for this run
AOI = "C:/Temp/inputAOI.shp"

# spfeas triggers to request for each image
spfeas_triggers = [
    'hog', 'lbpm', 'orb', 'seg', 'dmp', 'fourier', 'gabor',
    'lac', 'mean', 'pantex', 'saliency', 'sfs', 'grad',
]

# One row per image: [catID, sensorType, spfeas_triggers]
imageDefinition = [
    ['104001004828C700', "WORLDVIEW03_VNIR", spfeas_triggers],
    ['10400100461E3600', "WORLDVIEW03_VNIR", spfeas_triggers],
    ['1030010088D20100', "WORLDVIEW02", spfeas_triggers],
    ['1040010043B94600', "WORLDVIEW03_VNIR", spfeas_triggers],
]

# spfeas "scales" and "block" parameters, passed as strings
scales = '8 16 32'
block = '8'

# Set to True if you want to save the processed raw imagery to the S3 bucket
saveImages = False

# Folder for the output, found at s3browser.geobigdata.io,
# e.g. bps/Mauritania/Nouakchott
baseFolder = "Initials/AOIName"

# Local folder used for evaluating spfeas results
outputFolder = "C:/Temp/Addis"

In [9]:
import sys, os, inspect
import pandas as pd
import geopandas as gpd

from gbdxtools import Interface
from gbdxtools import CatalogImage
from shapely.geometry import box

# Put the parent of the current working directory at the front of sys.path
# (presumably so the GOST_GBDx_Tools imports that follow resolve when the
# notebook is run from a sub-folder of the repository - confirm).
cmd_folder = os.path.dirname(os.getcwd())
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

from GOST_GBDx_Tools import gbdxTasks
from GOST_GBDx_Tools import gbdxURL_misc
from GOST_GBDx_Tools import imagery_search

# In order for the interface to be properly authenticated, follow the instructions here:
#   http://gbdxtools.readthedocs.io/en/latest/user_guide.html
#   For Ben, the .gbdx-config file belongs in C:\Users\WB411133 (this location is easy to miss)
gbdx = Interface()
# NOTE(review): bare attribute access whose value is discarded; presumably kept
# to force the S3 information to be fetched up front - confirm.
gbdx.s3.info
# Project helpers that wrap the authenticated interface
curTasks = gbdxTasks.GOSTTasks(gbdx)
gbdxUrl = gbdxURL_misc.gbdxURL(gbdx)

In [4]:
# Confirm that every selected image is available in the S3 buckets.
# If any reported 'state' is not 'delivered', wait a day and run this cell again.
for img in imageDefinition:
    catID = img[0]
    orderID = gbdx.ordering.order(catID)
    orderState = gbdx.ordering.status(orderID)
    print(orderState)

[{'state': 'delivered', 'acquisition_id': '103001008951A400', 'location': 's3://receiving-dgcs-tdgplatform-com/059308859010_01_003'}]
[{'state': 'delivered', 'acquisition_id': '1040010036917C00', 'location': 's3://receiving-dgcs-tdgplatform-com/058972576010_01_003'}]


In [None]:
# Launch one spfeas workflow per (image, trigger) pair, restricted to the part
# of each image's bounding box that overlaps the AOI.
inS = gpd.read_file(AOI)
# to_crs(epsg=...) replaces the deprecated {'init': 'epsg:4326'} dictionary form
inS = inS.to_crs(epsg=4326)
# Only the AOI feature at index 0 is used
aoiGeom = inS.geometry[0]

# (catID, trigger, value returned by execute()) for every launched workflow,
# so the results are not lost when id1 is overwritten on the next iteration
launchedWorkflows = []

for curImg in imageDefinition:
    catID = curImg[0]
    sensor = curImg[1]
    curTriggers = curImg[2]

    # Get the intersecting area with the current image
    cImg = CatalogImage(catID)
    b = cImg.bounds
    bGeom = box(b[0], b[1], b[2], b[3])
    inGeom = bGeom.intersection(aoiGeom)
    if inGeom.area <= 0:
        print("The selected image does not intersect the defined AOI: %s" % catID)
        continue

    # These do not depend on the trigger, so compute them once per image.
    # NOTE(review): imageFolder is not passed to createWorkflow; kept because
    # it may be inspected interactively.
    imageFolder = "%s/%s/%s" % (baseFolder, catID, "clippedRaster")
    clipWkt = str(inGeom.wkt)

    for cJob in curTriggers:
        outFolder = "%s/%s/spfeas/%s" % (baseFolder, catID, cJob)
        x = curTasks.createWorkflow(catID, clipWkt, sensor, outFolder,
                    runCarFinder = 0, runSpfeas = 1, spfeasLoop = 0, downloadImages = int(saveImages),
                    aopPan=False, aopDra=False, aopAcomp=False, aopBands='PAN',
                    spfeasParams={"triggers":'%s' % cJob,"scales":scales, "block":block,
                        "gdal_cache":'1024', "section_size":'5000', "n_jobs":'1'},
                        inRaster = '')
        id1 = x.execute()
        launchedWorkflows.append((catID, cJob, id1))

In [5]:
# Monitor your GBDx workflows and keep the returned summary in xx.
# sleepTime=60 is passed through to monitorWorkflows (presumably the polling
# interval in seconds - confirm).
xx = gbdxUrl.monitorWorkflows(sleepTime=60)

In [6]:
# Display the current value of xx
xx

{'FAILED': {}, 'SUCCEEDED': {}}

# Checking SPFEAS results
Errors often occur while processing spfeas; some are legitimate and some are random. The code below generates a summary of the spfeas processing.

In [None]:
# Summarise spfeas processing: list the S3 output folder, download every status
# yaml file, read its ALL_FINISHED entry and write a table to CSV with one
# column per image folder and one row per trigger folder.

# This cell uses logging and yaml, which are not imported elsewhere in the notebook
import logging

import yaml

location = r's3://gbd-customer-data/1c080e9c-02cc-4e2e-a8a2-bf05b8369eee/%s/' % baseFolder
spFile = os.path.join(outputFolder, "s3Contents_all.txt")
logging.basicConfig(level=logging.INFO)

# List the bucket contents only when no listing file exists yet; delete spFile
# to force a fresh listing
if not os.path.exists(spFile):
    xx = gbdxUrl.listS3Contents(location, outFile=spFile, recursive=True)
    gbdxUrl.executeAWS_file(xx, "C:/temp/s3Contents.bat")

# Open the listing and collect every entry whose last space-separated field
# ends in 'yaml'
allYaml = []
with open(spFile, 'r') as inFile:
    for line in inFile:
        cFile = line.split(" ")[-1].replace("\n", "")
        if cFile.endswith('yaml'):
            allYaml.append(cFile)

# Download the yaml files
# NOTE(review): the local name is only the basename of the S3 key, so two keys
# with the same file name would overwrite each other in outputFolder - confirm
# that the yaml names are unique across images and triggers.
for cYaml in allYaml:
    outYaml = os.path.join(outputFolder, os.path.basename(cYaml))
    # S3 URLs always use forward slashes, so build them by concatenation rather
    # than with os.path.join
    s3Yaml = "s3://gbd-customer-data/" + cYaml
    xx = gbdxUrl.downloadS3Contents(s3Yaml, outYaml, recursive=False)
    gbdxUrl.executeAWS_file(xx, "C:/temp/s3Execution.bat")

# Process the yaml files
curData = {}
for cYaml in allYaml:
    nameSplit = cYaml.split("/")
    # The image folder is read 5 levels from the end of the key and the trigger
    # folder 3 levels from the end; shorter keys cannot be interpreted
    if len(nameSplit) < 5:
        print("Could not process %s" % cYaml)
        continue
    imageKey = nameSplit[-5]
    triggerKey = nameSplit[-3]
    # Entries whose trigger folder name is longer than 8 characters are skipped
    if len(triggerKey) > 8:
        continue

    outYaml = os.path.join(outputFolder, os.path.basename(cYaml))
    try:
        with open(outYaml, 'r') as yamlContents:
            # safe_load: yaml.load without a Loader is deprecated and fails on
            # PyYAML 6, and the file content comes from an external bucket
            yamlRes = yaml.safe_load(yamlContents)
        finished = yamlRes['ALL_FINISHED']
    except (OSError, yaml.YAMLError, KeyError, TypeError) as err:
        # Missing download, malformed or empty yaml, or no ALL_FINISHED entry
        print("Could not process %s" % cYaml)
        logging.warning("%s: %r", outYaml, err)
        continue
    curData.setdefault(imageKey, {})[triggerKey] = finished

finalPD = pd.DataFrame(curData)
finalPD.to_csv("C:/temp/spfeasCheck.csv")