## Land Cover Attribution

In [1]:
import os
import arcpy
import exactextract
from tqdm import tqdm
import gc

<class 'ModuleNotFoundError'>: No module named 'exactextract'

In [2]:
# Diagnostic cell: print the interpreter's module search path
# (presumably kept to troubleshoot the failed import above -- confirm before removing).
import sys
module_search_path = sys.path
print(module_search_path)

['C:\\Program Files\\ArcGIS\\Pro\\Resources\\ArcPy', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\conda\\envs\\arcgispro-py3-clone-3\\DLLs', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\conda\\envs\\arcgispro-py3-clone-3\\Lib', 'C:\\Program Files\\ArcGIS\\Pro\\bin', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\conda\\envs\\arcgispro-py3-clone-3', '', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\conda\\envs\\arcgispro-py3-clone-3\\Lib\\site-packages', 'C:\\Program Files\\ArcGIS\\Pro\\Resources\\ArcToolbox\\Scripts', 'C:\\Program Files\\ArcGIS\\Pro\\Resources\\ArcToolBox\\Scripts', 'C:\\Program Files\\ArcGIS\\Pro\\Resources\\ArcToolBox\\Scripts\\archydro', 'C:\\Program Files\\ArcGIS\\Pro\\Resources\\ArcToolBox\\Scripts\\GRAIP', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\conda\\envs\\arcgispro-py3-clone-3\\Lib\\site-packages\\win32', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\conda\\envs\\arcgispro-py3-clone-3\\Lib\\site-packages\\win32\\lib', 'C:\\Users\\olivoshj\\AppData\\Local\\ESRI\\con

In [2]:
# Let geoprocessing tools overwrite existing outputs; the attribution loop below
# exports every feature to the same scratch table, so this must be enabled.
arcpy.env.overwriteOutput = True

In [None]:
# Folder with the PROBA-V land cover fraction rasters (epoch 2019).
# Downloaded from https://zenodo.org/records/3939050
probav_folder = r"R:\FWL\Arismendi-Lab\Andres\Gilbert_Freshwater_Fish_Analysis\Revised_Analysis_NatureCommunications\Input_datasets\Environmental_datasets\PROBA-V_LandCover\CoverFraction"
# Feature class with the species ranges. It is given by name only (no path), so it
# presumably resolves against the active map or current workspace -- TODO confirm.
input_ranges = "Global_Grid_wgs84"
# Directory for intermediate processing files (the temporary GeoPackage is created here)
temp_folder = r"D:\Andres\Dam_Project_D\scratch"

In [2]:
# Create the list of land cover rasters found in the PROBA-V folder.
# The workspace is only switched for the duration of the `with` block.
with arcpy.EnvManager(workspace=probav_folder):
    # ListRasters may come back empty (or, reportedly, None) when the workspace
    # is missing or unreachable; normalise to a list so one check covers both.
    lc_rasters = arcpy.ListRasters() or []

# Fail here with a clear message instead of further down with an obscure
# IndexError/TypeError when the field names are derived from the raster names.
if not lc_rasters:
    raise FileNotFoundError(f"No rasters found in {probav_folder}")

lc_rasters # check list

<class 'NameError'>: name 'probav_folder' is not defined

In [5]:
# Derive one attribute field name per raster from the source file naming convention:
# the sixth "_"-separated token of the file name holds the land cover class,
# e.g. a token "Crops-CoverFraction-layer" becomes the field "LC_Crops".
lc_fields = []
for raster_name in lc_rasters:
    class_token = raster_name.split("_")[5]
    class_name = class_token.replace("-", "_").replace("_CoverFraction_layer", "")
    lc_fields.append("LC_" + class_name)
lc_fields

['LC_Crops',
 'LC_Grass',
 'LC_PermanentWater',
 'LC_SeasonalWater',
 'LC_Shrub',
 'LC_Snow',
 'LC_Tree']

In [6]:
# Build the "name FLOAT; name FLOAT; ..." field description string passed to AddFields
field_type_suffix = " FLOAT"
add_fields_str = (field_type_suffix + "; ").join(lc_fields) + field_type_suffix
add_fields_str

'LC_Crops FLOAT; LC_Grass FLOAT; LC_PermanentWater FLOAT; LC_SeasonalWater FLOAT; LC_Shrub FLOAT; LC_Snow FLOAT; LC_Tree FLOAT'

In [7]:
# Add one FLOAT field per land cover class to the ranges feature class;
# these fields receive the per-feature summary statistics computed below.
arcpy.management.AddFields(
    in_table=input_ranges,
    field_description=add_fields_str,
)

In [8]:
# Create a temporary GeoPackage (SQLite) in the scratch folder; the per-feature
# exports are written there for better compatibility with the exactextract library.
gpkg_path = fr"{temp_folder}\sqlite.gpkg"
arcpy.management.CreateSQLiteDatabase(gpkg_path, "GEOPACKAGE")

<class 'arcgisscripting.ExecuteError'>: ERROR 000601: Cannot delete D:\Andres\Dam_Project_D\scratch\sqlite.gpkg.  May be locked by another application.
Failed to execute (CreateSQLiteDatabase).


In [9]:
# By far this seems to be the fastest method for the resolution (100m) and number of rasters of PROBA-V land cover.
# The layers are not projected: exactextract's "mean" weights each cell by the fraction of the cell
# covered by the polygon (i.e. it works on cell coverage, not on ground area).
# NOTE(review): in a geographic CRS cells do not all cover the same ground area -- confirm this is acceptable.

fields = ['OBJECTID'] + lc_fields

# row[i] for i >= 1 is filled from lc_rasters[i - 1], so both lists must line up
if len(lc_fields) != len(lc_rasters):
    raise ValueError("lc_fields and lc_rasters are out of sync; re-run the cells that build them")

# Loop invariants, built once instead of once per feature.
# Every feature is exported to the same scratch table, which is overwritten on each
# iteration (relies on arcpy.env.overwriteOutput = True).
currRange = fr"{temp_folder}\sqlite.gpkg\currRange"
# Only sci_name is carried over to the scratch table
currFieldMap = fr'sci_name "sci_name" true true false 100 Text 0 0,First,#,{input_ranges},sci_name,0,99'
nFeatures = int(arcpy.management.GetCount(input_ranges)[0])

with arcpy.da.UpdateCursor(input_ranges, fields) as cursor:

    for row in tqdm(cursor, total=nFeatures):

        # Resume support: features whose land cover fields are all filled were
        # processed by a previous (possibly interrupted) run.
        if None not in row[1:]:
            continue

        # Isolate the current feature in the scratch GeoPackage
        arcpy.conversion.ExportFeatures(
            in_features=input_ranges,
            out_features=currRange,
            where_clause=f"OBJECTID = {row[0]}",
            field_mapping=currFieldMap,
        )

        # lc_fields was derived from lc_rasters in the same order, so position
        # i in the row (offset by the OBJECTID column) matches raster i - 1.
        for idx, lc_ras in enumerate(lc_rasters, start=1):

            # Keep values already stored for a partially processed feature
            if row[idx] is not None:
                continue

            currStats = exactextract.exact_extract(rast=fr"{probav_folder}\{lc_ras}",
                                                   vec=currRange,
                                                   ops="mean",
                                                   include_cols="sci_name",
                                                   include_geom=False,
                                                   output='pandas'
                                                  )

            # Positional access: the scratch table holds exactly one feature
            row[idx] = float(currStats['mean'].iloc[0])

            # an attempt to handle the apparent memory leak of exactextract
            del currStats
            gc.collect()

        # Write the feature once, after all of its land cover means are set
        cursor.updateRow(row)

 46%|████▌     | 7618/16528 [1:23:56<1:38:11,  1.51it/s]﻿


<class 'MemoryError'>: bad allocation

In [8]:
# export csv table
# The field mapping keeps sci_name plus one Float column per land cover field.
# It is built from lc_fields so it always matches the fields added earlier,
# instead of repeating a hard-coded list of field names.
sci_name_map = f'sci_name "sci_name" true true false 100 Text 0 0,First,#,{input_ranges},sci_name,0,99'
lc_field_maps = [
    f'{lc_field} "{lc_field}" true true false 4 Float 0 0,First,#,{input_ranges},{lc_field},-1,-1'
    for lc_field in lc_fields
]

arcpy.conversion.ExportTable(
    in_table=input_ranges,
    out_table=r"R:\FWL\Arismendi-Lab\Andres\Gilbert_Freshwater_Fish_Analysis\Revised_Analysis_NatureCommunications\Input_datasets\PROBAV_Attributes.csv",
    # individual field maps are separated by ";"
    field_mapping=";".join([sci_name_map] + lc_field_maps)
)

In [None]:
# delete intermediate files
# Raw f-string: "\s" in a non-raw literal is an invalid escape sequence
# (it only works by accident and triggers a warning on newer Python versions).
os.remove(fr"{temp_folder}\sqlite.gpkg")