In [1]:
import re
import urllib
import datetime as dt
from time import perf_counter
from pathlib import Path


import pandas as pd
import pyodbc
import sqlalchemy as sqla

import arcpy
from arcgis.features import GeoAccessor, GeoSeriesAccessor

def get_odbc_driver(available_drivers=None):
    """Return the newest installed driver named "ODBC Driver <version> for SQL Server".

    Args:
        available_drivers: optional iterable of installed ODBC driver names.
            When omitted, the names reported by ``pyodbc.drivers()`` are used.

    Returns:
        str: the installed driver name with the highest integer version.

    Raises:
        RuntimeError: if no installed driver follows the
            "ODBC Driver <version> for SQL Server" naming pattern.
    """
    if available_drivers is None:
        available_drivers = pyodbc.drivers()
    installed = list(available_drivers)

    # Match the whole name, not just the 'ODBC Driver ' substring: other vendors'
    # drivers (e.g. "Amazon Redshift ODBC Driver (x64)") contain that substring too
    # and would otherwise contribute a bogus version number or no number at all.
    name_pattern = re.compile(r'ODBC Driver (\d+) for SQL Server')

    versions = {}
    for name in installed:
        matched = name_pattern.fullmatch(name)
        if matched:
            versions[int(matched.group(1))] = name

    if not versions:
        raise RuntimeError(
            "ERROR. No usable ODBC Driver found for SQL Server. "
            f"drivers found include {installed}. Check ODBC Administrator program "
            "for more information."
        )

    # compare versions as integers so that e.g. 11 outranks 9
    return versions[max(versions)]

# Resolve the SQL Server ODBC driver name once; `driver` is interpolated into the
# connection string when the database engine is created.
driver = get_odbc_driver()

print("modules loaded.")

modules loaded.


In [18]:
# BASED ON TESTING, THIS IS A STILL SLOW BUT FASTER THAN ESRI CHUNK OF CODE TO RUN
# A SQL QUERY AND LOAD DIRECTLY INTO AN ESRI FEATURE CLASS

#-------------DEFINE PARAMETERS AND BUILD QUERY-----------------
ilut_tbl = 'ilut_combined2020_63_DPS'  # "TEST_DC_ilut_combined2020_63_DPS"
eto_tbl = 'raw_eto2020_DPS_latest'

# Columns pulled from the ILUT table (aliased "ilut" in the query).
jnkey_ilut = 'PARCELID'
ilut_fields = [jnkey_ilut, 'XCOORD', 'YCOORD', 'GISAc', 'JURIS', 'County', 'DU',
               'POP_TOT', 'HH_hh', 'ENR_K12', 'EMPTOT', 'EMPFOOD', 'EMPRET', 'EMPSVC', 'EMPIND',
               'PT_TOT_RES', 'SOV_TOT_RES', 'HOV_TOT_RES', 'TRN_TOT_RES', 'BIK_TOT_RES', 'WLK_TOT_RES',
               'VMT_TOT_RES']
ilut_cols = ', '.join(f"ilut.{fname}" for fname in ilut_fields)

# Columns pulled from the ETO table (aliased "eto" in the query).
jnkey_eto = 'PARCELID'
eto_fields = ['LU'] # exclude join key because you do not want duplicate columns
# Qualify with the eto alias: prefixing these with "ilut." would read the columns
# from the ILUT table and leave the joined ETO table contributing nothing.
eto_cols = ', '.join(f"eto.{fname}" for fname in eto_fields)

query_str = f"""SELECT
    {ilut_cols},
    {eto_cols}
    FROM {ilut_tbl} ilut
        JOIN {eto_tbl} eto
            ON ilut.{jnkey_ilut} = eto.{jnkey_eto}"""

# query column name -> output feature class field name
rename_dict = {'DU': 'DU_TOT', 'LU': 'LUTYPE'}
output_gdb = r'I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb'

# Output name reuses whatever follows "ilut_combined" in the source table name.
lutag = ilut_tbl.split('ilut_combined')[1]
out_tbl_name = f"ppa_pclpnt{lutag}"
output_tbl = str(Path(output_gdb).joinpath(out_tbl_name))

In [19]:
#---------CREATE ITERABLE DATAFRAME WITH CHUNKS----------------
servername = 'SQL-SVR'
dbname = 'MTP2024'
trustedconn = 'yes'

conn_str = f"DRIVER={driver};" \
        f"SERVER={servername};" \
        f"DATABASE={dbname};" \
        f"Trusted_Connection={trustedconn}"

# the raw ODBC string is URL-encoded so it can be passed through odbc_connect
conn_str = urllib.parse.quote_plus(conn_str)
engine = sqla.create_engine(f"mssql+pyodbc:///?odbc_connect={conn_str}")

# chunksize makes read_sql_query return an iterator of DataFrames instead of one frame
df_itr = pd.read_sql_query(sql=query_str, con=engine, chunksize=1000)
print("df chunk iterator created.")


def widen_text_fields(fc_path, rec):
    """Lengthen fields in `fc_path` that are too short for the string values in `rec`.

    Args:
        fc_path: path of the feature class whose fields are checked.
        rec: dict mapping field name -> value for the row being inserted.

    Returns:
        list[str]: names of the fields whose length was increased (empty if none).
    """
    # length of each string value in the current row
    str_lengths = {fname: len(v) for fname, v in rec.items() if isinstance(v, str)}

    widened = []
    for fld in arcpy.ListFields(fc_path):
        needed = str_lengths.get(fld.name)
        if needed and needed > fld.length:
            # grow the field to the length of the offending value
            arcpy.management.AlterField(fc_path, field=fld.name, field_length=needed)
            print(f"\tupdated field {fld.name} to accommodate longer string value.")
            widened.append(fld.name)
    return widened


#------GO THROUGH CHUNKS AND LOAD INTO FEATURE CLASS-----------------
st = perf_counter()

rowcnt = 0
for i, chunk in enumerate(df_itr):
    chunk = chunk.rename(columns=rename_dict)
    rowcnt += chunk.shape[0]
    # NOTE(review): only the first chunk's spatial frame is written out below, but
    # from_xy may also modify `chunk` itself - confirm before moving this call
    # into the i == 0 branch.
    chunk_s = pd.DataFrame.spatial.from_xy(chunk, x_column='XCOORD', y_column='YCOORD', sr=2226)
    if i == 0:
        # first chunk creates the feature class and fixes its schema
        print(f"creating feature class {output_tbl}...")
        chunk_s.spatial.to_featureclass(output_tbl, sanitize_columns=False)
        out_tbl_fnames = [f.name for f in arcpy.ListFields(output_tbl)]
        print("loading rows...")
    else:
        # remaining chunks are appended row by row through an insert cursor
        attr_fields = [f for f in out_tbl_fnames if f in chunk.columns] # feature class field order
        fields_to_use = attr_fields + ['SHAPE@XY']
        with arcpy.da.InsertCursor(output_tbl, field_names=fields_to_use) as inscur:
            for rec in chunk.to_dict(orient='records'):
                coords = (float(rec['XCOORD']), float(rec['YCOORD']))
                row = [rec[fname] for fname in attr_fields]
                row.append(coords)
                try:
                    inscur.insertRow(row)
                except RuntimeError as rte:
                    # A failed insert is treated as a string value longer than the
                    # field's current length: widen the field(s) and retry once.
                    if not widen_text_fields(output_tbl, rec):
                        raise RuntimeError(
                            f"row could not be inserted and no text field needed widening; record: {rec}"
                        ) from rte
                    inscur.insertRow(row)

        if rowcnt % 100_000 == 0:
            print(f"\t{rowcnt} rows loaded...")

elapsed = round((perf_counter() - st) / 60, 1)
print(f"{rowcnt} rows inserted in {elapsed} mins.")
print(f"output table - {output_tbl}")

df chunk iterator created.
creating feature class I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb\ppa_pclpnt2020_63_DPS...
loading rows...
	updated field LUTYPE to accommodate longer string value.
	updated field JURIS to accommodate longer string value.
	100000 rows loaded...
	updated field County to accommodate longer string value.
	200000 rows loaded...
	300000 rows loaded...
	400000 rows loaded...
	500000 rows loaded...
	600000 rows loaded...
	700000 rows loaded...
	800000 rows loaded...
830280 rows inserted in 7.8 mins.
output table - I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb\ppa_pclpnt2020_63_DPS


In [28]:
# SPATIAL JOIN EJ DATA WITH FILTER APPLIED

pcltbl = output_tbl
pcl_ej_field = 'EJ_AREA'
ej_layer = r'I:\Projects\Darren\PPA3_GIS\PPA3_GIS.gdb\EJ_2025_final'

fl_ej = 'fl_ej'
fl_pcl = 'fl_pcl'


def set_layer_field(layer, field, value):
    """Write `value` into `field` for every row an UpdateCursor on `layer` yields."""
    with arcpy.da.UpdateCursor(layer, [field]) as cursor:
        for rec in cursor:
            rec[0] = value
            cursor.updateRow(rec)


# remove any existing layers with these names before they are created again below
for layer_name in (fl_ej, fl_pcl):
    if arcpy.Exists(layer_name):
        arcpy.management.Delete(layer_name)

ej_fields = ['EJ_Label', 'Notes']
arcpy.management.MakeFeatureLayer(ej_layer, fl_ej)

# restrict the EJ layer to the polygons matching the filter
ej_filter = "EJ_Label <> 'Minority' Or Notes = 'Equity Priority Area'"
arcpy.management.SelectLayerByAttribute(fl_ej, where_clause=ej_filter)

# add the tag field to the parcel table only when it is missing
existing_fields = {fld.name for fld in arcpy.ListFields(pcltbl)}
if pcl_ej_field not in existing_fields:
    arcpy.management.AddField(pcltbl, pcl_ej_field, 'SHORT')

print("updating EJ area tags for parcels...")
arcpy.management.MakeFeatureLayer(pcltbl, fl_pcl)

# baseline: every parcel starts as 0 (not an EJ area)
set_layer_field(fl_pcl, pcl_ej_field, 0)

# parcels selected as WITHIN the filtered EJ polygons are then set to 1 (is EJ area)
arcpy.management.SelectLayerByLocation(fl_pcl, overlap_type='WITHIN', select_features=fl_ej)
set_layer_field(fl_pcl, pcl_ej_field, 1)

arcpy.management.SelectLayerByAttribute(fl_pcl, selection_type='CLEAR_SELECTION')

print("updated EJ tags.")

updating EJ area tags for parcels...
updated EJ tags.


In [22]:
# Exploratory check: fetch the field info object of the EJ feature layer and
# list the attributes/methods it exposes.
fi = arcpy.Describe(fl_ej).fieldInfo
dir(fi)

<geoprocessing field info object at 0x2d27e363990>