From ab3eb9c7a540af3e4f5d8a455a65d648b58a6716 Mon Sep 17 00:00:00 2001 From: Evan Thoms Date: Thu, 7 Dec 2023 12:16:55 -0900 Subject: [PATCH] - avoids now deprecated method in Pandas, DataFrame.append - sorts dmu --- Scripts/GeMS_GeolexCheck.py | 88 +++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/Scripts/GeMS_GeolexCheck.py b/Scripts/GeMS_GeolexCheck.py index 87ecb80..b80bffc 100644 --- a/Scripts/GeMS_GeolexCheck.py +++ b/Scripts/GeMS_GeolexCheck.py @@ -36,14 +36,10 @@ import GeMS_utilityFunctions as guf -versionString = "GeMS_GeolexCheck.py, 8/21/23" -rawurl = "https://raw.githubusercontent.com/DOI-USGS/gems-tools-pro/master/Scripts/GeMS_GeolexCheck.py" +versionString = "GeMS_GeolexCheck.py, 12/7/23" +rawurl = "https://raw.githubusercontent.com/DOI-USGS/gems-tools-pro/master/Scripts/GeMS_GeolexCheck.py" guf.checkVersion(versionString, rawurl, "gems-tools-pro") -# initialize empty list to collect usage matches in order to avoid -# displaying redundant matches. 
-usages = [] - # STRING AND USAGE def sanitize_text(usage_text): @@ -255,6 +251,11 @@ def frame_it(d_path, ext_format): # in the input dmu dmu_df.columns = [c.lower() for c in dmu_df.columns] + # hope there is hierarchykey to sort on + if "hierarchykey" in dmu_df.columns: + print("sorting") + dmu_df.sort_values("hierarchykey", inplace=True) + return dmu_df @@ -370,7 +371,7 @@ def format_excel(xlf): # START -# ------------------------------------------------------------------------ +# ----------------------------------------------------------------------- if len(sys.argv) == 1: print(__doc__) quit() @@ -435,39 +436,32 @@ def format_excel(xlf): else: open_xl = True -# units table of geolex db -this_py = os.path.realpath(__file__) -geolex_db = os.path.join( - os.path.dirname(this_py), "..", "Resources", "geolex_units.json" -) - -# set up a pandas data frame -d = {} -df = pd.DataFrame( - columns=[ - "HierarchyKey", - "MapUnit", - "Name", - "Fullname", - "Age", - "Extent", # DMU Contents - "GeolexID", - "Name", - "Usage", - "Age", - "Extent", - "URL", # Geolex Results - "Extent Match?", - "Usage Match?", - "Age Match?", - "Remarks", - "References", - ] -) # Author Review - -df["HierarchyKey"] = df["HierarchyKey"].astype("object") - -fields = ["hierarchykey", "mapunit", "name", "fullname", "age"] +cols = [ + "HierarchyKey", + "MapUnit", + "Name", + "Fullname", + "Age", + "Extent", # DMU Contents + "GeolexID", + "Name", + "Usage", + "Age", + "Extent", + "URL", # Geolex Results + "Extent Match?", + "Usage Match?", + "Age Match?", + "Remarks", + "References", +] + +# initialize an empty list that will hold all of data frame data +data = [] + +# initialize empty list to collect usage matches in order to avoid +# displaying redundant matches. 
+usages = [] n = 0 for row in dmu_df.itertuples(): @@ -545,7 +539,6 @@ def format_excel(xlf): # initiate this row filling out the first 6 columns # needs to be defined outside of 'if matches' statement below for the case where # there are no valid matches - # unit_list = [mu, fn, fm, age, ext] unit_list = [hkey, mu, sn, fn, age, ", ".join(dmu_exts)] # initialize counter to determine contents of unit_list as matches are recorded @@ -618,9 +611,8 @@ def format_excel(xlf): ] ) - # add list to dataframe - unit_series = pd.Series(unit_list, index=df.columns) - df = df.append(unit_series, ignore_index=True) + # add list to data list + data.append(unit_list) n = 1 i = 1 @@ -649,9 +641,11 @@ def format_excel(xlf): ["", "", "", "", "", "", "no", "", "", "", ""] ) - # add list to dataframe - unit_series = pd.Series(unit_list, index=df.columns) - df = df.append(unit_series, ignore_index=True) + # add list to data list + data.append(unit_list) + +# make the data frame +df = pd.DataFrame(data, columns=cols, dtype="string") xl_path = os.path.join(dmu_home, f"{out_name}_namescheck.xlsx") arcpy.AddMessage(f"Saving {xl_path}")