From ab3eb9c7a540af3e4f5d8a455a65d648b58a6716 Mon Sep 17 00:00:00 2001
From: Evan Thoms <ethoms@usgs.gov>
Date: Thu, 7 Dec 2023 12:16:55 -0900
Subject: [PATCH] Avoid the deprecated pandas method DataFrame.append; sort
 the DMU by hierarchykey

---
 Scripts/GeMS_GeolexCheck.py | 88 +++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 47 deletions(-)

diff --git a/Scripts/GeMS_GeolexCheck.py b/Scripts/GeMS_GeolexCheck.py
index 87ecb80..b80bffc 100644
--- a/Scripts/GeMS_GeolexCheck.py
+++ b/Scripts/GeMS_GeolexCheck.py
@@ -36,14 +36,10 @@
 import GeMS_utilityFunctions as guf
 
 
-versionString = "GeMS_GeolexCheck.py, 8/21/23"
-rawurl = "https://raw.githubusercontent.com/DOI-USGS/gems-tools-pro/master/Scripts/GeMS_GeolexCheck.py"
+versionString = "GeMS_GeolexCheck.py, 12/7/23"
+rawurl = "https://raw.githubusercontent.com/DOI-USGS/gems-tools-pro/master/Scripts/GeMS_GeolexCheck.py"
 guf.checkVersion(versionString, rawurl, "gems-tools-pro")
 
-# initialize empty list to collect usage matches in order to avoid
-# displaying redundant matches.
-usages = []
-
 
 # STRING AND USAGE
 def sanitize_text(usage_text):
@@ -255,6 +251,11 @@ def frame_it(d_path, ext_format):
     # in the input dmu
     dmu_df.columns = [c.lower() for c in dmu_df.columns]
 
+    # sort by hierarchykey when that column is present; otherwise keep input order
+    if "hierarchykey" in dmu_df.columns:
+        print("sorting")
+        dmu_df.sort_values("hierarchykey", inplace=True)
+
     return dmu_df
 
 
@@ -370,7 +371,7 @@ def format_excel(xlf):
 
 
 # START
-# ------------------------------------------------------------------------
+# -----------------------------------------------------------------------
 if len(sys.argv) == 1:
     print(__doc__)
     quit()
@@ -435,39 +436,32 @@ def format_excel(xlf):
 else:
     open_xl = True
 
-# units table of geolex db
-this_py = os.path.realpath(__file__)
-geolex_db = os.path.join(
-    os.path.dirname(this_py), "..", "Resources", "geolex_units.json"
-)
-
-# set up a pandas data frame
-d = {}
-df = pd.DataFrame(
-    columns=[
-        "HierarchyKey",
-        "MapUnit",
-        "Name",
-        "Fullname",
-        "Age",
-        "Extent",  # DMU Contents
-        "GeolexID",
-        "Name",
-        "Usage",
-        "Age",
-        "Extent",
-        "URL",  # Geolex Results
-        "Extent Match?",
-        "Usage Match?",
-        "Age Match?",
-        "Remarks",
-        "References",
-    ]
-)  # Author Review
-
-df["HierarchyKey"] = df["HierarchyKey"].astype("object")
-
-fields = ["hierarchykey", "mapunit", "name", "fullname", "age"]
+cols = [
+    "HierarchyKey",
+    "MapUnit",
+    "Name",
+    "Fullname",
+    "Age",
+    "Extent",  # DMU Contents
+    "GeolexID",
+    "Name",
+    "Usage",
+    "Age",
+    "Extent",
+    "URL",  # Geolex Results
+    "Extent Match?",
+    "Usage Match?",
+    "Age Match?",
+    "Remarks",
+    "References",
+]
+
+# initialize an empty list that will hold the rows of the data frame
+data = []
+
+# initialize empty list to collect usage matches in order to avoid
+# displaying redundant matches.
+usages = []
 
 n = 0
 for row in dmu_df.itertuples():
@@ -545,7 +539,6 @@ def format_excel(xlf):
         # initiate this row filling out the first 6 columns
         # needs to be defined outside of 'if matches' statement below for the case where
         # there are no valid matches
-        # unit_list = [mu, fn, fm, age, ext]
         unit_list = [hkey, mu, sn, fn, age, ", ".join(dmu_exts)]
 
         # initialize counter to determine contents of unit_list as matches are recorded
@@ -618,9 +611,8 @@ def format_excel(xlf):
                                 ]
                             )
 
-                            # add list to dataframe
-                            unit_series = pd.Series(unit_list, index=df.columns)
-                            df = df.append(unit_series, ignore_index=True)
+                            # add list to data list
+                            data.append(unit_list)
 
                             n = 1
                             i = 1
@@ -649,9 +641,11 @@ def format_excel(xlf):
                     ["", "", "", "", "", "", "no", "", "", "", ""]
                 )
 
-            # add list to dataframe
-            unit_series = pd.Series(unit_list, index=df.columns)
-            df = df.append(unit_series, ignore_index=True)
+            # add list to data list
+            data.append(unit_list)
+
+# make the data frame
+df = pd.DataFrame(data, columns=cols, dtype="string")
 
 xl_path = os.path.join(dmu_home, f"{out_name}_namescheck.xlsx")
 arcpy.AddMessage(f"Saving {xl_path}")