Skip to content

Commit

Permalink
- avoids now deprecated method in Pandas, DataFrame.append
Browse files Browse the repository at this point in the history
- sorts dmu
  • Loading branch information
ethoms-usgs committed Dec 7, 2023
1 parent 5457f7a commit ab3eb9c
Showing 1 changed file with 41 additions and 47 deletions.
88 changes: 41 additions & 47 deletions Scripts/GeMS_GeolexCheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,10 @@
import GeMS_utilityFunctions as guf


versionString = "GeMS_GeolexCheck.py, 8/21/23"
rawurl = "https://raw.githubusercontent.com/DOI-USGS/gems-tools-pro/master/Scripts/GeMS_GeolexCheck.py"
versionString = "GeMS_GeolexCheck.py, 12/7/23"
rawurl = "https://raw.githubusercontent.com/DOI-USGS/gems-tools-pro/master/Scripts/GeMS_GeolexCheck.py"
guf.checkVersion(versionString, rawurl, "gems-tools-pro")

# initialize empty list to collect usage matches in order to avoid
# displaying redundant matches.
usages = []


# STRING AND USAGE
def sanitize_text(usage_text):
Expand Down Expand Up @@ -255,6 +251,11 @@ def frame_it(d_path, ext_format):
# in the input dmu
dmu_df.columns = [c.lower() for c in dmu_df.columns]

# sort by hierarchykey when the DMU table has that column
if "hierarchykey" in dmu_df.columns:
print("sorting")
dmu_df.sort_values("hierarchykey", inplace=True)

return dmu_df


Expand Down Expand Up @@ -370,7 +371,7 @@ def format_excel(xlf):


# START
# ------------------------------------------------------------------------
# -----------------------------------------------------------------------
if len(sys.argv) == 1:
print(__doc__)
quit()
Expand Down Expand Up @@ -435,39 +436,32 @@ def format_excel(xlf):
else:
open_xl = True

# units table of geolex db
this_py = os.path.realpath(__file__)
geolex_db = os.path.join(
os.path.dirname(this_py), "..", "Resources", "geolex_units.json"
)

# set up a pandas data frame
d = {}
df = pd.DataFrame(
columns=[
"HierarchyKey",
"MapUnit",
"Name",
"Fullname",
"Age",
"Extent", # DMU Contents
"GeolexID",
"Name",
"Usage",
"Age",
"Extent",
"URL", # Geolex Results
"Extent Match?",
"Usage Match?",
"Age Match?",
"Remarks",
"References",
]
) # Author Review

df["HierarchyKey"] = df["HierarchyKey"].astype("object")

fields = ["hierarchykey", "mapunit", "name", "fullname", "age"]
cols = [
"HierarchyKey",
"MapUnit",
"Name",
"Fullname",
"Age",
"Extent", # DMU Contents
"GeolexID",
"Name",
"Usage",
"Age",
"Extent",
"URL", # Geolex Results
"Extent Match?",
"Usage Match?",
"Age Match?",
"Remarks",
"References",
]

# initialize an empty list that will hold every row of the data frame
data = []

# initialize empty list to collect usage matches in order to avoid
# displaying redundant matches.
usages = []

n = 0
for row in dmu_df.itertuples():
Expand Down Expand Up @@ -545,7 +539,6 @@ def format_excel(xlf):
# initiate this row filling out the first 6 columns
# needs to be defined outside of 'if matches' statement below for the case where
# there are no valid matches
# unit_list = [mu, fn, fm, age, ext]
unit_list = [hkey, mu, sn, fn, age, ", ".join(dmu_exts)]

# initialize counter to determine contents of unit_list as matches are recorded
Expand Down Expand Up @@ -618,9 +611,8 @@ def format_excel(xlf):
]
)

# add list to dataframe
unit_series = pd.Series(unit_list, index=df.columns)
df = df.append(unit_series, ignore_index=True)
# add list to data list
data.append(unit_list)

n = 1
i = 1
Expand Down Expand Up @@ -649,9 +641,11 @@ def format_excel(xlf):
["", "", "", "", "", "", "no", "", "", "", ""]
)

# add list to dataframe
unit_series = pd.Series(unit_list, index=df.columns)
df = df.append(unit_series, ignore_index=True)
# add list to data list
data.append(unit_list)

# make the data frame
df = pd.DataFrame(data, columns=cols, dtype="string")

xl_path = os.path.join(dmu_home, f"{out_name}_namescheck.xlsx")
arcpy.AddMessage(f"Saving {xl_path}")
Expand Down

0 comments on commit ab3eb9c

Please sign in to comment.