Skip to content

Commit

Permalink
v.db.join: speed up processing by using fewer db.execute commands (#3286)
Browse files Browse the repository at this point in the history

* speed_up
* add comments
* remove addcolumn part
* remove unnecessary import
* use chunks
* remove not required import
* review MN; add TRANSACTION
* review tmszi

---------

Co-authored-by: Edouard Choinière <27212526+echoix@users.noreply.github.com>
  • Loading branch information
griembauer and echoix committed Apr 9, 2024
1 parent 2c93249 commit f98c3f2
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 42 deletions.
54 changes: 41 additions & 13 deletions scripts/v.db.addcolumn/v.db.addcolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,29 @@
# % key_desc: name type
# %end

import atexit
import os
from grass.exceptions import CalledModuleError
import grass.script as grass
from grass.script.utils import encode

rm_files = []


def cleanup():
    """Delete the temporary files registered in the module-level ``rm_files``.

    Entries that are not existing regular files are skipped. A removal that
    fails is reported through ``grass.warning`` instead of being raised, so
    the remaining entries are still processed.
    """
    for path in rm_files:
        if not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except Exception as err:
            # Best effort: report the failure and carry on with the next file.
            message = _("Unable to remove file {file}: {message}")
            grass.warning(message.format(file=path, message=err))


def main():
global rm_files
map = options["map"]
layer = options["layer"]
columns = options["columns"]
Expand All @@ -55,7 +73,7 @@ def main():
exists = bool(grass.find_file(map, element="vector", mapset=mapset)["file"])

if not exists:
grass.fatal(_("Vector map <%s> not found in current mapset") % map)
grass.fatal(_("Vector map <{}> not found in current mapset").format(map))

try:
f = grass.vector_db(map)[int(layer)]
Expand All @@ -79,28 +97,38 @@ def main():
driver = f["driver"]
column_existing = grass.vector_columns(map, int(layer)).keys()

add_str = "BEGIN TRANSACTION\n"
for col in columns:
if not col:
grass.fatal(_("There is an empty column. Did you leave a trailing comma?"))
col_name = col.split(" ")[0].strip()
if col_name in column_existing:
grass.error(_("Column <%s> is already in the table. Skipping.") % col_name)
grass.error(
_("Column <{}> is already in the table. Skipping.").format(col_name)
)
continue
grass.verbose(_("Adding column <%s> to the table") % col_name)
p = grass.feed_command(
"db.execute", input="-", database=database, driver=driver
grass.verbose(_("Adding column <{}> to the table").format(col_name))
add_str += f"ALTER TABLE {table} ADD COLUMN {col};\n"
add_str += "END TRANSACTION"
sql_file = grass.tempfile()
rm_files.append(sql_file)
cols_add_str = ",".join([col[0] for col in columns])
with open(sql_file, "w") as write_file:
write_file.write(add_str)
try:
grass.run_command(
"db.execute",
input=sql_file,
database=database,
driver=driver,
)
res = "ALTER TABLE {} ADD COLUMN {}".format(table, col)
p.stdin.write(encode(res))
grass.debug(res)
p.stdin.close()
if p.wait() != 0:
grass.fatal(_("Unable to add column <%s>.") % col)

except CalledModuleError:
grass.fatal(_("Error adding columns {}").format(cols_add_str))
# write cmd history:
grass.vector_history(map)


if __name__ == "__main__":
options, flags = grass.parser()
atexit.register(cleanup)
main()
102 changes: 73 additions & 29 deletions scripts/v.db.join/v.db.join.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,30 @@
# % description: Columns to exclude from the other table
# %end

import atexit
import os
import sys
import string
import grass.script as grass
from grass.exceptions import CalledModuleError

rm_files = []


def cleanup():
    """Delete the temporary files registered in the module-level ``rm_files``.

    Entries that are not existing regular files are skipped. A removal that
    fails is reported through ``grass.warning`` instead of being raised, so
    the remaining entries are still processed.
    """
    for path in rm_files:
        if not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except Exception as err:
            # Best effort: report the failure and carry on with the next file.
            message = _("Unable to remove file {file}: {message}")
            grass.warning(message.format(file=path, message=err))


def main():
global rm_files
map = options["map"]
layer = options["layer"]
column = options["column"]
Expand Down Expand Up @@ -109,14 +126,22 @@ def main():

# check if column is in map table
if column not in grass.vector_columns(map, layer):
grass.fatal(_("Column <%s> not found in table <%s>") % (column, maptable))
grass.fatal(
_("Column <{column}> not found in table <{table}>").format(
column=column, table=maptable
)
)

# describe other table
all_cols_ot = grass.db_describe(otable, driver=driver, database=database)["cols"]

# check if ocolumn is on other table
if ocolumn not in [ocol[0] for ocol in all_cols_ot]:
grass.fatal(_("Column <%s> not found in table <%s>") % (ocolumn, otable))
grass.fatal(
_("Column <{column}> not found in table <{table}>").format(
column=ocolumn, table=otable
)
)

# determine columns subset from other table
if not scolumns:
Expand All @@ -133,7 +158,11 @@ def main():
cols_to_add.append(col_ot)
break
if not found:
grass.warning(_("Column <%s> not found in table <%s>") % (scol, otable))
grass.warning(
_("Column <{column}> not found in table <{table}>").format(
column=scol, table=otable
)
)

# exclude columns from other table
if ecolumns:
Expand All @@ -145,15 +174,12 @@ def main():
# is SQL, so we lowercase the names here and in the test.
all_cols_tt = [name.lower() for name in all_cols_tt]

select = "SELECT $colname FROM $otable WHERE $otable.$ocolumn=$table.$column"
template = string.Template("UPDATE $table SET $colname=(%s);" % select)

cols_to_add_final = []
for col in cols_to_add:
# skip the vector column which is used for join
colname = col[0]
if colname == column:
continue

use_len = False
if len(col) > 2:
use_len = True
Expand All @@ -173,28 +199,45 @@ def main():

# add only the new column to the table
if colname.lower() not in all_cols_tt:
try:
grass.run_command(
"v.db.addcolumn", map=map, columns=colspec, layer=layer
)
except CalledModuleError:
grass.fatal(_("Error creating column <%s>") % colname)

stmt = template.substitute(
table=maptable,
column=column,
otable=otable,
ocolumn=ocolumn,
colname=colname,
cols_to_add_final.append(colspec)

cols_added = [col.split(" ")[0] for col in cols_to_add_final]
cols_added_str = ",".join(cols_added)
try:
grass.run_command(
"v.db.addcolumn", map=map, columns=cols_to_add_final, layer=layer
)
grass.debug(stmt, 1)
grass.verbose(_("Updating column <%s> of vector map <%s>...") % (colname, map))
try:
grass.write_command(
"db.execute", stdin=stmt, input="-", database=database, driver=driver
)
except CalledModuleError:
grass.fatal(_("Error filling column <%s>") % colname)
except CalledModuleError:
grass.fatal(_("Error creating columns <{}>").format(cols_added_str))

update_str = "BEGIN TRANSACTION\n"
for col in cols_added:
cur_up_str = (
f"UPDATE {maptable} SET {col} = (SELECT {col} FROM "
f"{otable} WHERE "
f"{otable}.{ocolumn}={maptable}.{column});\n"
)
update_str += cur_up_str
update_str += "END TRANSACTION"
grass.debug(update_str, 1)
grass.verbose(
_("Updating columns {columns} of vector map {map_name}...").format(
columns=cols_added_str, map_name=map
)
)
sql_file = grass.tempfile()
rm_files.append(sql_file)
with open(sql_file, "w") as write_file:
write_file.write(update_str)
try:
grass.run_command(
"db.execute",
input=sql_file,
database=database,
driver=driver,
)
except CalledModuleError:
grass.fatal(_("Error filling columns {}").format(cols_added_str))

# write cmd history
grass.vector_history(map)
Expand All @@ -204,4 +247,5 @@ def main():

if __name__ == "__main__":
options, flags = grass.parser()
atexit.register(cleanup)
sys.exit(main())

0 comments on commit f98c3f2

Please sign in to comment.