Skip to content

Commit

Permalink
Speeds up the import of ofx files by only doing one query at the end.
Browse files Browse the repository at this point in the history
The previous ofx import code performed one query for each imported transactions, which
was quite slow. The change consists of gathering all ofx transactions before doing the
query. The query must be wider to search for all matching accounts (in case the imported
transactions come from different accounts) and an enlarge date range (according to the
earliest and latest imported transaction). The rest of the code is identical to what was
done before. The final query is performed just before the matching dialog is displayed.
  • Loading branch information
jean authored and jeanlaroche committed Aug 2, 2020
1 parent 12ab85f commit 8551521
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 102 deletions.
69 changes: 1 addition & 68 deletions gnucash/import-export/import-backend.c
Expand Up @@ -607,7 +607,7 @@ matchmap_store_destination (GncImportMatchMap *matchmap,

/** @brief The transaction matching heuristics are here.
*/
static void split_find_match (GNCImportTransInfo * trans_info,
void split_find_match (GNCImportTransInfo * trans_info,
Split * split,
gint display_threshold,
double fuzzy_amount_difference)
Expand Down Expand Up @@ -821,66 +821,6 @@ static void split_find_match (GNCImportTransInfo * trans_info,
}
}/* end split_find_match */


/** /brief Iterate through all splits of the originating account of the given
transaction, and find all matching splits there. */
void gnc_import_find_split_matches(GNCImportTransInfo *trans_info,
gint process_threshold,
double fuzzy_amount_difference,
gint match_date_hardlimit)
{
GList * list_element;
Query *query = qof_query_create_for(GNC_ID_SPLIT);
g_assert (trans_info);

/* Get list of splits of the originating account. */
{
/* We used to traverse *all* splits of the account by using
xaccAccountGetSplitList, which is a bad idea because 90% of these
splits are outside the date range that is interesting. We should
rather use a query according to the date region, which is
implemented here.
*/
Account *importaccount =
xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (trans_info));
time64 download_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (trans_info));

qof_query_set_book (query, gnc_get_current_book());
xaccQueryAddSingleAccountMatch (query, importaccount,
QOF_QUERY_AND);
xaccQueryAddDateMatchTT (query,
TRUE, download_time - match_date_hardlimit * 86400,
TRUE, download_time + match_date_hardlimit * 86400,
QOF_QUERY_AND);
list_element = qof_query_run (query);
/* Sigh. Doesn't help too much. We still create and run one query
for each imported transaction. Maybe it would improve
performance further if there is one single (master-)query at
the beginning, matching the full date range and all accounts in
question. However, this doesn't quite work because this function
here is called from each gnc_gen_trans_list_add_trans(), which
is called one at a time. Therefore the whole importer would
have to change its behaviour: Accept the imported txns via
gnc_gen_trans_list_add_trans(), and only when
gnc_gen_trans_list_run() is called, then calculate all the
different match candidates. That's too much work for now.
*/
}

/* Traverse that list, calling split_find_match on each one. Note
that xaccAccountForEachSplit is declared in Account.h but
implemented nowhere :-( */
while (list_element != NULL)
{
split_find_match (trans_info, list_element->data,
process_threshold, fuzzy_amount_difference);
list_element = g_list_next (list_element);
}

qof_query_destroy (query);
}


/***********************************************************************
*/

Expand Down Expand Up @@ -1222,13 +1162,6 @@ gnc_import_TransInfo_init_matches (GNCImportTransInfo *trans_info,
GNCImportMatchInfo * best_match = NULL;
g_assert (trans_info);


/* Find all split matches in originating account. */
gnc_import_find_split_matches(trans_info,
gnc_import_Settings_get_display_threshold (settings),
gnc_import_Settings_get_fuzzy_amount (settings),
gnc_import_Settings_get_match_date_hardlimit (settings));

if (trans_info->match_list != NULL)
{
trans_info->match_list = g_list_sort(trans_info->match_list,
Expand Down
32 changes: 9 additions & 23 deletions gnucash/import-export/import-backend.h
Expand Up @@ -66,34 +66,20 @@ typedef enum _action
* online_id. */
gboolean gnc_import_exists_online_id (Transaction *trans);

/** Iterate through all splits of the originating account of the given
* transaction, find all matching splits there, and store them in the
* GNCImportTransInfo structure.
/** Evaluates the match between trans_info and split using the provided parameters.
*
* @param trans_info The TransInfo for which the corresponding
* matching existing transactions should be found.
* @param trans_info The TransInfo for the imported transaction
*
* @param process_threshold Each match whose heuristics are smaller
* than this value is totally ignored.
* @param split The register split that should be evaluated for a match.
*
* @param fuzzy_amount_difference For fuzzy amount matching, a certain
* fuzzyness in the matching amount is allowed up to this value. May
* be e.g. 3.00 dollars for ATM fees, or 0.0 if you only want to allow
* exact matches.
* @param display_threshold Minimum match score to include split in the list of matches.
*
* @param match_date_hardlimit The number of days that a matching
* split may differ from the given transaction before it is discarded
* immediately. In other words, any split that is more distant from
* the given transaction than this match_date_hardlimit days will be
* ignored altogether. For use cases without paper checks (e.g. HBCI),
* values like 14 (days) might be appropriate, whereas for use cases
* with paper checks (e.g. OFX, QIF), values like 42 (days) seem more
* appropriate.
* @param fuzzy_amount_difference Maximum amount difference to consider the match good.
*/
void gnc_import_find_split_matches(GNCImportTransInfo *trans_info,
gint process_threshold,
double fuzzy_amount_difference,
gint match_date_hardlimit);
void split_find_match (GNCImportTransInfo * trans_info,
Split * split,
gint display_threshold,
double fuzzy_amount_difference);

/** Iterates through all splits of the originating account of
* trans_info. Sorts the resulting list and sets the selected_match
Expand Down
114 changes: 103 additions & 11 deletions gnucash/import-export/import-main-matcher.c
Expand Up @@ -52,6 +52,7 @@
#include "gnc-component-manager.h"
#include "guid.h"
#include "gnc-session.h"
#include "Query.h"

#define GNC_PREFS_GROUP "dialogs.import.generic.transaction-list"
#define IMPORT_MAIN_MATCHER_CM_CLASS "transaction-matcher-dialog"
Expand Down Expand Up @@ -109,6 +110,8 @@ gboolean on_matcher_delete_event (GtkWidget *widget, GdkEvent *event, gpointer d
void on_matcher_help_clicked (GtkButton *button, gpointer user_data);
void on_matcher_help_close_clicked (GtkButton *button, gpointer user_data);

static void gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui);

/* Local prototypes */
static void gnc_gen_trans_assign_transfer_account (
GtkTreeView *treeview,
Expand Down Expand Up @@ -191,6 +194,7 @@ gboolean gnc_gen_trans_list_empty(GNCImportMainMatcher *info)

void gnc_gen_trans_list_show_all(GNCImportMainMatcher *info)
{
gnc_gen_trans_list_finalize (info);
gtk_widget_show_all (GTK_WIDGET (info->main_widget));
}

Expand Down Expand Up @@ -1435,8 +1439,6 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
GNCImportTransInfo * transaction_info = NULL;
GtkTreeModel *model;
GtkTreeIter iter;
GNCImportMatchInfo *selected_match;
gboolean match_selected_manually;
g_assert (gui);
g_assert (trans);

Expand All @@ -1447,26 +1449,116 @@ void gnc_gen_trans_list_add_trans_with_ref_id (GNCImportMainMatcher *gui, Transa
{
transaction_info = gnc_import_TransInfo_new (trans, NULL);
gnc_import_TransInfo_set_ref_id (transaction_info, ref_id);
model = gtk_tree_view_get_model (gui->view);
gtk_list_store_append (GTK_LIST_STORE(model), &iter);
refresh_model_row (gui, model, &iter, transaction_info);
}
return;
}

// This creates a list of all splits that could match one of the imported transactions based on their
// account and date.
static GList*
create_list_of_potential_matches (GtkTreeModel* model, Query *query, gint match_date_hardlimit)
{
GList* list_element = NULL;
GtkTreeIter iter;
Account *import_trans_account;
time64 import_trans_time, min_time=G_MAXINT64, max_time=0;
GList* all_accounts = NULL;

// Go through all imported transactions, gather the list of accounts, and min/max date range.
gboolean valid = gtk_tree_model_get_iter_first (model, &iter);
while(valid)
{
GNCImportTransInfo* transaction_info;
gtk_tree_model_get (model, &iter,
DOWNLOADED_COL_DATA, &transaction_info,
-1);
import_trans_account = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));
all_accounts = g_list_prepend (all_accounts, import_trans_account);
import_trans_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (transaction_info));
min_time = MIN (min_time, import_trans_time);
max_time = MAX (max_time, import_trans_time);

valid = gtk_tree_model_iter_next (model, &iter);
}

gnc_import_TransInfo_init_matches (transaction_info,
gui->user_settings);
// Make a query to find splits with the right accounts and dates.
qof_query_set_book (query, gnc_get_current_book());
xaccQueryAddAccountMatch (query, all_accounts,
QOF_GUID_MATCH_ANY, QOF_QUERY_AND);
xaccQueryAddDateMatchTT (query,
TRUE, min_time - match_date_hardlimit * 86400,
TRUE, max_time + match_date_hardlimit * 86400,
QOF_QUERY_AND);
list_element = qof_query_run (query);
g_list_free (all_accounts);
return list_element;
}

selected_match =
gnc_import_TransInfo_get_selected_match (transaction_info);
match_selected_manually =
gnc_import_TransInfo_get_match_selected_manually (transaction_info);
static void
gnc_gen_trans_list_finalize (GNCImportMainMatcher *gui)
{
GtkTreeModel* model = gtk_tree_view_get_model (gui->view);
GtkListStore* store = GTK_LIST_STORE (model);
GtkTreeIter iter;
GNCImportMatchInfo *selected_match;
gboolean match_selected_manually;
Account *importaccount;
GList* list_element = NULL;
GList* iter_2 = NULL;
Query *query;
gint match_date_hardlimit = gnc_import_Settings_get_match_date_hardlimit (gui->user_settings);
gint display_threshold = gnc_import_Settings_get_display_threshold (gui->user_settings);
double fuzzy_amount = gnc_import_Settings_get_fuzzy_amount (gui->user_settings);
gboolean valid;

query = qof_query_create_for (GNC_ID_SPLIT);
// Gather the list of splits that could match one of the imported transactions
list_element = create_list_of_potential_matches (model, query, match_date_hardlimit);

// For each imported transaction, go through list_element, select the ones that have the
// right account, and evaluate how good a match they are.
valid = gtk_tree_model_get_iter_first (model, &iter);
while(valid)
{
GNCImportTransInfo* transaction_info;
Account *importaccount;
gtk_tree_model_get (model, &iter,
DOWNLOADED_COL_DATA, &transaction_info,
-1);
importaccount = xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (transaction_info));

iter_2 = list_element;
while (iter_2 != NULL)
{
// One issue here is that the accounts may not match since the query was
// called for all accounts so we need to check here.
Account* split_account = xaccSplitGetAccount(iter_2->data);
if (qof_instance_guid_compare (importaccount, split_account) == 0)
split_find_match (transaction_info, iter_2->data, display_threshold, fuzzy_amount);
iter_2 = g_list_next (iter_2);
}

// Sort the matches, select the best match, and set the action.
gnc_import_TransInfo_init_matches (transaction_info, gui->user_settings);

selected_match = gnc_import_TransInfo_get_selected_match (transaction_info);
match_selected_manually = gnc_import_TransInfo_get_match_selected_manually (transaction_info);

if (selected_match)
gnc_import_PendingMatches_add_match (gui->pending_matches,
selected_match,
match_selected_manually);

model = gtk_tree_view_get_model (gui->view);
gtk_tree_store_append (GTK_TREE_STORE(model), &iter, NULL);
refresh_model_row (gui, model, &iter, transaction_info);
valid = gtk_tree_model_iter_next (model, &iter);
}

qof_query_destroy (query);
return;
}/* end gnc_import_add_trans_with_ref_id() */
}

GtkWidget *gnc_gen_trans_list_widget (GNCImportMainMatcher *info)
{
Expand Down

0 comments on commit 8551521

Please sign in to comment.