Skip to content

Commit 3ee6b94

Browse files
committed
Improve checks for data import
1 parent 5c8d4e2 commit 3ee6b94

File tree

5 files changed

+76
-35
lines changed

5 files changed

+76
-35
lines changed

importer/src/hierarchy.cpp

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,43 +114,56 @@ void Hierarchy::finalize()
114114
}
115115

116116
std::cout << "Hierarchy: active items: " << index - 1
117-
<< " / cleared items: " << m_items.size() - (index - 1) << "\n";
117+
<< " / cleared items: " << m_items.size() - (index - 1) << std::flush;
118118
}
119119

120-
void Hierarchy::check_indexing()
120+
bool Hierarchy::check_indexing()
121121
{
122+
std::cout << "Check whether all items are indexed\n";
123+
124+
m_index_check_failed.clear();
125+
bool isok = true;
122126
for (auto itemp : m_items)
123127
{
124128
auto item = itemp.second;
125-
if (item->keep() && !item->indexed())
129+
if (item->keep(false) && !item->indexed())
126130
{
131+
isok = false;
127132
std::cout << "\nItem is not included into hierarchy while it should be\n";
128133
item->print_item(0);
129134

130-
std::cout << "\nItem part of hierarchy (child -> parent):\n" << item->id() << " ";
131-
auto i = item;
135+
std::cout << "\nItem part of hierarchy (child -> parent):\n";
136+
137+
auto i = item;
138+
std::set<hindex> ids;
132139
for (auto parent = item->parent_id(); parent != 0; parent = i->parent_id())
133140
{
134-
if (m_items.find(parent) != m_items.end())
141+
if (m_items.find(parent) == m_items.end())
135142
{
136143
std::cout << "\nCannot find parent with ID " << parent << "\n";
137144
break;
138145
}
139146

140-
if (i->id() == item->id())
147+
i = m_items[parent];
148+
i->print_item(0);
149+
150+
if (ids.count(i->id()) > 0)
141151
{
142152
std::cout << "\nCyclic branch detected\n";
153+
m_index_check_failed.insert(ids.begin(), ids.end());
143154
break;
144155
}
145-
146-
i = m_items[parent];
147-
std::cout << i->id() << " ";
156+
ids.insert(i->id());
148157
}
149158
std::cout << "\n\n";
150-
151-
throw std::runtime_error("Item is not included into hierarchy while it should be");
152159
}
153160
}
161+
162+
if (isok)
163+
std::cout << "Items indexing check passed\n";
164+
else
165+
std::cout << "Items indexing check FAILED\n";
166+
return isok;
154167
}
155168

156169
void Hierarchy::write(sqlite3pp::database &db) const

importer/src/hierarchy.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class Hierarchy
2323
void set_country(const std::string &country, hindex id);
2424
void cleanup();
2525
void finalize();
26-
void check_indexing();
26+
bool check_indexing();
2727
void write(sqlite3pp::database &db) const;
2828

2929
size_t get_missing_count() const { return m_root.size(); }
@@ -33,6 +33,8 @@ class Hierarchy
3333
hindex get_next_nonzero_root_parent() const;
3434
std::set<std::string> get_root_countries() const;
3535

36+
std::set<hindex> get_failed_indexes() const { return m_index_check_failed; }
37+
3638
void print(bool full = true) const;
3739
void print_root_with_parent_id(hindex parent_id) const;
3840

@@ -43,6 +45,7 @@ class Hierarchy
4345
std::map<hindex, std::shared_ptr<HierarchyItem> > m_items;
4446
std::map<hindex, std::set<std::shared_ptr<HierarchyItem> > > m_root;
4547
std::deque<std::shared_ptr<HierarchyItem> > m_root_finalized;
48+
std::set<hindex> m_index_check_failed;
4649
};
4750

4851
#endif

importer/src/hierarchyitem.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,26 @@ void HierarchyItem::load_skip_list(const std::string &fname)
8383
s_skip_types = load_list(fname);
8484
}
8585

86-
bool HierarchyItem::keep() const
86+
void HierarchyItem::drop()
8787
{
88+
m_dropped = true;
89+
}
90+
91+
bool HierarchyItem::keep(bool verbose) const
92+
{
93+
if (m_dropped)
94+
return false;
95+
8896
if (m_type.find_first_not_of(allowed_type_chars) != std::string::npos)
8997
{
90-
std::cout << "Dropping " << m_type << "\n";
98+
if (verbose)
99+
std::cout << "Dropping " << m_type << "\n";
91100
return false;
92101
}
102+
93103
if (s_skip_types.count(m_type) > 0)
94104
return false;
105+
95106
return !m_name.empty() || s_priority_types.count(m_type) > 0;
96107
}
97108

@@ -150,7 +161,7 @@ void HierarchyItem::set_parent(hindex parent, bool force)
150161
// c->set_parent(m_id, force);
151162
}
152163

153-
void HierarchyItem::cleanup_children(bool duplicate_only)
164+
void HierarchyItem::cleanup_children()
154165
{
155166
// as a result of this run, children that are supposed to be kept are staying in children
156167
// property. all disposed ones are still pointed to via Hierarchy map, but should not be accessed
@@ -193,10 +204,11 @@ void HierarchyItem::cleanup_children(bool duplicate_only)
193204
i->m_children.end());
194205
for (auto &i_children : i->m_children)
195206
i_children->set_parent(item->m_id, true);
207+
i->drop();
196208
}
197209

198210
if (had_duplicates)
199-
item->cleanup_children(true);
211+
item->cleanup_children();
200212

201213
m_children = children;
202214
}
@@ -271,8 +283,7 @@ void HierarchyItem::print_item(unsigned int offset) const
271283
std::cout << std::string(offset, ' ') << "- " << m_id << " ";
272284
if (!m_housenumber.empty())
273285
std::cout << "house " << m_housenumber << " ";
274-
for (const auto &i : m_data_name)
275-
std::cout << i.first << ": " << i.second << " ";
286+
std::cout << m_name << " ";
276287
std::cout << "(" << m_my_index << " " << m_last_child_index << ": "
277288
<< m_last_child_index - m_my_index << ": " << m_parent_id << ", " << m_country
278289
<< ", osmid=" << m_osm_id << ")\n";

importer/src/hierarchyitem.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,18 @@ class HierarchyItem
2020
hindex linked_id() const { return m_linked_id; }
2121
hindex parent_id() const { return m_parent_id; }
2222
const std::string &country() const { return m_country; }
23-
bool keep() const;
23+
bool keep(bool verbose = true) const;
2424
bool indexed() const { return m_my_index > 0; }
2525

26+
void drop();
27+
bool dropped() const { return m_dropped; }
28+
2629
const std::deque<std::shared_ptr<HierarchyItem> > &children() { return m_children; }
2730

2831
void add_child(std::shared_ptr<HierarchyItem> child);
2932
void add_linked(std::shared_ptr<HierarchyItem> linked);
3033
void set_parent(hindex parent, bool force = false);
31-
void cleanup_children(bool duplicate_only = false);
34+
void cleanup_children();
3235
sqlid index(sqlid idx, sqlid parent);
3336
void write(sqlite3pp::database &db) const;
3437

@@ -50,6 +53,7 @@ class HierarchyItem
5053
sqlid m_my_index{ 0 };
5154
sqlid m_parent_index{ 0 };
5255
sqlid m_last_child_index{ 0 };
56+
bool m_dropped{ false };
5357

5458
std::string m_type;
5559
float m_latitude;

importer/src/main.cpp

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -212,27 +212,26 @@ int main(int argc, char *argv[])
212212

213213
// find missing parents for root nodes
214214
std::cout << "Fill missing hierarchies. Root size: " << hierarchy.get_root_count() << "\n";
215-
for (hindex parent = hierarchy.get_next_nonzero_root_parent(); parent;)
215+
for (hindex parent = hierarchy.get_next_nonzero_root_parent(); parent;
216+
parent = hierarchy.get_next_nonzero_root_parent())
216217
{
217-
pqxx::result r = txn.exec_params(base_query + "where place_id=$1", parent);
218-
bool found = false;
219-
for (auto row : r)
218+
pqxx::result r = txn.exec_params(base_query + "where place_id=$1", parent);
219+
bool found = false;
220+
for (auto row : r)
220221
{
221-
std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
222-
hierarchy.add_item(item);
223-
found = true;
222+
std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
223+
hierarchy.add_item(item);
224+
found = true;
224225
}
225226

226227
if (!found)
227228
{
228-
std::cerr << "Missing parent with ID " << parent << " . Stopping import\n";
229-
hierarchy.print_root_with_parent_id(parent);
230-
std::cerr << "\nSQL:\n" << base_query + "where place_id=" << parent << "\n";
229+
std::cerr << "Missing parent with ID " << parent << " . Stopping import\n";
230+
hierarchy.print_root_with_parent_id(parent);
231+
std::cerr << "\nSQL:\n" << base_query + "where place_id=" << parent << "\n";
231232

232-
return -1;
233+
return -1;
233234
}
234-
235-
parent = hierarchy.get_next_nonzero_root_parent();
236235
}
237236

238237
// remove all items from hierarchy that are not supposed to be there
@@ -258,7 +257,18 @@ int main(int argc, char *argv[])
258257
}
259258

260259
hierarchy.finalize();
261-
hierarchy.check_indexing();
260+
if (!hierarchy.check_indexing())
261+
{
262+
std::set<hindex> problem = hierarchy.get_failed_indexes();
263+
for (hindex index : problem)
264+
txn.exec_params0("update placex set indexed_status=2 where place_id=$1", index);
265+
266+
txn.commit();
267+
268+
std::cout << "Requested to reindex Nominatim database (run nominatim --index) for "
269+
<< problem.size() << " records\n";
270+
return -3;
271+
}
262272

263273
txn.commit(); // finalize postgres transactions
264274

0 commit comments

Comments
 (0)