Skip to content

Commit

Permalink
fixed numberOfPages
Browse files Browse the repository at this point in the history
  • Loading branch information
RobertStony committed Aug 18, 2016
1 parent 1966d2c commit 2d6f7ba
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 28 deletions.
59 changes: 32 additions & 27 deletions entity_resolution.js
Expand Up @@ -353,34 +353,39 @@ function run (db) {
newData[element.id] = element
return newData
}, {})
updateEntities.forEach(function (matchSchema) {
var firstEntityId = matchSchema['id_entity1']
var secondEntityId = matchSchema['id_entity2']

var dataObject = Object.keys(matchSchema).reduce(function (newDataObject, key) {
if (matchSchema[key] === 1) {
newDataObject[key] = data[firstEntityId][key]
} else if (matchSchema[key] === 2) {
newDataObject[key] = data[secondEntityId][key]
}
return newDataObject
}, {})

dataObject = insertMissingValues(data[firstEntityId], dataObject)
dataObject = insertMissingValues(data[secondEntityId], dataObject)

console.log('replace entities:\n' +
JSON.stringify(data[firstEntityId]) + '\n\n' +
JSON.stringify(data[secondEntityId]) + '\n\n' +
JSON.stringify(dataObject) + '\n')

db.insertRow(dataObject)
db.deleteRow(firstEntityId)
db.deleteRow(secondEntityId)
})
console.log(updateEntities.length + ' entities replaced.')
if (updateEntities.length > 0) {
run(db)
try {
updateEntities.forEach(function (matchSchema) {
var firstEntityId = matchSchema['id_entity1']
var secondEntityId = matchSchema['id_entity2']

var dataObject = Object.keys(matchSchema).reduce(function (newDataObject, key) {
if (matchSchema[key] === 1) {
newDataObject[key] = data[firstEntityId][key]
} else if (matchSchema[key] === 2) {
newDataObject[key] = data[secondEntityId][key]
}
return newDataObject
}, {})

dataObject = insertMissingValues(data[firstEntityId], dataObject)
dataObject = insertMissingValues(data[secondEntityId], dataObject)

console.log('replace entities:\n' +
JSON.stringify(data[firstEntityId]) + '\n\n' +
JSON.stringify(data[secondEntityId]) + '\n\n' +
JSON.stringify(dataObject) + '\n')

db.insertRow(dataObject)
db.deleteRow(firstEntityId)
db.deleteRow(secondEntityId)
})
console.log(updateEntities.length + ' entities replaced.')
if (updateEntities.length > 0) {
run(db)
}
} catch (ex) {
console.log(ex, data[firstEntityId], data[secondEntityId])
}
})
}
Expand Down
2 changes: 1 addition & 1 deletion graduates.com/index.js
Expand Up @@ -46,7 +46,7 @@ function run (db, callbackScraper) {

$ = undefined

numberOfPages += 1
numberOfPages += districts.length

districts.forEach(function (link) {
counter += policy
Expand Down
28 changes: 28 additions & 0 deletions overpass-turbo.eu/address_types.md
@@ -0,0 +1,28 @@
# Address

| tag name | example | country | count of 5436 |
|--------------------|----------------------------------------|--------------------------------|---------------|
| addr\_street | tygerberg street | South Africa / Nigeria / Ghana | 99 / 33 / 6 |
| addr\_city | Kano | South Africa / Nigeria / Ghana | 77 / 30 / 12 |
| street\_name | 3rd Avenue | Nigeria | 87 |
| house\_number | 58 | Nigeria | 87 |
| house\_numb | 123 | Nigeria | 33 |
| street\_nam | Dr Enerst Ugo Street | Nigeria | 33 |
| addr\_district | Tafawa-Balewa | Nigeria | 2191 |
| addr\_full | Zadawa, Hausari Zadawa, Askira Uba, BO | Nigeria | 2230 |
| addr\_municipality | Tapshin | Nigeria | 2191 |
| addr\_state | Bauchi | Nigeria | 2191 |
| addr\_housenumber | 124 | South Africa / Nigeria / Ghana | 38 / 3 / 1 |
| addr\_province | Mpumalanga | South Africa | 2 |
| addr\_subdistrict | Summerstrand | South Africa | 2 |
| addr\_ward | 2 | South Africa | 2 |
| addr\_postcode | 400 | South Africa / Nigeria | 56 / 90 |
| postcode | 400 | Nigeria | 33 |

# Telephone

| tag name | country | count of 5436 |
|----------------|--------------------------------|---------------|
| contact\_phone | Nigeria | 13 |
| telephone\_ | Nigeria | 1 |
| phone | South Africa / Nigeria / Ghana | 26 / 7 / 3 |

0 comments on commit 2d6f7ba

Please sign in to comment.