Skip to content

Commit

Permalink
last checkpoint: include cities in the nested set model calculation
Browse files Browse the repository at this point in the history
Previously, the contents of the country files were read and inserted into the database in chunks of 1000.
Then the Nested Set Model was built by reading the hierarchy.txt file and updating the records in the database (slow).

At this point, the Nested Set Model is built first by reading the hierarchy.txt file and combining it with the contents
of the admin2Codes.txt file, which contains the hierarchy for cities (even though geonames.org has up to ADM5 divisions,
it cuts off at ADM2 for cities). After the nestedSet is built, the contents of the country files are mapped into Geonames with
the nestedSet properties (_lft, _rgt, depth and parent_id) and inserted into the database.
  • Loading branch information
Parables committed May 30, 2023
1 parent 7e55142 commit 7a33477
Show file tree
Hide file tree
Showing 10 changed files with 370 additions and 201 deletions.
9 changes: 9 additions & 0 deletions src/Actions/BuildNestedSetModelAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ class BuildNestedSetModelAction
*/
public function execute(array $hierarchy = [], bool $nestChildren = false): array
{
ini_set('memory_limit', -1);

if (empty($hierarchy)) {
$this->toastable->toast('Hierarchy is empty... Skipping ...', 'error');
return [];
}

Expand All @@ -31,6 +34,7 @@ public function execute(array $hierarchy = [], bool $nestChildren = false): arra
*/
public function buildTree(array $hierarchy, int &$index = 1, bool $nestChildren = false): array
{
$this->toastable->toast('Building Root Node ...');
$rootId = array_key_first($hierarchy);
$depth = 0;

Expand All @@ -42,6 +46,7 @@ public function buildTree(array $hierarchy, int &$index = 1, bool $nestChildren
'parent_id' => null,
];

$this->toastable->toast('Building Children Nodes ...');
$children = $this->buildNodes(
hierarchy: $hierarchy,
parentId: $rootId,
Expand All @@ -56,6 +61,7 @@ public function buildTree(array $hierarchy, int &$index = 1, bool $nestChildren
$root['children'] = $children;
return $root;
}
$this->toastable->toast('Tree Built successfully...');
return [$rootId => $root] + $children;
// return array_merge([$rootId => $root], $children);
//$children[$rootId] = $root;
Expand All @@ -81,6 +87,7 @@ public function buildNodes(array $hierarchy, string|int $parentId, int &$index,
'parent_id' => $parentId,
];

$this->toastable->toast('Getting sub nodes for: ' . $id);
$children = $this->buildNodes(
hierarchy: $hierarchy,
parentId: $id,
Expand All @@ -98,6 +105,7 @@ public function buildNodes(array $hierarchy, string|int $parentId, int &$index,
$result = $result + [$id => $node] + $children;
}
}
$this->toastable->toast('Done.');
return $result;
}

Expand All @@ -106,6 +114,7 @@ public function buildNodes(array $hierarchy, string|int $parentId, int &$index,
*/
public function children(array $hierarchy, string|int $parentId): array
{
    // Announce which parent is being looked up before touching the hierarchy.
    $message = 'Getting children for parentId: ' . $parentId;
    $this->toastable->toast($message);

    // A parent with no entry (or a null entry) falls back to an empty array;
    // whatever is found is passed through Arr::wrap before being returned.
    $entry = $hierarchy[$parentId] ?? [];

    return Arr::wrap($entry);
}

Expand Down
2 changes: 2 additions & 0 deletions src/Actions/GetHierarchyAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class GetHierarchyAction
*/
public function execute(LazyCollection $contentsOfGeonameFiles): array
{
    // Lift the memory limit for this run. The value is passed as a string
    // because ini_set() only accepts a string value before PHP 8.1, and an
    // int would raise a TypeError in files that declare strict_types.
    ini_set('memory_limit', '-1');

    // Delegate the actual hierarchy construction to hierarchyForCitiesTowns().
    return $this->hierarchyForCitiesTowns(contentsOfGeonameFiles: $contentsOfGeonameFiles);
}

Expand Down
11 changes: 6 additions & 5 deletions src/Actions/LoadGeonamesAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ public function execute(LazyCollection $geonamesCollection, int $chunkSize = 100
DB::table('geonames')->truncate();
}

return $geonamesCollection
->chunk($chunkSize)
->each(function (LazyCollection $collection) {
DB::table('geonames')->insertOrIgnore($collection->all());
});
$chunks = $geonamesCollection->chunk($chunkSize);

return $chunks->each(function (LazyCollection $collection, int $index) use ($chunks) {
$this->toastable->toast("Inserting next batch... " . ($index + 1) . "/" . $chunks->count());
DB::table('geonames')->insertOrIgnore($collection->all());
});
}
}
2 changes: 1 addition & 1 deletion src/Actions/ReadFileAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public function execute(string $fileName): LazyCollection
return LazyCollection::empty();
}

$this->toastable->toast('Reading file: ' . $fileName);
// $this->toastable->toast('Reading file: ' . $fileName);

$collection = LazyCollection::make(function () use ($fileName) {
$fileStream = fopen($fileName, 'r');
Expand Down
28 changes: 28 additions & 0 deletions src/Actions/ReadFilesAction.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

declare(strict_types=1);

namespace Parables\Geo\Actions;

use Illuminate\Support\LazyCollection;
use Parables\Geo\Actions\Concerns\HasToastable;

class ReadFilesAction
{
    use HasToastable;

    /**
     * Lazily read a list of files, yielding one ReadFileAction result per file name.
     *
     * Nothing is read until the returned collection is iterated; each file name
     * is handed to a single ReadFileAction that shares this action's toastable.
     *
     * @param array<int,string> $fileNames
     * @return LazyCollection<int, LazyCollection>
     */
    public function execute(array $fileNames): LazyCollection
    {
        // Build the single-file reader up front so every file reuses the same instance.
        $reader = (new ReadFileAction)->toastable($this->toastable);

        $generator = static function () use ($reader, $fileNames): \Generator {
            foreach ($fileNames as $path) {
                // Yield without a key so results are indexed sequentially from 0.
                yield $reader->execute($path);
            }
        };

        return LazyCollection::make($generator);
    }
}
21 changes: 21 additions & 0 deletions src/Actions/ReadFilesActionTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

declare(strict_types=1);

namespace Parables\Geo\Actions;

use Illuminate\Support\Arr;
use Illuminate\Support\LazyCollection;
use Parables\Geo\Actions\Concerns\HasToastable;
use Parables\Geo\Actions\Fixtures\Toastable;

it('can read a list of files into a LazyCollection of file contents', function () {
    // Decode the cached countries.json found under the storage directory.
    $decoded = json_decode(file_get_contents(storage_path('/geo/countries.json')), associative: true);

    // Every top-level key of the decoded data becomes a "<key>.txt" file name.
    $fileNames = [];
    foreach (array_keys(Arr::wrap($decoded)) as $name) {
        $fileNames[] = $name . '.txt';
    }

    $action = (new ReadFilesAction)->toastable(new Toastable);
    $files = $action->execute($fileNames);

    // ReadFilesAction yields one entry per file name; the fixture is expected to hold 253.
    expect($files->count())->toBe(253);
});
18 changes: 16 additions & 2 deletions src/Actions/TransformGeonamesAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,25 @@ class TransformGeonamesAction
use HasToastable;
/**
* @param LazyCollection $lines
* @param array $nestedSet
*/
public function execute(LazyCollection $lines, bool $toPayload = true, bool $idAsindex = true): LazyCollection
public function execute(LazyCollection $lines, array $nestedSet = [], bool $toPayload = true, bool $idAsindex = true): LazyCollection
{
$geonamesCollection = $lines->map(function (string $line, string &$key) use ($toPayload, $idAsindex) {
$geonamesCollection = $lines->map(function (string $line, string &$key) use ($nestedSet, $toPayload, $idAsindex) {
$geoname = GeoName::fromString($line);

$this->toastable->toast('Getting node from nestedSet...' . $geoname->id());
// INFO: Step1: Get parent using id
$node = $nestedSet[$geoname->id()] ?? null;
if (empty($node)) {
// INFO: Step2: Get parent using a concatenation of the countryCode.admin1Code.admin2Code(if present)
// Skip those with admin1Code === 00
} else {
$this->toastable->toast('Node Found...');
$geoname->nodeFromPayload($node);
}

$this->toastable->toast('Node not found for ' . $geoname->id() . ' ...');
$key = $idAsindex ? $geoname->id() : $key;
return $toPayload ? $geoname->toPayload() : $geoname;
});
Expand Down
21 changes: 15 additions & 6 deletions src/Actions/TransformGeonamesActionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,25 @@

$toastable = new Toastable();

$geonamesCollection = (new ReadFileAction)
$toastable->toast('Reading GH.txt... ');
$lines = (new ReadFileAction)
->toastable($toastable)
// ->execute(storage_path('geo/GH.txt'));
->execute(storage_path('geo/allCountries.txt'));
->execute(storage_path('geo/GH.txt'));
//->execute(storage_path('geo/allCountries.txt'));

$geonamesCollection = (new TransformGeonamesAction)
$toastable->toast('Reading hierarchy.txt...');
$nestedSet = (new BuildNestedSetModelAction)->toastable(new Toastable)->execute();

$stream = fopen(storage_path("geo/nestedSet.json"), 'w');
fwrite($stream, json_encode($nestedSet, JSON_PRETTY_PRINT));
fclose($stream);

$toastable->toast('Transforming GeoNames...');
$lines = (new TransformGeonamesAction)
->toastable($toastable)
->execute($geonamesCollection);
->execute($lines, $nestedSet);

print_r($geonamesCollection->all());
print_r($lines->all());

expect('hi')->toBe('hi');
});

0 comments on commit 7a33477

Please sign in to comment.