From c49cd5438655043b487f4d986961746ae58edb3a Mon Sep 17 00:00:00 2001 From: Pim Jansen Date: Tue, 27 Aug 2019 16:43:06 +0200 Subject: [PATCH] Added batch inserts for doctrine orm populate --- readme.md | 2 ++ src/Faker/ORM/Doctrine/Populator.php | 34 +++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index 7c8148dfd6..6790c5ea21 100644 --- a/readme.md +++ b/readme.md @@ -452,6 +452,8 @@ print_r($insertedPKs); // ) ``` +**Note:** Because `Faker` returns all the inserted primary keys, memory consumption will increase drastically when you insert large batches, since the whole list of keys is kept in memory. + In the previous example, the `Book` and `Author` models share a relationship. Since `Author` entities are populated first, Faker is smart enough to relate the populated `Book` entities to one of the populated `Author` entities. Lastly, if you want to execute an arbitrary function on an entity before insertion, use the fourth argument of the `addEntity()` method: diff --git a/src/Faker/ORM/Doctrine/Populator.php b/src/Faker/ORM/Doctrine/Populator.php index d4fe897c61..d4c5dfb879 100644 --- a/src/Faker/ORM/Doctrine/Populator.php +++ b/src/Faker/ORM/Doctrine/Populator.php @@ -3,6 +3,7 @@ namespace Faker\ORM\Doctrine; use Doctrine\Common\Persistence\ObjectManager; +use Faker\Generator; /** * Service class for populating a database using the Doctrine ORM or ODM. @@ -10,20 +11,35 @@ */ class Populator { + /** @var int */ + protected $batchSize; + + /** @var Generator */ protected $generator; + + /** @var ObjectManager|null */ protected $manager; + + /** @var array */ protected $entities = array(); + + /** @var array */ protected $quantities = array(); + + /** @var array */ protected $generateId = array(); /** - * @param \Faker\Generator $generator + * Populator constructor. 
+ * @param Generator $generator * @param ObjectManager|null $manager + * @param int $batchSize Entities of one class to persist between flushes; values below 1 disable intermediate flushes */ - public function __construct(\Faker\Generator $generator, ObjectManager $manager = null) + public function __construct(Generator $generator, ObjectManager $manager = null, $batchSize = 1000) { $this->generator = $generator; $this->manager = $manager; + $this->batchSize = $batchSize; } /** @@ -55,6 +71,9 @@ public function addEntity($entity, $number, $customColumnFormatters = array(), $ /** * Populate the database using all the Entity classes previously added. * + * Entities are flushed and cleared in batches of $batchSize per class. Large amounts of data still increase memory + * usage, since the Populator returns all newly created primary keys after executing. + * * @param null|EntityManager $entityManager A Doctrine connection object * * @return array A list of the inserted PKs @@ -72,9 +91,18 @@ public function execute($entityManager = null) foreach ($this->quantities as $class => $number) { $generateId = $this->generateId[$class]; for ($i=0; $i < $number; $i++) { - $insertedEntities[$class][]= $this->entities[$class]->execute($entityManager, $insertedEntities, $generateId); + $insertedEntities[$class][]= $this->entities[$class]->execute( + $entityManager, + $insertedEntities, + $generateId + ); + if ($this->batchSize > 0 && count($insertedEntities[$class]) % $this->batchSize === 0) { + $entityManager->flush(); + $entityManager->clear($class); + } } $entityManager->flush(); + $entityManager->clear($class); } return $insertedEntities;