Skip to content

Commit

Permalink
Merge pull request #2719 from bagerard/shane_skip_index_creation_on_save
Browse files Browse the repository at this point in the history
[Clone] Breaking change: Improve save() performance by skipping index creation
  • Loading branch information
bagerard committed Dec 30, 2022
2 parents 8b62b1f + 7094025 commit ae09e9b
Show file tree
Hide file tree
Showing 8 changed files with 162 additions and 46 deletions.
2 changes: 1 addition & 1 deletion benchmarks/test_basic_doc_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
StringField,
)

mongoengine.connect(db="mongoengine_benchmark_test")
mongoengine.connect(db="mongoengine_benchmark_test", w=1)


def timeit(f, n=10000):
Expand Down
15 changes: 5 additions & 10 deletions benchmarks/test_inserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@ def main():
setup = """
from pymongo import MongoClient
connection = MongoClient()
connection = MongoClient(w=1)
connection.drop_database('mongoengine_benchmark_test')
"""

stmt = """
from pymongo import MongoClient
connection = MongoClient()
db = connection.mongoengine_benchmark_test
noddy = db.noddy
Expand All @@ -29,13 +25,12 @@ def main():
"""

print("-" * 100)
print("PyMongo: Creating 10000 dictionaries.")
print('PyMongo: Creating 10000 dictionaries (write_concern={"w": 1}).')
t = timeit.Timer(stmt=stmt, setup=setup)
print(f"{t.timeit(1)}s")

stmt = """
from pymongo import MongoClient, WriteConcern
connection = MongoClient()
from pymongo import WriteConcern
db = connection.mongoengine_benchmark_test
noddy = db.noddy.with_options(write_concern=WriteConcern(w=0))
Expand Down Expand Up @@ -64,7 +59,7 @@ def main():
connection.close()
from mongoengine import Document, DictField, connect
connect("mongoengine_benchmark_test")
connect("mongoengine_benchmark_test", w=1)
class Noddy(Document):
fields = DictField()
Expand All @@ -82,7 +77,7 @@ class Noddy(Document):
"""

print("-" * 100)
print("MongoEngine: Creating 10000 dictionaries.")
print('MongoEngine: Creating 10000 dictionaries (write_concern={"w": 1}).')
t = timeit.Timer(stmt=stmt, setup=setup)
print(f"{t.timeit(1)}s")

Expand Down
87 changes: 87 additions & 0 deletions benchmarks/test_save_with_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import timeit


def main():
    """Benchmark Document.save() throughput for documents with a varying
    number of declared indexes, with and without ``auto_create_index_on_save``.

    Requires a local MongoDB instance; each scenario saves 10000 documents
    and reports the best of 3 runs (min of repeats reduces timing noise).
    """
    # One shared setup: drop the benchmark DB, connect, and declare one
    # Document class per scenario. User3/User4 opt into re-running
    # ensure_indexes on every save() via auto_create_index_on_save.
    setup = """
from pymongo import MongoClient
connection = MongoClient()
connection.drop_database("mongoengine_benchmark_test")
connection.close()
from mongoengine import connect, Document, IntField, StringField
connect("mongoengine_benchmark_test", w=1)
class User0(Document):
    name = StringField()
    age = IntField()
class User1(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name"]]}
class User2(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name", "age"]]}
class User3(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name"]], "auto_create_index_on_save": True}
class User4(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name", "age"]], "auto_create_index_on_save": True}
"""

    # (Document class name, human-readable scenario description).
    scenarios = [
        ("User0", "0 indexes."),
        ("User1", "1 index."),
        ("User2", "2 indexes."),
        ("User3", "1 index (auto_create_index_on_save=True)."),
        ("User4", "2 indexes (auto_create_index_on_save=True)."),
    ]

    for cls_name, description in scenarios:
        stmt = f"""
for i in range(10000):
    {cls_name}(name="Nunu", age=9).save()
"""
        print("-" * 80)
        print(f"Save 10000 documents with {description}")
        t = timeit.Timer(stmt=stmt, setup=setup)
        print(f"{min(t.repeat(repeat=3, number=1))}s")


if __name__ == "__main__":
    main()
9 changes: 8 additions & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,18 @@ Changelog
Development
===========
- (Fill this out as you fix issues and develop your features).
- BREAKING CHANGE: Improved the performance of :meth:`~mongoengine.Document.save()`
by removing the call to :meth:`~mongoengine.Document.ensure_indexes` unless
``meta['auto_create_index_on_save']`` is set to True. With the default settings, Document indexes
will still be created on the fly, during the first usage of the collection (query, insert, etc);
they will just not be re-created whenever .save() is called.
- Added meta ``auto_create_index_on_save`` so you can enable index creation
on :meth:`~mongoengine.Document.save()` (as it was < 0.26.0).

Changes in 0.25.0
=================
- Support MONGODB-AWS authentication mechanism (with `authmechanismproperties`) #2507
- Turning off dereferencing for the results of distinct query. #2663
- Bug Fix - distinct query doesn't obey the ``no_dereference()``. #2663
- Add tests against Mongo 5.0 in pipeline
- Drop support for Python 3.6 (EOL)
- Bug fix support for PyMongo>=4 to fix "pymongo.errors.InvalidOperation: Cannot use MongoClient after close"
Expand Down
8 changes: 7 additions & 1 deletion docs/guide/defining-documents.rst
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ There are a few top level defaults for all indexes that can be set::
'index_background': True,
'index_cls': False,
'auto_create_index': True,
'auto_create_index_on_save': False,
}


Expand All @@ -588,10 +589,15 @@ There are a few top level defaults for all indexes that can be set::

:attr:`auto_create_index` (Optional)
When this is True (default), MongoEngine will ensure that the correct
indexes exist in MongoDB each time a command is run. This can be disabled
indexes exist in MongoDB when the Document is first used. This can be disabled
in systems where indexes are managed separately. Disabling this will improve
performance.

:attr:`auto_create_index_on_save` (Optional)
When this is True, MongoEngine will ensure that the correct
indexes exist in MongoDB each time :meth:`~mongoengine.document.Document.save`
is run. Enabling this will degrade performance. The default is False. This
option was added in version 0.26.

Compound Indexes and Indexing sub documents
-------------------------------------------
Expand Down
19 changes: 16 additions & 3 deletions mongoengine/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,7 @@ def _get_collection(cls):
cls._collection = db[collection_name]

# Ensure indexes on the collection unless auto_create_index was
# set to False.
# Also there is no need to ensure indexes on slave.
# set to False. Plus, there is no need to ensure indexes on slave.
db = cls._get_db()
if cls._meta.get("auto_create_index", True) and db.client.is_primary:
cls.ensure_indexes()
Expand Down Expand Up @@ -384,6 +383,10 @@ def save(
meta['cascade'] = True. Also you can pass different kwargs to
the cascade save using cascade_kwargs which overwrites the
existing kwargs with custom values.
.. versionchanged:: 0.26
save() no longer calls :meth:`~mongoengine.Document.ensure_indexes`
unless ``meta['auto_create_index_on_save']`` is set to True.
"""
signal_kwargs = signal_kwargs or {}

Expand All @@ -407,7 +410,13 @@ def save(
# it might be refreshed by the pre_save_post_validation hook, e.g., for etag generation
doc = self.to_mongo()

if self._meta.get("auto_create_index", True):
# Initialize the Document's underlying pymongo.Collection (+create indexes) if not already initialized
# Important to do this here to avoid that the index creation gets wrapped in the try/except block below
# and turned into mongoengine.OperationError
if self._collection is None:
_ = self._get_collection()
elif self._meta.get("auto_create_index_on_save", False):
# ensure_indexes is called as part of _get_collection so no need to re-call it again here
self.ensure_indexes()

try:
Expand Down Expand Up @@ -880,6 +889,10 @@ def ensure_indexes(cls):
Document collection (query, save, etc) so unless you disabled `auto_create_index`, you
shouldn't have to call this manually.
This also gets called upon every call to Document.save if `auto_create_index_on_save` is set to True.
If called multiple times, MongoDB will not re-create indexes if they exist already
.. note:: You can disable automatic index creation by setting
`auto_create_index` to False in the documents meta data
"""
Expand Down
66 changes: 37 additions & 29 deletions tests/document/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,44 +983,52 @@ class Book(Document):

def test_indexes_after_database_drop(self):
"""
Test to ensure that indexes are re-created on a collection even
after the database has been dropped.
Test to ensure that indexes are not re-created on a collection
after the database has been dropped unless auto_create_index_on_save
is enabled.
Issue #812
Issue #812 and #1446.
"""
# Use a new connection and database since dropping the database could
# cause concurrent tests to fail.
connection = connect(
db="tempdatabase", alias="test_indexes_after_database_drop"
)
tmp_alias = "test_indexes_after_database_drop"
connection = connect(db="tempdatabase", alias=tmp_alias)
self.addCleanup(connection.drop_database, "tempdatabase")

class BlogPost(Document):
title = StringField()
slug = StringField(unique=True)
meta = {"db_alias": tmp_alias}

meta = {"db_alias": "test_indexes_after_database_drop"}
BlogPost.drop_collection()
BlogPost(slug="test").save()
with pytest.raises(NotUniqueError):
BlogPost(slug="test").save()

try:
BlogPost.drop_collection()

# Create Post #1
post1 = BlogPost(title="test1", slug="test")
post1.save()

# Drop the Database
connection.drop_database("tempdatabase")

# Re-create Post #1
post1 = BlogPost(title="test1", slug="test")
post1.save()

# Create Post #2
post2 = BlogPost(title="test2", slug="test")
with pytest.raises(NotUniqueError):
post2.save()
finally:
# Drop the temporary database at the end
connection.drop_database("tempdatabase")
# Drop the Database
connection.drop_database("tempdatabase")
BlogPost(slug="test").save()
# No error because the index was not recreated after dropping the database.
BlogPost(slug="test").save()

# Repeat with auto_create_index_on_save: True.
class BlogPost2(Document):
slug = StringField(unique=True)
meta = {
"db_alias": tmp_alias,
"auto_create_index_on_save": True,
}

BlogPost2.drop_collection()
BlogPost2(slug="test").save()
with pytest.raises(NotUniqueError):
BlogPost2(slug="test").save()

# Drop the Database
connection.drop_database("tempdatabase")
BlogPost2(slug="test").save()
# Error because ensure_indexes is run on every save().
with pytest.raises(NotUniqueError):
BlogPost2(slug="test").save()

def test_index_dont_send_cls_option(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _inner(*args, **kwargs):
return func(*args, **kwargs)

pretty_version = ".".join(str(n) for n in mongo_version_req)
pytest.skip(f"Needs MongoDB v{pretty_version}+")
pytest.skip(f"Needs MongoDB {oper.__name__} v{pretty_version}")

_inner.__name__ = func.__name__
_inner.__doc__ = func.__doc__
Expand Down

0 comments on commit ae09e9b

Please sign in to comment.