Skip to content

Commit

Permalink
Fix massive DSO-API performance issue, due to deepcopy()
Browse files Browse the repository at this point in the history
DRF performs a deepcopy() of the serializer._declared_fields before it
will handle the request. This is analogous to how Django forms work, and
allows updating the fields with request state and 'parent' references.

Since the deepcopy() also touched dataset definitions of Amsterdam Schema,
it basically took 14sec to copy all those dicts as well. This can be
avoided, bringing the performance of DSO-API back to reasonable levels.
  • Loading branch information
vdboor committed Jun 17, 2024
1 parent 9e246b8 commit f078098
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
4 changes: 1 addition & 3 deletions src/dso_api/dynamic_api/serializers/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,11 +534,9 @@ def _build_serializer_embedded_field(

embedded_field = EmbeddedFieldClass(
serializer_class=cast(type[base.DynamicSerializer], serializer_class),
# serializer_class=serializer_class,
field_schema=field_schema,
source=model_field.name,
)
# Attach the field schema so access rules can be applied here.
embedded_field.field_schema = field_schema

# The field name is still generated from the model_field, in case this is a reverse field.
serializer_part.add_embedded_field(toCamelCase(model_field.name), embedded_field)
Expand Down
10 changes: 10 additions & 0 deletions src/dso_api/dynamic_api/serializers/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,18 @@ def __init__(self, table_schema: DatasetTableSchema, *args, **kwargs):
# Init adds temporal definitions at construction, removing runtime model lookups.
# It also allows the PK optimization to be used.
super().__init__(*args, **kwargs)

# Link the table to perform temporal query filtering
self.table_schema = table_schema

def __deepcopy__(self, memo):
    """Return a cheap copy of this field that shares ``table_schema``.

    DRF deep-copies the declared serializer fields before handling a
    request. Letting that copy descend into the dataset schema definition
    is what caused the massive slowdown, so the schema object is shared by
    reference and the remaining instance attributes are copied shallowly.

    :param memo: The memo dictionary of the running ``copy.deepcopy()`` call.
    :returns: A new instance of the same class.
    """
    cls = self.__class__
    result = cls.__new__(cls)  # bypass __init__; state is copied below

    # The memo maps id(original) -> copy. Registering it the other way around
    # (id(copy) -> original) would leave the original unregistered.
    memo[id(self)] = result

    result.__dict__.update({k: v for k, v in self.__dict__.items() if k != "table_schema"})
    # Deliberately shared, never copied.
    result.table_schema = self.table_schema
    return result

def use_pk_only_optimization(self):
    """Report that the PK-only optimization applies: only an "id" is needed here."""
    return True

Expand Down
11 changes: 11 additions & 0 deletions src/rest_framework_dso/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from rest_framework.exceptions import ValidationError
from rest_framework_gis.fields import GeometryField
from schematools.contrib.django.models import LooseRelationField
from schematools.types import DatasetFieldSchema

from rest_framework_dso.utils import group_dotted_names, unlazy_object

Expand Down Expand Up @@ -289,14 +290,24 @@ def __init__(
self,
serializer_class: type[serializers.Serializer],
*,
field_schema: DatasetFieldSchema | None = None,
source=None,
):
self.serializer_class = serializer_class
self.source = source

self.field_name = None
self.field_schema = field_schema
self.parent_serializer_class = None

def __deepcopy__(self, memo):
    """Return a cheap copy of this field that shares ``field_schema``.

    DRF deep-copies the declared serializer fields before handling a
    request. Letting that copy descend into the schema definition is what
    caused the massive slowdown, so the schema object is shared by
    reference and the remaining instance attributes are copied shallowly.

    :param memo: The memo dictionary of the running ``copy.deepcopy()`` call.
    :returns: A new instance of the same class.
    """
    cls = self.__class__
    result = cls.__new__(cls)  # bypass __init__; state is copied below

    # The memo maps id(original) -> copy. Registering it the other way around
    # (id(copy) -> original) would leave the original unregistered.
    memo[id(self)] = result

    result.__dict__.update({k: v for k, v in self.__dict__.items() if k != "field_schema"})
    # Deliberately shared, never copied.
    result.field_schema = self.field_schema
    return result

def __repr__(self):
try:
parent_serializer = self.parent_serializer_class.__name__
Expand Down

1 comment on commit f078098

@vdboor
Copy link
Contributor Author

@vdboor vdboor commented on f078098 Jun 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lokaal getest (Django zonder debug):

bag panden lijst: 4429ms -> 145ms
bag nummeraanduiding: 4850ms -> 59ms
bag nummeraanduiding met expand: 11276ms -> 163ms

Please sign in to comment.