-
Notifications
You must be signed in to change notification settings - Fork 1
/
maintained_model.py
2075 lines (1810 loc) · 102 KB
/
maintained_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import importlib
import warnings
from collections import defaultdict
from contextlib import contextmanager
from threading import local
from typing import Dict, List
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.db.models import Model
from django.db.models.signals import m2m_changed
class MaintainedModelCoordinator:
    """
    This class loosely mimics a "connection" in a Django "database instrumentation" design. But instead of providing a
    way to apply custom wrappers to database queries, it provides a way to tell MaintainedModel how it should behave
    (i.e. its running mode based on "context") during calls to save, delete, and m2m_propagation_handler. It does this
    by providing a context manager.

    There are 5 running modes that determine when and if autoupdates should occur:

        always:    (the default, meaning both lazy and immediate)
        lazy:      Auto-updates occur as query results are iterated over (see MaintainedModel.from_db()).
        immediate: Auto-updates occur immediately upon record creation (e.g. via calls to save()).
        deferred:  Auto-updates are buffered upon save() and occur when the last nested context has been exited (see
                   MaintainedModel.deferred).
        disabled:  No buffering or autoupdates are performed at all (see MaintainedModel.disabled).
    """

    # Maps each running mode to its (lazy_updates, immediate_updates, buffering) flags.
    _MODE_FLAGS = {
        "always": (True, True, False),  # Updates both on save and on query.
        "lazy": (True, False, False),
        "immediate": (False, True, False),
        "deferred": (False, False, True),
        "disabled": (False, False, False),
    }

    def __init__(self, auto_update_mode=None, **kwargs):
        """
        Args:
            auto_update_mode (str): One of "always", "lazy", "immediate", "deferred", or "disabled". Defaults to
                "always" when None.
            label_filters (list of str) [kwargs]: Only auto-update fields whose decorator "update_label" matches.
            filter_in (bool) [kwargs]: Whether label_filters selects (True) or excludes (False) matching labels.

        Raises:
            ValueError: If auto_update_mode is not one of the 5 valid modes.
        """
        if auto_update_mode is None:
            auto_update_mode = "always"
        if auto_update_mode not in self._MODE_FLAGS:
            raise ValueError(
                f"Invalid auto_update_mode: [{auto_update_mode}]. Valid values are: [always, lazy, immediate, "
                "deferred, and disabled]."
            )
        self.auto_update_mode = auto_update_mode
        # (Deferred will update, but later.)
        self.lazy_updates, self.immediate_updates, self.buffering = self._MODE_FLAGS[
            auto_update_mode
        ]

        # This tracks whether the underlying modes (autoupdates and buffering) have been overridden or not, e.g. by a
        # parent context. This is only used to override an immediate or lazy mode to a deferred mode. disabled cannot
        # be overridden.
        self.overridden = False

        # These allow the user to turn on or off specific groups of auto-updates.
        label_filters = kwargs.pop("label_filters", [])
        self.default_label_filters = label_filters
        filter_in = kwargs.pop("filter_in", True)
        self.default_filter_in = filter_in
        self.nondefault_filtering_exists = not (
            (label_filters is None or len(label_filters) == 0) and filter_in is True
        )

        # This is for buffering a large quantity of auto-updates in order to get speed improvements during loading
        self.update_buffer = []

    def __str__(self):
        return self.auto_update_mode

    def _defer_override(self):
        """Demote an always/immediate/lazy coordinator to deferred mode (e.g. imposed by a parent context)."""
        if self.auto_update_mode in ["always", "immediate", "lazy"]:
            print(f"Deferring {self.auto_update_mode} coordinator")
            self.auto_update_mode = "deferred"
            self.overridden = True
            self.lazy_updates = False
            self.immediate_updates = False
            self.buffering = True
        else:
            raise ValueError(
                f"Cannot set a defer override of a [{self.auto_update_mode}] mode MaintainedModelCoordinator."
            )

    def _disable_override(self):
        """Demote this coordinator to disabled mode (any mode can be disabled)."""
        current_mode = self.get_mode()
        print(f"Disabling {current_mode} coordinator.")
        self.auto_update_mode = "disabled"
        self.overridden = True
        self.lazy_updates = False
        self.immediate_updates = False
        self.buffering = False

    def get_mode(self):
        """Return the current running mode as a string."""
        return self.auto_update_mode

    def are_immediate_updates_enabled(self):
        return self.immediate_updates

    def are_lazy_updates_enabled(self):
        return self.lazy_updates

    def are_autoupdates_enabled(self):
        """True if either immediate or lazy updates are active (deferred/disabled modes return False)."""
        return self.immediate_updates or self.lazy_updates

    def buffer_size(self, generation=None, label_filters=None, filter_in=None):
        """
        Returns the number of buffered records that contain at least 1 decorated function matching the filter criteria
        (generation and label).
        """
        if label_filters is None:
            label_filters = []
        if filter_in is None:
            filter_in = True
        cnt = 0
        for buffered_item in self.update_buffer:
            updaters_list = buffered_item._filter_updaters(
                buffered_item.get_my_updaters(),
                generation=generation,
                label_filters=label_filters,
                filter_in=filter_in,
            )
            cnt += 1 if len(updaters_list) else 0
        return cnt

    def clear_update_buffer(self, generation=None, label_filters=None, filter_in=None):
        """
        Clears buffered auto-updates. Use after having performed buffered updates to prevent unintended auto-updates.
        This method is called automatically during the execution of mass autoupdates.

        If a generation is provided (see the generation argument of the MaintainedModel.setter decorator), only
        buffered auto-updates labeled with that generation are cleared. Note that each record can have multiple auto-
        update fields and thus multiple generation values. Only the max generation (a leaf) is used in this check
        because it is assumed leaves are updated first during a mass update and that an auto-update updates every
        maintained field.

        If label_filters is supplied, only buffered auto-updates whose "update_label" is in the label_filters are
        cleared. Note that each record can have multiple auto-update fields and thus multiple update_label values.
        Any label match will mark this buffered auto-update for removal.

        Note that if both generation and label_filters are supplied, only buffered auto-updates that meet both
        conditions are cleared.
        """
        if filter_in is None:
            filter_in = True
        if label_filters is None:
            label_filters = (
                []
            )  # Clear everything by default, regardless of default filters
            filter_in = True
        # Fast path: no filtering criteria at all means "clear everything".
        if generation is None and (label_filters is None or len(label_filters) == 0):
            self.update_buffer = []
            return
        new_buffer = []
        gen_warns = 0
        for buffered_item in self.update_buffer:
            # Buffered items are entire model objects. We are going to filter model objects when they DO match the
            # filtering criteria. A model object matches the filtering criteria based on whether ANY of its updaters
            # (fields to be updated specified by the decorators on the methods that produce their values) match the
            # filtering criteria. If the model object DOES NOT have a field that meets the label filtering criteria,
            # it should remain in the buffer. For example, if there are 2 fields that are auto-updated in the buffered
            # model object, and one's decorator has a "name" label and the other has an "fcirc_calc" label, and the
            # supplied label_filters is ["name"] and filter_in is True, then the matching updater WILL be returned by
            # this filter operation and the buffered item will be left out of the new_buffer. If a model object in the
            # buffer does NOT have the "name" label in any of its updaters, it will be added to the new_buffer.
            matching_updaters = buffered_item._filter_updaters(
                buffered_item.get_my_updaters(),
                generation=generation,
                label_filters=label_filters,
                filter_in=filter_in,
            )
            max_gen = 0
            # We should issue a warning if the remaining updaters left in the buffer contain a greater generation,
            # because updates and buffer clears should happen from leaf to root. And we should only check those which
            # have a target label.
            if generation is not None:
                max_gen = buffered_item.get_max_generation(
                    matching_updaters, label_filters, filter_in
                )
            # If the buffered item didn't have any updaters that met the filtering criteria, keep it in the buffer
            if len(matching_updaters) == 0:
                new_buffer.append(buffered_item)
                # There are no matching filters among the updaters of the buffered_item, but the max generation MUST
                # be auto-updated first in order for breadth-first mass autoupdates to happen in the proper order, so
                # if we're keeping a generation higher than the current filter generation being cleared, this is a
                # problem.
                if generation is not None and max_gen > generation:
                    gen_warns += 1
        if gen_warns > 0:
            label_str = ""
            if label_filters is not None and len(label_filters) > 0:
                label_str = f"with labels: [{', '.join(label_filters)}] "
            print(
                f"WARNING: {gen_warns} records {label_str}in the buffer are younger than the generation supplied: "
                f"{generation}. Generations should be cleared in order from leaf (largest generation number) to root "
                "(0)."
            )
        # populate the buffer with what's left
        self.update_buffer = new_buffer

    def _peek_update_buffer(self, index=0):
        """Return the buffered model object at the given index without removing it."""
        return self.update_buffer[index]

    def buffer_update(self, mdl_obj):
        """
        This is called when MaintainedModel.save (or delete) is called (if immediate_updates is False), so that
        maintained fields can be updated after loading code finishes (by calling the global method:
        perform_buffered_updates).

        It will only buffer the model object if the filters attached to it (originally from a coordinator (possibly a
        child coordinator with different labels) match any of the labels in the class's autoupdate fields, set in
        their decorators.
        """
        # See if this class contains a field with a matching label (if a populated label_filters array was supplied)
        if (
            mdl_obj.label_filters is not None
            and len(mdl_obj.label_filters) > 0
            and not mdl_obj.updater_list_has_matching_labels(
                mdl_obj.get_my_updaters(),
                mdl_obj.label_filters,
                mdl_obj.filter_in,
            )
        ):
            # Do not buffer - nothing to update
            return
        # Do not buffer if it's already buffered. Note, this class isn't designed to support auto-updates in a
        # specific order. All auto-update functions should use non-auto-update fields.
        if self.buffering:
            if mdl_obj not in self.update_buffer:
                self.update_buffer.append(mdl_obj)
            else:
                # This allows the same object to be updated more than once (in the order encountered) if the fields to
                # be auto-updated in each instance, differ. This can cause redundant updates (e.g. when a field
                # matches the filters in both cases), but given the possibility that update order may depend on the
                # update of related records, it's better to be on the safe side and do each auto-update, so...
                # If this is the same object but a different set of fields will be updated...
                # Note, Django model object equivalence (obj1 == obj2) compares primary key values
                for same_obj in [bo for bo in self.update_buffer if bo == mdl_obj]:
                    # Fix: this previously compared same_obj.label_filters to ITSELF (always equal), so objects
                    # re-buffered with different label filters were never re-appended. Compare against mdl_obj's
                    # filters instead.
                    if (
                        same_obj.filter_in != mdl_obj.filter_in
                        or same_obj.label_filters != mdl_obj.label_filters
                    ):
                        self.update_buffer.append(mdl_obj)
                        break

    # Added transaction.atomic, because even after catching an intentional AutoUpdateFailed in test
    # DataRepo.tests.models.test_infusate.MaintainedModelImmediateTests.test_error_when_buffer_not_clear and ending
    # the test successfully, the django post test teardown code was re-encountering the exception and I'm not entirely
    # sure why. It probably has to do with the context manager code. The entire trace had no reference to any code in
    # this repo. Adding transaction.atomic here prevents that exception from happening...
    @transaction.atomic
    def perform_buffered_updates(self, label_filters=None, filter_in=None):
        """
        Performs a mass update of records in the buffer in a depth-first fashion without repeated updates to the same
        record over and over. It goes through the buffer in the order added and triggers each record's DFS updates,
        which returns the signatures of every updated record. Those updates are maintained through the traversal of
        the entire buffer and checked before each update, thereby preventing repeated updates. If a record has already
        been updated, the records it triggers updates to are not propagated either. The goal is to trigger the updates
        in the order they were designed to follow governed by the parent/child links created in each decorator.

        Note that this can fail if a record is changed and then its child (who triggers its parent) is changed (each
        being added to the buffer during a mass auto-update). This however is not expected to happen, as mass auto-
        update is used for loading, which if done right, doesn't change child records after parent records have been
        added.

        WARNING: label_filters and filter_in should only be supplied if you know what you are doing. Every model
        object buffered for autoupdate saved its filtering criteria that were in effect when it was buffered and that
        filtering criteria will be applied to selectively update only the fields matching the filtering criteria as
        applied to each field's "update_label" in its method's decorator.

        Raises:
            StaleAutoupdateMode: If autoupdates are still enabled (this method must only run in deferred/disabled
                contexts, otherwise the buffer contents are stale).
            AutoUpdateFailed: If any buffered record's save or DFS propagation raises.
        """
        # The default is to use the filters saved with the buffered model objects
        # This is so that a parent deferred coordinator who receives a child coordinator's buffer, can do those
        # updates, which may only be for certain fields whose update dicts have the given labels.
        use_object_label_filters = True
        # If filters were explicitly supplied
        if label_filters is not None:
            use_object_label_filters = False
            # Fix: this previously tested `self.filter_in`, which is not an attribute of the coordinator (only
            # `default_filter_in` exists), so an explicit label_filters argument raised AttributeError here.
            if filter_in is None:
                filter_in = self.default_filter_in
        # Else - the filters will be set at each iteration of the buffered item loop below
        if self.are_autoupdates_enabled():
            raise StaleAutoupdateMode()
        if len(self.update_buffer) == 0:
            return
        # Track what's been updated to prevent repeated updates triggered by multiple child updates
        updated = []
        new_buffer = []
        # NOTE(review): no_filters reflects only the *explicitly supplied* label_filters; when object filters are in
        # use (label_filters arg is None), it is True and short-circuits the per-item label check below — confirm
        # this is intended (buffering already pre-filtered on labels).
        no_filters = label_filters is None or len(label_filters) == 0
        # For each record in the buffer
        for buffer_item in self.update_buffer:
            updater_dicts = buffer_item.get_my_updaters()
            if use_object_label_filters:
                label_filters = buffer_item.label_filters
                filter_in = buffer_item.filter_in
                if label_filters is None:
                    label_filters = self.default_label_filters
                    filter_in = self.default_filter_in
            # Track updated records to avoid repeated updates
            key = f"{buffer_item.__class__.__name__}.{buffer_item.pk}"
            # Try to perform the update. It could fail if the affected record was deleted
            try:
                if key not in updated and (
                    no_filters
                    or buffer_item.updater_list_has_matching_labels(
                        updater_dicts, label_filters, filter_in
                    )
                ):
                    # Saving the record while mass_updates is True, causes auto-updates of every field
                    # included among the model's decorated functions. It does not only update the fields indicated in
                    # decorators that contain the labels indicated in the label_filters. The filters are only used to
                    # decide which records should be updated. Currently, this is not an issue because we only have 1
                    # update_label in use. And if/when we add another label, it will only end up causing extra
                    # repeated updates of the same record.
                    buffer_item.save(mass_updates=True)
                    # Propagate the changes (if necessary), keeping track of what is updated and what's not.
                    # Note: all the manual changes are assumed to have been made already, so auto-updates only need to
                    # be issued once per record
                    updated = buffer_item.call_dfs_related_updaters(
                        updated=updated, mass_updates=True
                    )
                elif key not in updated and buffer_item not in new_buffer:
                    new_buffer.append(buffer_item)
            except Exception as e:
                # Any exception can be raised from the derived model's decorated updater function
                raise AutoUpdateFailed(buffer_item, e, updater_dicts)
        # Eliminate the updated items from the buffer
        self.update_buffer = new_buffer
class MaintainedModel(Model):
    """
    This class maintains database field values for a django.models.Model class whose values can be derived using a
    function. If a record changes, the decorated function/class is used to update the field value. It can also
    propagate changes of records in linked models. Every function in the derived class decorated with the
    `@MaintainedModel.setter` decorator (defined above, outside this class) will be called and the associated field
    will be updated. Only methods that take no arguments are supported. This class overrides the class's save and
    delete methods and uses m2m_changed signals as triggers for the updates.
    """

    # Thread-safe mutable class attributes. Thread data is initialized via _check_set_coordinator_thread_data.
    # Each thread gets its own coordinator stack, so concurrent loads don't share buffers.
    data = local()

    # Track whether the fields from the decorators have been validated.
    # This is only ever initialized once, the first time every derived class is ever instantiated, so it's loosely an
    # immutable class attribute (though technically mutable), as it would only ever change if new models are added and
    # is only used to decide when a derived class's usage of MaintainedModel is invalid.
    # Keys are "decorator signatures" (see _maintained_model_setup); values are always True once validated.
    maintained_model_initialized: Dict[str, bool] = {}

    # Track the metadata recorded by each derived model class's setter and relation decorators.
    # Similar to maintained_model_initialized, this is loosely an immutable class attribute (though technically
    # mutable), as it is only ever set when the setter and relation decorators are created, which only ever happens
    # once. This does not need to be thread-safe because it will not ever change after all the decorators have been
    # registered. Keys are model class names; values are lists of updater dicts.
    updater_list: Dict[str, List] = defaultdict(list)

    # A dict of class name keys and class values used by get_classes needed for rebuild_maintained_fields. This is
    # initialized via MaintainedModel's decorators as a way to avoid needing the module path of all the models as was
    # formerly done.
    model_classes: Dict[str, Model] = defaultdict(Model)

    # A dict saving the package (referenced elsewhere as "model_path") that holds each model so that its class can be
    # retrieved when needed. Keys are model class names; values are dotted module paths.
    model_packages: Dict[str, str] = defaultdict(str)
def __init__(self, *args, **kwargs):
"""
This over-ride of the constructor is to prevent developers from explicitly setting values for automatically
maintained fields. It also performs a one-time validation check of the updater_dicts.
"""
self._maintained_model_setup(**kwargs)
super().__init__(*args, **kwargs)
    def _maintained_model_setup(self, **kwargs):
        """
        One-time (per decorator signature) setup and per-instance initialization for MaintainedModel.

        This method exists because if a developer calls Model.objects.create(), __init__ is not called. This method is
        called both from __init__() and save(). It:
          1. Initializes the thread-local coordinator data and records the current filters on the instance.
          2. Registers the derived class and its module in the class-level registries.
          3. Rejects any kwarg that would explicitly set a maintained field (MaintainedFieldNotSettable).
          4. Validates each decorator's field names against the class exactly once (BadModelFields).
          5. Connects m2m_changed signal handlers for every many-to-many field on the class.

        Raises:
            MaintainedFieldNotSettable: If a maintained field is passed as a constructor kwarg.
            BadModelFields: If a decorator references a field the class does not have.
        """
        # Make sure the class has been fully initialized
        self._check_set_coordinator_thread_data()
        # The coordinator keeps track of the running mode, buffer and filters in use
        coordinator = self.get_coordinator()
        # Members added by MaintainedModel - the coordinator values are set when the coordinator is instantiated. They
        # are recorded in the object so that during perform_buffered_updates, we will know what field(s) to update when
        # it processes the object. An update would not have been buffered if the model did not contain a maintained
        # field matching the label filtering. And label filtering can change during the buffering process (e.g.
        # different loaders), which is why this is necessary.
        self.label_filters = coordinator.default_label_filters
        self.filter_in = coordinator.default_filter_in
        class_name = self.__class__.__name__
        # Register the class with the coordinator if not already registered
        if class_name not in MaintainedModel.model_classes.keys():
            print(
                f"Registering class {class_name} as a MaintainedModel from _maintained_model_setup: {type(self)}"
            )
            MaintainedModel.model_classes[class_name] = type(self)
            MaintainedModel.model_packages[class_name] = type(self).__module__
        for updater_dict in MaintainedModel.updater_list[class_name]:
            # Ensure the field being set is not a maintained field
            update_fld = updater_dict["update_field"]
            if update_fld and update_fld in kwargs:
                raise MaintainedFieldNotSettable(
                    class_name, update_fld, updater_dict["update_function"]
                )
            # Validate the field values in the updater_list
            # First, create a signature to use to make sure we only check once
            # The creation of a decorator signature allows multiple decorators to be added to 1 class (or function) and
            # only have each one's updater info validated once.
            decorator_signature = ".".join(
                [
                    str(x)
                    for x in [
                        updater_dict["update_label"],
                        updater_dict["update_function"],
                        updater_dict["update_field"],
                        str(updater_dict["generation"]),
                        updater_dict["parent_field"],
                        ",".join(updater_dict["child_fields"]),
                    ]
                ]
            )
            if decorator_signature not in MaintainedModel.maintained_model_initialized:
                if settings.DEBUG:
                    print(
                        f"Validating {self.__class__.__name__} updater: {updater_dict}"
                    )
                # Mark validated BEFORE checking, so a failure is raised only on the first instantiation
                MaintainedModel.maintained_model_initialized[decorator_signature] = True
                # Now we can validate the fields
                flds = {}
                if updater_dict["update_field"]:
                    flds[updater_dict["update_field"]] = "update field"
                if updater_dict["parent_field"]:
                    flds[updater_dict["parent_field"]] = "parent field"
                for cfld in updater_dict["child_fields"]:
                    flds[cfld] = "child field"
                bad_fields = []
                # A field is "bad" if it is not an attribute of the model class
                for field in flds.keys():
                    try:
                        getattr(self.__class__, field)
                    except AttributeError:
                        bad_fields.append({"field": field, "type": flds[field]})
                if len(bad_fields) > 0:
                    raise BadModelFields(
                        self.__class__.__name__,
                        bad_fields,
                        updater_dict["update_function"],
                    )
        try:
            # Connect the m2m_propagation_handler to any m2m field change events
            for m2m_field in self.__class__._meta.many_to_many:
                m2m_field_ref = getattr(self.__class__, m2m_field.name)
                through_model = getattr(m2m_field_ref, "through")
                if settings.DEBUG:
                    print(
                        f"Adding propagation handler to {self.__class__.__name__}.{m2m_field.name}.through"
                    )
                m2m_changed.connect(
                    self.m2m_propagation_handler,
                    sender=through_model,
                )
            # m2m_changed.connect(toppings_changed, sender=Pizza.toppings.through)
        except AttributeError as ae:
            # Models without m2m metadata simply get no handler; re-raise anything else
            if "has no attribute 'many_to_many'" not in str(ae):
                raise ae
            # Else - no propagation handler needed
def save(self, *args, **kwargs):
"""
This is an override of the derived model's save method that is being used here to automatically update
maintained fields.
"""
# The following custom arguments are used internally. Do not supply unless you know what you're doing.
# mass_updates: Whether auto-updating buffered model objects - default False
mass_updates = kwargs.pop("mass_updates", False)
# propagate: Whether to propagate updates to related model objects - default True
propagate = kwargs.pop("propagate", not mass_updates)
# fields_to_autoupdate: List of fields to auto-update. - default None = update all maintained fields
fields_to_autoupdate = kwargs.pop("fields_to_autoupdate", None)
# via_query: Whether this is coming from the from_db method or not (implying no record change) - default False
via_query = kwargs.pop("via_query", False)
# If the object is None, then what has happened is, there was a call to create an object off of the class. That
# means that __init__ was not called, so we are going to handle the initialization of MaintainedModel (including
# the setting of the coordinator and the disallowing of setting values for maintained fields with a call to
# _maintained_model_setup).
if self is None:
# The coordinator keeps track of the running mode, buffer and filters in use
self._maintained_model_setup(**kwargs)
# Retrieve the current coordinator
coordinator = self.get_coordinator()
# Record whether/when we have made the super-save call, so that we don't do it twice when the developer's code
# is calling save just to trigger an auto-update.
# Note, super_save_called will already have been initialized if the object was saved and buffered and mass auto-
# update is being performed. Save on an object can be called a second time from the developer's code
# (presumably after having made subsequent changes), so to support that we must reset to False, but we don't
# want to do it during a mass auto-update.
if not hasattr(self, "super_save_called") or not mass_updates:
self.super_save_called = False
# If auto-updates are turned on, a cascade of updates to linked models will occur, but if they are turned off,
# the update will be buffered, to be manually triggered later (e.g. upon completion of loading), which
# mitigates repeated updates to the same record
if not coordinator.are_autoupdates_enabled():
# If autoupdates are happening (and it's not a mass-autoupdate (assumed because mass_updates
# can only be true if immediate_updates is False)), set the label filters based on the currently set global
# conditions so that only fields matching the filters will be updated.
self.label_filters = coordinator.default_label_filters
self.filter_in = coordinator.default_filter_in
if not mass_updates:
# Set the changed value triggering this update
super().save(*args, **kwargs)
self.super_save_called = True
# The global label filters applied above will be remembered during mass autoupdate of the buffer
coordinator.buffer_update(self)
return
# Otherwise, we are performing a mass auto-update and want to update using the previously set filter conditions
# Calling super.save (if necessary) so that the call to update_decorated_fields can traverse reverse
# relations without a ValueError exception that is a new behavior/constraint as of Django 4.2. But, if we got
# here due to a call to Model.objects.create(), (which calls super.save() from deep in the Django code (and that
# sets the primary key)), so it means that if we were to call it here again, it would cause a unique constraint-
# related exception. That's why we do not call it here if the primary key is set - so we can avoid those
# exceptions. Note, self.pk is None if the record was deleted after being buffered and we encounter it during
# mass update.
if self.pk is None and mass_updates is False:
super().save(*args, **kwargs)
self.super_save_called = True
# Note, we should not save during mass autoupdate because while the object was in the buffer, it could have been
# deleted from the database and saving it would unintentionally re-add it to the database.
if (
# If this was triggered from a query and lazy mode is off
(via_query and not coordinator.are_lazy_updates_enabled())
# If this was not triggered from a query and lazy mode is on (and immediate mode is off)
or (
not via_query
and coordinator.are_lazy_updates_enabled()
and not coordinator.are_immediate_updates_enabled()
)
):
# Remove the super_save_called if it was added (see the delattr comment near the bottom)
if hasattr(self, "super_save_called"):
delattr(self, "super_save_called")
# We don't want to perform autoupdates when lazy updates is False and via_query is True, so return
return
# Update the fields that change due to the the triggering change (if any)
# This only executes either when immediate_updates or mass_updates is true - both cannot be true
changed = self.update_decorated_fields(fields_to_autoupdate)
# This either saves both explicit changes and auto-update changes (when immediate_updates is true) or it only
# saves the auto-updated values (when mass_updates is true)
if changed is True or self.super_save_called is False:
if self.super_save_called or mass_updates is True:
if mass_updates is True:
# Intentionally trigger an exception if the buffer is stale (i.e. if the record was deleted)
self.exists_in_db(raise_exception=True)
# This is a subsequent call to save due to the auto-update, so we don't want to use the original
# arguments (which may direct save that it needs to do an insert). If you do supply arguments in this
# case, you can end up with an IntegrityError due to unique constraints from the ID being the same.
super().save()
else:
super().save(*args, **kwargs)
# If the developer wants to make more changes to this object and call save again, we need to remove the
# super_save_called attribute. This will happen when autoupdate mode is immediate or if deferred (but when
# deferred, only)
delattr(self, "super_save_called")
# We don't need to check mass_updates, because propagating changes during buffered updates is handled elsewhere
# to mitigate repeated updates of the same related record.
# Only propagate in immediate mode, not lazy. In lazy updates, no data in the record is changing other than
# maintained fields, and updates only need to propagate if not-maintained fields have changed, because
# propagation is intended to only trigger when other values depend on the values in the triggering record. And
# since updater methods SHOULD NOT rely on maintained fields, there is no change in a query that should affect
# other maintained fields.
if coordinator.are_immediate_updates_enabled() and propagate:
# Percolate (non-maintained field) record changes up to the related models so they can change their
# maintained fields whose values are dependent on this record's non-maintained fields
self.call_dfs_related_updaters()
def delete(self, *args, **kwargs):
    """
    This is an override of the derived model's delete method that is being used here to automatically update
    maintained fields.

    Two custom keyword arguments are popped off of kwargs before the real delete is called (both are used
    internally - do not supply unless you know what you're doing): propagate and mass_updates (see below).
    """
    # Custom argument: propagate - Whether to propagate updates to related model objects - default True
    # Used internally. Do not supply unless you know what you're doing.
    propagate = kwargs.pop("propagate", True)
    # Custom argument: mass_updates - Whether auto-updating buffered model objects - default False
    # Used internally. Do not supply unless you know what you're doing.
    mass_updates = kwargs.pop("mass_updates", False)
    # Capture a signature of this record BEFORE it is deleted, so it can be marked as already-updated when
    # propagating changes to related records afterwards
    self_sig = self.get_record_signature()
    # Delete the record triggering this update
    super().delete(*args, **kwargs)  # Call the "real" delete() method.
    # Retrieve the current coordinator
    coordinator = self.get_coordinator()
    # If auto-updates are turned on, a cascade of updates to linked models will occur, but if they are turned off,
    # the update will be buffered, to be manually triggered later (e.g. upon completion of loading), which
    # mitigates repeated updates to the same record
    # mass_updates is checked for consistency, but perform_buffered_updates does not call delete()
    if (
        coordinator.are_immediate_updates_enabled() is False
        and mass_updates is False
    ):
        # When buffering only, apply the global label filters, to be remembered during mass autoupdate
        self.label_filters = coordinator.default_label_filters
        self.filter_in = coordinator.default_filter_in
        if coordinator.buffering:
            # The deleted record cannot be updated, but its parents may depend on it, so buffer them
            parents = self.get_parent_instances()
            for parent_inst in parents:
                coordinator.buffer_update(parent_inst)
        # Buffered mode never propagates immediately
        return
    elif coordinator.are_immediate_updates_enabled():
        # If autoupdates are happening (and it's not a mass-autoupdate (assumed because mass_updates
        # can only be true if immediate_updates is False)), set the label filters based on the currently set global
        # conditions so that only fields matching the filters will be updated.
        self.label_filters = coordinator.default_label_filters
        self.filter_in = coordinator.default_filter_in
    # Otherwise, we are performing a mass auto-update and want to update the previously set filter conditions
    if coordinator.are_immediate_updates_enabled() and propagate:
        # Percolate changes up to the parents (if any) and mark the deleted record as updated
        self.call_dfs_related_updaters(updated=[self_sig])
@classmethod
def from_db(cls, *args, **kwargs):
    """
    Override of Model.from_db (whose positional arguments are: db, field_names, values), which converts SQL query
    results into Model objects. This override uses that opportunity to perform lazy auto-updates of maintained
    fields whose selected value is None.

    Note that the query results themselves are unaffected: a record whose maintained field is stale (e.g. it
    should be "x" but is null, and the query filtered on `field = 'x'`) will NOT be returned/updated, because the
    SQL query never matched it. The lazy update happens when a QuerySet is iterated (from_db is called per row).

    Down-sides of this approach:
    - A maintained field whose correct value really is None will be recomputed on every query (wasted effort).
    - DEFERRED field values are never lazy-updated.
    """
    # Build the model instance the normal way first
    rec = super().from_db(*args, **kwargs)

    # Lazy auto-updates only happen when the coordinator is in "lazy" mode
    if not cls.get_coordinator().are_lazy_updates_enabled():
        return rec

    # args[1] holds the field_names selected by the query
    fields_in_query = set(args[1])
    # Restrict attention to maintained fields that the query actually selected
    maintained_in_query = set(cls.get_my_update_fields()) & fields_in_query
    # A None value in a maintained field is considered stale and triggers an update
    stale_fields = [name for name in maintained_in_query if getattr(rec, name) is None]

    if stale_fields:
        print(
            f"Triggering lazy auto-update of fields: {cls.__name__}.{{{', '.join(stale_fields)}}}"
        )
        # Trigger an auto-update of just the stale fields
        rec.save(fields_to_autoupdate=stale_fields, via_query=True)

    return rec
@staticmethod
def relation(
generation, parent_field_name=None, child_field_names=[], update_label=None
):
"""
Use this decorator to add connections between classes when it does not have any maintained fields. For example,
if you only want to maintain 1 field in 1 class, but you want changes in a related class to trigger updates to
that field, apply this decorator to the class and set either the parent_field_name and/or the child_field_names
to trigger those updates of the maintained fields in that related model class.
Refer to the doc string of the MaintainedModel.setter decorator below for a description of the parameters.
Example:
class ModelA(MaintainedModel):
...
@relation(
generation=1,
parent_field_name="modela",
child_field_names=["modelcs", "modelds"],
update_label="values",
)
class ModelB(MaintainedModel):
modela=ForeignKey(...)
class ModelC(MaintainedModel):
modelb = ForeignKey(... related_name="modelcs")
class ModelD(MaintainedModel):
modelb = ForeignKey(... related_name="modelds")
The class decorator in the above example links ModelB to Models A, C, and D. So if a ModelB object changes, it
will trigger auto-updated to maintained fields (not shown) in its child model records (first) and it's parent
modelA records. Likewise, it will pass on triggered updates from those classes if they are set to pass on
changes to modelB though the parent/chold fields in their decorators.
"""
# Validate args
if generation != 0:
# Make sure internal nodes have parent fields
if parent_field_name is None:
raise ConditionallyRequiredArgumentError(
"parent_field is required if generation is not 0."
)
elif generation == 0 and parent_field_name is not None:
raise ValueError("parent_field must not have a value when generation is 0.")
if parent_field_name is None and len(child_field_names) == 0:
raise ValueError(
"One or both of parent_field_name or child_field_names is required."
)
def decorator(cls):
func_dict = {
"update_function": None,
"update_field": None,
"parent_field": parent_field_name,
"child_fields": child_field_names,
"update_label": update_label, # Used as a filter to trigger specific series' of (mass) updates
"generation": generation, # Used to update from leaf to root for mass updates
}
class_name = cls.__name__
# Register the class (and the module) with the coordinator if not already registered
if class_name not in MaintainedModel.model_classes.keys():
print(
f"Registering class {class_name} as a MaintainedModel from the relation decorator: {cls}"
)
MaintainedModel.model_classes[class_name] = cls
MaintainedModel.model_packages[class_name] = cls.__module__
# Register the updater with the coordinator
MaintainedModel.updater_list[class_name].append(func_dict)
# No way to ensure supplied fields exist because the models aren't actually loaded yet, so while that would
# be nice to handle here, it will have to be handled in MaintanedModel when objects are created
# Provide some debug feedback
if settings.DEBUG:
msg = f"Added MaintainedModel.relation decorator {class_name} to update"
if update_label is not None:
msg += f" '{update_label}'-related"
if parent_field_name is not None:
msg += f" parent: {class_name}.{parent_field_name}"
if len(child_field_names) > 0:
msg += " and"
if len(child_field_names) > 0:
msg += f"children: [{', '.join([class_name + '.' + c for c in child_field_names])}]"
print(f"{msg}.")
return cls
return decorator
@staticmethod
def setter(
generation,
update_field_name=None,
parent_field_name=None,
update_label=None,
child_field_names=[],
):
"""
This is a decorator factory for functions in a Model class that are identified to be used to update a supplied
field and field of any linked parent/child record (for when the record is changed). This function returns a
decorator that takes the decorated function. That function should not use the value of another maintained field
in its calculation because the order of update is not guaranteed to occur in a favorable series. It should
return a value compatible with the field type supplied.
These decorated functions are identified by the MaintainedModel class, whose save and delete methods override
the parent model and call the decorated functions to update field supplied to the factory function. It also
propagates the updates to the linked dependent model's save methods (if the parent and/or child field name is
supplied), the assumption being that a change to "this" record's maintained field necessitates a change to
another maintained field in the linked parent record. Parent and child field names should only be supplied if a
change to "this" record means that related foields in parent/child records will need to be recomputed. There is
no need to supply parent/child field names if that is not the case.
The generation input is an integer indicating the hierarchy level. E.g. if there is no parent, `generation`
should be 0. Each subsequence generation should increment generation. It is used to populate update_buffer
when immediate_updates is False, so that mass updates can be triggered after all data is loaded.
Note that a class can have multiple fields to update and that those updates (according to their decorators) can
trigger subsequent updates in different "parent"/"child" records. If multiple update fields trigger updates to
different parents, they are triggered in a depth-first fashion. Child records are updated first, then parents.
If a child links back to a parent, already-updated records prevent repeated/looped updates. However, this only
becomes relevant when the global variable `immediate_updates` is False, mass database changes are made
(buffering the auto-updates), and then auto-updates are explicitly triggered.
Note, if there are many decorated methods updating different fields, and all of the "parent"/"child" fields are
the same, only 1 of those decorators needs to set a parent field.
"""
if update_field_name is None and (
parent_field_name is None and generation != 0
):
raise ConditionallyRequiredArgumentError(
"Either an update_field_name or parent_field_name argument is required."
)
# The actual decorator (because a decorator can only take 1 argument (the decorated function). The "decorator"
# above is more akin to a global function call that returns this decorator that is immediately applied to the
# decorated function.
def decorator(fn):
# Get the name of the class the function belongs to
class_name = fn.__qualname__.split(".")[0]
if parent_field_name is None and generation != 0:
raise InvalidRootGeneration(
class_name, update_field_name, fn.__name__, generation
)
func_dict = {
"update_function": fn.__name__,
"update_field": update_field_name,
"parent_field": parent_field_name,
"child_fields": child_field_names,
"update_label": update_label, # Used as a filter to trigger specific series' of (mass) updates
"generation": generation, # Used to update from leaf to root for mass updates
}
# Try to register the model class. If this fails, fallback methods will be used when it is needed later.
if class_name not in MaintainedModel.model_packages.keys():
models_path = (
MaintainedModel.get_model_package_name_from_member_function(fn)
)
# Register the class with the coordinator if not already registered
if models_path is not None:
MaintainedModel.model_packages[class_name] = models_path
# No way to ensure supplied fields exist because the models aren't actually loaded yet, so while that would
# be nice to handle here, it will have to be handled in MaintanedModel when objects are created
# Add this info to our global updater_list
MaintainedModel.updater_list[class_name].append(func_dict)
# It would be nice if we could register the class here, but getting the surrounding class from the function
# is tricky and fragile. The class will be registered when its first instance is created.
# Provide some debug feedback
if settings.DEBUG:
msg = f"Added MaintainedModel.setter decorator to function {fn.__qualname__} to"
if update_field_name is not None:
msg += f" maintain {class_name}.{update_field_name}"
if parent_field_name is not None or len(child_field_names) > 0:
msg += " and"
if parent_field_name is not None:
msg += (
f" trigger updates to parent: {class_name}."
f"{parent_field_name}"
)
if parent_field_name is not None and len(child_field_names) > 0:
msg += " and "
if child_field_names is not None and len(child_field_names) > 0:
msg += (
f" trigger updates to children: "
f"{', '.join([class_name + '.' + c for c in child_field_names])}"
)
print(f"{msg}.")
return fn
# This returns the actual decorator function which will immediately run on the decorated function
return decorator
def exists_in_db(self, raise_exception=False):
    """
    Check that this object hasn't been deleted from the database while it sat in the buffer. When
    raise_exception is True, any lookup failure (including DoesNotExist) is raised so that callers (e.g.
    perform_buffered_updates) do not end up re-saving a deleted object; otherwise a missing record simply
    returns False, and any non-DoesNotExist error is still raised.

    https://stackoverflow.com/a/16613258/2057516
    """
    try:
        # A primary-key lookup raises DoesNotExist when the record is gone
        type(self).objects.get(pk__exact=self.pk)
    except Exception as err:
        # Re-raise when asked to, or when the failure isn't a missing-record condition
        if raise_exception is True or not issubclass(type(err), ObjectDoesNotExist):
            raise err
        return False
    return True
@staticmethod
def get_model_package_name_from_member_function(fn):
"""
This will TRY to obtain the package name (aka "models_path") from the supplied model's member function. It
does so by using the function's class attribute '__globals__' dict to access its __package__ (a string
showing the path to where the model's class is defined). If it fails in any particular version of Django,
the fallback is to simply require the package name wherever it is needed.
"""
try:
models_path = fn.__globals__["__package__"]
return models_path
except Exception as e:
print(
"WARNING: MaintainedModel was unable to retrieve the model class's package from its member function: "
f"{fn.__qualname__}. MaintainedModel (or MaintainedModelCoordinator) functions which require the "
"model class type before an instance of the model class has been created, will need to be supplied a "
f"models_path, e.g. 'AppName.models'. The error encoutnered was: {e.__class__.__name__}: {e}"
)
return None
@classmethod
def m2m_propagation_handler(cls, **kwargs):
    """
    Additions to M:M related models do not require a .save() to be called afterwards, thus additions like:
        peakgroup.compounds.add(cmpd)
    do not propagate a change to MSRunSample as is necessary for automatic field maintenance, expressly because
    peakgroup.save() is not called. To deal with this, and trigger the necessary automatic updates of maintained
    fields, an m2m_changed signal is attached to all M:M fields in MaintainedModel.__init__ to tell us when a
    MaintainedModel has an M:M field that has been added to. That causes this method to be called, and from here
    we can propagate the changes.
    """
    obj = kwargs.pop("instance", None)
    act = kwargs.pop("action", None)
    # Retrieve the current coordinator
    coordinator = cls.get_coordinator()
    # act is popped with a None default, so guard against it before calling .startswith (otherwise a signal
    # dispatched without an action would raise AttributeError). Only "post_*" phases propagate.
    if (
        act is not None
        and act.startswith("post_")
        and isinstance(obj, MaintainedModel)
        and coordinator.are_immediate_updates_enabled()
    ):
        obj.call_dfs_related_updaters()
@classmethod
def get_coordinator(cls):
    """Return the MaintainedModelCoordinator currently in effect.

    Delegates to _get_current_coordinator, which returns the coordinator on top of the coordinator stack,
    falling back to the default coordinator when the stack is empty.
    """
    return cls._get_current_coordinator()
@classmethod
def _get_current_coordinator(cls):
coordinator_stack = cls._get_coordinator_stack()
if len(coordinator_stack) > 0:
# Get the current coordinator
current_coordinator = coordinator_stack[-1]
# Call the last coordinator on the stack
return current_coordinator
else:
return cls._get_default_coordinator()
@classmethod
def _get_default_coordinator(cls):