Skip to content

Commit

Permalink
SQL event.timestamp is not always unique (#13019)
Browse files Browse the repository at this point in the history
* return ID instead of timestamp

* order by both timestamp and ID

* order events by ID

* add timestamp ordering to events

* fix linting error

* change event timestamps in test

* add context in docstring and comment

* add test to tracker store

* undo changes to assertion

* add changelog
  • Loading branch information
vcidst committed Mar 4, 2024
1 parent a0a9571 commit 0df5212
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 5 deletions.
1 change: 1 addition & 0 deletions changelog/13019.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Changed the ordering of returned events to order by ID (previously timestamp) in SQL Tracker Store
8 changes: 7 additions & 1 deletion rasa/core/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def _validate_all_requested_ids_exist(
self, conversation_ids_in_tracker_store: Set[Text]
) -> None:
"""Warn user if `self.requested_conversation_ids` contains IDs not found in
`conversation_ids_in_tracker_store`
`conversation_ids_in_tracker_store`.
Args:
conversation_ids_in_tracker_store: Set of conversation IDs contained in
Expand Down Expand Up @@ -241,6 +241,12 @@ async def _fetch_events_within_time_range(self) -> AsyncIterator[Dict[Text, Any]
continue

events = self._get_events_for_conversation_id(_events, conversation_id)

# the order of events was changed after ATO-2192
# more context: https://github.com/RasaHQ/rasa/pull/13019
# we should sort the events by timestamp to keep the order
events.sort(key=lambda x: x["timestamp"])

# the conversation IDs are needed in the event publishing
for event in events:
if (
Expand Down
5 changes: 4 additions & 1 deletion rasa/core/tracker_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,9 @@ def _event_query(
self, session: "Session", sender_id: Text, fetch_events_from_all_sessions: bool
) -> "Query":
"""Provide the query to retrieve the conversation events for a specific sender.
The events are ordered by ID to ensure correct sequence of events.
As `timestamp` is not guaranteed to be unique and low-precision (float), it
cannot be used to order the events.
Args:
session: Current database session.
Expand Down Expand Up @@ -1325,7 +1328,7 @@ def _event_query(
)
)

return event_query.order_by(self.SQLEvent.timestamp)
return event_query.order_by(self.SQLEvent.id)

async def save(self, tracker: DialogueStateTracker) -> None:
"""Update database with events from the current conversation."""
Expand Down
6 changes: 3 additions & 3 deletions tests/core/test_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ async def test_fetch_events_within_time_range():
conversation_ids = ["some-id", "another-id"]

# prepare events from different senders and different timestamps
event_1 = random_user_uttered_event(3)
event_2 = random_user_uttered_event(2)
event_3 = random_user_uttered_event(1)
event_1 = random_user_uttered_event(1)
event_2 = random_user_uttered_event(3)
event_3 = random_user_uttered_event(2)
events = {conversation_ids[0]: [event_1, event_2], conversation_ids[1]: [event_3]}

def _get_tracker(conversation_id: Text) -> DialogueStateTracker:
Expand Down
30 changes: 30 additions & 0 deletions tests/core/test_tracker_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,36 @@ async def test_sql_additional_events_with_session_start(domain: Domain):
assert isinstance(additional_events[0], UserUttered)


async def test_tracker_store_retrieve_ordered_by_id(
domain: Domain,
):
tracker_store_kwargs = {"host": "sqlite:///"}
tracker_store = SQLTrackerStore(domain, **tracker_store_kwargs)
events = [
SessionStarted(timestamp=1),
UserUttered("Hola", {"name": "greet"}, timestamp=2),
BotUttered("Hi", timestamp=2),
UserUttered("How are you?", {"name": "greet"}, timestamp=2),
BotUttered("I am good, whats up", timestamp=2),
UserUttered("Ciao", {"name": "greet"}, timestamp=2),
BotUttered("Bye", timestamp=2),
]
sender_id = "test_sql_tracker_store_events_order"
tracker = DialogueStateTracker.from_events(sender_id, events)
await tracker_store.save(tracker)

# Save other tracker to ensure that we don't run into problems with other senders
other_tracker = DialogueStateTracker.from_events("other-sender", [SessionStarted()])
await tracker_store.save(other_tracker)

# Retrieve tracker with events since latest SessionStarted
tracker = await tracker_store.retrieve(sender_id)

assert len(tracker.events) == 7
# assert the order of events is same as the order in which they were added
assert all((event == tracker.events[i] for i, event in enumerate(events)))


@pytest.mark.parametrize(
"tracker_store_type,tracker_store_kwargs",
[(MockedMongoTrackerStore, {}), (SQLTrackerStore, {"host": "sqlite:///"})],
Expand Down

0 comments on commit 0df5212

Please sign in to comment.