New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor analytics queries #1280
Merged
Merged
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
d2b0b38
Refactor analytics queries
timgl a0539e0
Fix timedelta issue
timgl 77ef0ab
Fix tests
timgl 0507bab
Improve test
timgl ce06eab
Fix stickiness action
timgl e3b1de9
Add default shape
timgl 5c32d76
Refactor tests
timgl 116e297
Add types to filter func
timgl b3cda72
Add comments to functions
timgl 94705fc
Merge branch 'master' into refactor-queries
timgl 792dfa5
Fix tests
timgl 635af3b
remove unused import
timgl 7a60cd3
remove more unused imports
timgl cc29a30
Fix retention issues
timgl File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import copy | ||
from typing import Any, Callable, Dict, List, Optional | ||
|
||
from dateutil.relativedelta import relativedelta | ||
from django.db.models import Q, QuerySet | ||
|
||
from posthog.constants import TREND_FILTER_TYPE_ACTIONS, TREND_FILTER_TYPE_EVENTS, TRENDS_CUMULATIVE, TRENDS_STICKINESS | ||
from posthog.models import Entity, Event, Filter, Team | ||
from posthog.utils import get_compare_period_dates | ||
|
||
""" | ||
process_entity_for_events takes in an Entity and team_id, and returns an Event QuerySet that's correctly filtered | ||
""" | ||
|
||
|
||
def process_entity_for_events(entity: Entity, team_id: int, order_by="-id") -> QuerySet: | ||
if entity.type == TREND_FILTER_TYPE_ACTIONS: | ||
events = Event.objects.filter(action__pk=entity.id).add_person_id(team_id) | ||
if order_by: | ||
events = events.order_by(order_by) | ||
return events | ||
elif entity.type == TREND_FILTER_TYPE_EVENTS: | ||
return Event.objects.filter_by_event_with_people(event=entity.id, team_id=team_id, order_by=order_by) | ||
return QuerySet() | ||
|
||
|
||
def _determine_compared_filter(filter: Filter) -> Filter:
    """Return a deep copy of *filter* whose date range is shifted back to the
    immediately preceding period (for compare mode).

    Raises ValueError when either end of the date range is missing.
    """
    if not (filter.date_to and filter.date_from):
        raise ValueError("You need date_from and date_to to compare")
    previous_start, previous_end = get_compare_period_dates(filter.date_from, filter.date_to)
    previous_filter = copy.deepcopy(filter)
    previous_filter._date_from = previous_start.date().isoformat()
    previous_filter._date_to = previous_end.date().isoformat()
    return previous_filter
|
||
|
||
def _convert_to_comparison(trend_entity: List[Dict[str, Any]], filter: Filter, label: str) -> List[Dict[str, Any]]:
    """Mutate each serialized series in *trend_entity* for comparison charts:
    the real dates move to "dates" while "days"/"labels" become 0-based
    period offsets ("day 0", "day 1", ...). Returns the same (mutated) list.
    """
    interval_name = filter.interval if filter.interval is not None else "day"
    for series in trend_entity:
        offsets = list(range(len(series["days"])))
        offset_labels = ["{} {}".format(interval_name, index) for index in range(len(series["labels"]))]
        # Preserve the original dates before overwriting "days" with offsets.
        series["dates"] = series["days"]
        series["days"] = offsets
        series["labels"] = offset_labels
        series["label"] = label
        series["compare"] = True
    return trend_entity
|
||
|
||
""" | ||
handle_compare takes an Entity, Filter and a callable. | ||
It'll automatically create a new entity with the 'current' and 'previous' labels and automatically pick the right date_from and date_to filters . | ||
It will then call func(entity, filter, team_id). | ||
""" | ||
|
||
|
||
def handle_compare(entity: Entity, filter: Filter, func: Callable, team_id: int) -> List: | ||
entities_list = [] | ||
trend_entity = func(entity=entity, filter=filter, team_id=team_id) | ||
if filter.compare: | ||
trend_entity = _convert_to_comparison(trend_entity, filter, "{} - {}".format(entity.name, "current")) | ||
entities_list.extend(trend_entity) | ||
|
||
compared_filter = _determine_compared_filter(filter) | ||
compared_trend_entity = func(entity=entity, filter=compared_filter, team_id=team_id) | ||
|
||
compared_trend_entity = _convert_to_comparison( | ||
compared_trend_entity, compared_filter, "{} - {}".format(entity.name, "previous"), | ||
) | ||
entities_list.extend(compared_trend_entity) | ||
else: | ||
entities_list.extend(trend_entity) | ||
return entities_list | ||
|
||
|
||
""" | ||
filter_events takes team_id, filter, entity and generates a Q objects that you can use to filter a QuerySet | ||
""" | ||
|
||
|
||
def filter_events(team_id: int, filter: Filter, entity: Optional[Entity] = None) -> Q: | ||
filters = Q() | ||
if filter.date_from: | ||
filters &= Q(timestamp__gte=filter.date_from) | ||
if filter.date_to: | ||
relativity = relativedelta(days=1) | ||
if filter.interval == "hour": | ||
relativity = relativedelta(hours=1) | ||
elif filter.interval == "minute": | ||
relativity = relativedelta(minutes=1) | ||
elif filter.interval == "week": | ||
relativity = relativedelta(weeks=1) | ||
elif filter.interval == "month": | ||
relativity = relativedelta(months=1) - relativity # go to last day of month instead of first of next | ||
filters &= Q(timestamp__lte=filter.date_to + relativity) | ||
if filter.properties: | ||
filters &= filter.properties_to_Q(team_id=team_id) | ||
if entity and entity.properties: | ||
filters &= entity.properties_to_Q(team_id=team_id) | ||
return filters | ||
|
||
|
||
class BaseQuery:
    """
    Abstract base class for analytics queries.

    ``run`` must be implemented by each concrete Query class. It takes in a
    Filter, a Team, and optionally other arguments within kwargs (though use
    sparingly!).

    The output is a List comprised of Dicts; what those dicts look like
    depends on the needs of the frontend.
    """

    def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
        # Concrete subclasses must override this; calling the base always raises.
        raise NotImplementedError("You need to implement run")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import datetime | ||
from datetime import timedelta | ||
from typing import Any, Dict, List, Optional | ||
|
||
from posthog.models import Entity, Event, Filter, Team | ||
from posthog.queries.base import BaseQuery | ||
|
||
|
||
class Retention(BaseQuery):
    """Builds the day-over-day retention table for a team."""

    def calculate_retention(self, filter: Filter, team: Team, start_entity: Optional[Entity] = None, total_days=11):
        """Return one row per cohort day; each row's "values" holds the
        per-day retention cells (``{"count": ..., "people": [...]}``) for
        that cohort, falling back to an empty cell when no data exists.

        NOTE: mutates ``filter._date_to`` so the underlying query covers
        exactly *total_days* days from ``filter.date_from``.
        """
        date_from: datetime.datetime = filter.date_from  # type: ignore
        filter._date_to = (date_from + timedelta(days=total_days)).isoformat()
        # e.g. "Wed. 5 August" — NOTE(review): %-d (non-padded day) is a
        # platform-specific strftime extension; confirm deploy targets support it.
        date_format = "%a. %-d %B"
        retention_by_cell = Event.objects.query_retention(filter, team, start_entity=start_entity)

        result = []
        for cohort_day in range(total_days):
            row_cells = []
            for day in range(total_days - cohort_day):
                row_cells.append(retention_by_cell.get((cohort_day, day), {"count": 0, "people": []}))
            result.append(
                {
                    "values": row_cells,
                    "label": "Day {}".format(cohort_day),
                    "date": (date_from + timedelta(days=cohort_day)).strftime(date_format),
                }
            )

        return result

    def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
        first_entity = filter.entities[0] if len(filter.entities) > 0 else None
        return self.calculate_retention(
            filter=filter,
            team=team,
            start_entity=first_entity,
            total_days=kwargs.get("total_days", 11),
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
from typing import List, Dict, Any | ||
from .base import filter_events, handle_compare, process_entity_for_events, BaseQuery | ||
from posthog.models import Entity, Filter, Team, Event, Action | ||
from posthog.constants import TREND_FILTER_TYPE_ACTIONS | ||
from django.db.models import QuerySet, Count, functions | ||
from django.utils.timezone import now | ||
from django.db import connection | ||
import copy | ||
|
||
|
||
def execute_custom_sql(query, params):
    """Execute a raw SQL *query* with *params* and return all result rows."""
    # Use the cursor as a context manager so it is always closed, even when
    # execute() raises (the original leaked the cursor on every call).
    with connection.cursor() as cursor:
        cursor.execute(query, params)
        return cursor.fetchall()
|
||
|
||
class Stickiness(BaseQuery):
    """Computes "stickiness": for each entity, how many distinct people
    performed it on exactly 1 day, exactly 2 days, ... within the filter's
    date range.
    """

    def _serialize_entity(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
        """Return the frontend-shaped series (a one-element list of dicts)
        for a single entity.
        """
        if filter.interval is None:
            filter.interval = "day"

        serialized: Dict[str, Any] = {
            "action": entity.to_dict(),
            "label": entity.name,
            "count": 0,
            "data": [],
            "labels": [],
            "days": [],
        }
        response = []
        events = process_entity_for_events(entity=entity, team_id=team_id, order_by=None,)
        events = events.filter(filter_events(team_id, filter, entity))
        new_dict = copy.deepcopy(serialized)
        new_dict.update(self.stickiness(filtered_events=events, entity=entity, filter=filter, team_id=team_id))
        response.append(new_dict)
        return response

    def stickiness(self, filtered_events: QuerySet, entity: Entity, filter: Filter, team_id: int) -> Dict[str, Any]:
        """Aggregate *filtered_events* into per-person distinct-day counts and
        return the chart payload (labels/days/data/count).

        Raises ValueError when the filter lacks date_from or date_to.
        """
        if not filter.date_to or not filter.date_from:
            raise ValueError("stickiness needs date_to and date_from set")
        range_days = (filter.date_to - filter.date_from).days + 2

        # NOTE(review): the caller already applies filter_events; re-applying
        # here is redundant but harmless, and keeps this method safe for
        # unfiltered querysets.
        events = (
            filtered_events.filter(filter_events(team_id, filter, entity))
            .values("person_id")
            .annotate(day_count=Count(functions.TruncDay("timestamp"), distinct=True))
            .filter(day_count__lte=range_days)
        )

        events_sql, events_sql_params = events.query.sql_with_params()
        # Only the ORM-generated subquery SQL is interpolated; its params are
        # passed separately, so the query stays parameterized.
        aggregated_query = "select count(v.person_id), v.day_count from ({}) as v group by v.day_count".format(
            events_sql
        )
        aggregated_counts = execute_custom_sql(aggregated_query, events_sql_params)

        # Rows come back as (person_count, day_count); index by day_count.
        response: Dict[int, int] = {row[1]: row[0] for row in aggregated_counts}

        days = list(range(1, range_days))
        labels = ["{} day{}".format(day, "s" if day > 1 else "") for day in days]
        data = [response.get(day, 0) for day in days]

        return {
            "labels": labels,
            "days": days,
            "data": data,
            "count": sum(data),
        }

    def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
        response = []

        # Default the date range: from midnight of the team's first event up
        # to now. NOTE(review): indexing [0] raises IndexError for a team with
        # no events — confirm callers guard against that.
        if not filter.date_from:
            filter._date_from = (
                Event.objects.filter(team_id=team.pk)
                .order_by("timestamp")[0]
                .timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
                .isoformat()
            )
        if not filter.date_to:
            filter._date_to = now().isoformat()

        for entity in filter.entities:
            if entity.type == TREND_FILTER_TYPE_ACTIONS:
                # Actions only carry an id on the filter; resolve the display name.
                entity.name = Action.objects.only("name").get(team=team, pk=entity.id).name

            entity_resp = handle_compare(entity=entity, filter=filter, func=self._serialize_entity, team_id=team.pk)
            response.extend(entity_resp)
        return response
Empty file.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps in a followup refactor since this refactor is overall restructuring. We should standardize how/when to handle filters/params. For example, in this instance above we don't check any conditions and just write in something extra to the filter whereas in trends.py line 298 we do explicit checks before reassigning
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep agree, something I will tackle. It's annoying b/c the way they change the dates isn't consistent