From bfb258cbe937d2daf865fb7ccbbf16aeca002e1a Mon Sep 17 00:00:00 2001 From: Tim Glaser Date: Wed, 29 Jul 2020 21:07:02 +0200 Subject: [PATCH] Refactor analytics queries (#1280) * Refactor analytics queries * Fix timedelta issue * Fix tests * Improve test * Fix stickiness action * Add default shape * Refactor tests * Add types to filter func * Add comments to functions * Fix tests * remove unused import * remove more unused imports * Fix retention issues --- posthog/api/action.py | 431 +-------------- posthog/api/team.py | 5 +- posthog/api/test/test_action.py | 667 +----------------------- posthog/api/test/test_dashboard.py | 1 - posthog/api/test/test_team.py | 9 +- posthog/queries/__init__.py | 0 posthog/queries/base.py | 112 ++++ posthog/queries/retention.py | 35 ++ posthog/queries/stickiness.py | 94 ++++ posthog/queries/test/__init__.py | 0 posthog/queries/test/test_retention.py | 157 ++++++ posthog/queries/test/test_stickiness.py | 86 +++ posthog/queries/test/test_trends.py | 587 +++++++++++++++++++++ posthog/queries/trends.py | 322 ++++++++++++ posthog/tasks/update_cache.py | 6 +- tsconfig.json | 56 +- 16 files changed, 1452 insertions(+), 1116 deletions(-) create mode 100644 posthog/queries/__init__.py create mode 100644 posthog/queries/base.py create mode 100644 posthog/queries/retention.py create mode 100644 posthog/queries/stickiness.py create mode 100644 posthog/queries/test/__init__.py create mode 100644 posthog/queries/test/test_retention.py create mode 100644 posthog/queries/test/test_stickiness.py create mode 100644 posthog/queries/test/test_trends.py create mode 100644 posthog/queries/trends.py diff --git a/posthog/api/action.py b/posthog/api/action.py index 2355c0194f902..3022eba4f48c9 100644 --- a/posthog/api/action.py +++ b/posthog/api/action.py @@ -46,13 +46,12 @@ Team, User, ) +from posthog.queries import base, retention, stickiness, trends from posthog.tasks.calculate_action import calculate_action from posthog.utils import TemporaryTokenAuthentication, append_data, get_compare_period_dates from .person import PersonSerializer -FREQ_MAP = {"minute": "60S", "hour": "H", "day": "D", "week": "W", "month": "M"} - class ActionStepSerializer(serializers.HyperlinkedModelSerializer): class Meta: @@ -99,11 +98,6 @@ def get_count(self, action: Action) -> Optional[int]: def get_actions(queryset: QuerySet, params: dict, team_id: int) -> QuerySet: - if params.get(TREND_FILTER_TYPE_ACTIONS): - queryset = queryset.filter( - pk__in=[action.id for action in Filter({"actions": json.loads(params.get("actions", "[]"))}).actions] - ) - if params.get("include_count"): queryset = queryset.annotate(count=Count(TREND_FILTER_TYPE_EVENTS)) @@ -187,10 +181,11 @@ def trends(self, request: request.Request, *args: Any, **kwargs: Any) -> Respons @cached_function(cache_type=TRENDS_ENDPOINT) def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]: team = request.user.team_set.get() - actions = self.get_queryset() - params = request.GET.dict() filter = Filter(request=request) - result = calculate_trends(filter, team.pk, actions) + if filter.shown_as == "Stickiness": + result = stickiness.Stickiness().run(filter, team) + else: + result = trends.Trends().run(filter, team) dashboard_id = request.GET.get("from_dashboard", None) if dashboard_id: @@ -202,16 +197,17 @@ def _calculate_trends(self, request: request.Request) -> List[Dict[str, Any]]: def retention(self, request: request.Request, *args: Any, **kwargs: Any) -> Response: team = request.user.team_set.get() properties = 
request.GET.get("properties", "{}") + + filter = Filter(data={"properties": json.loads(properties)}) + start_entity_data = request.GET.get("start_entity", None) - start_entity: Optional[Entity] = None if start_entity_data: data = json.loads(start_entity_data) - start_entity = Entity({"id": data["id"], "type": data["type"]}) + filter.entities = [Entity({"id": data["id"], "type": data["type"]})] - filter = Filter(data={"properties": json.loads(properties)}) filter._date_from = "-11d" - result = calculate_retention(filter, team, start_entity=start_entity) - return Response(result) + result = retention.Retention().run(filter, team) + return Response({"data": result}) @action(methods=["GET"], detail=False) def people(self, request: request.Request, *args: Any, **kwargs: Any) -> Response: @@ -260,7 +256,7 @@ def _calculate_people(events: QuerySet, offset: int): filtered_events: QuerySet = QuerySet() if request.GET.get("session"): filtered_events = ( - Event.objects.filter(team=team).filter(filter_events(team.pk, filter)).add_person_id(team.pk) + Event.objects.filter(team=team).filter(base.filter_events(team.pk, filter)).add_person_id(team.pk) ) else: if len(filter.entities) >= 1: @@ -269,8 +265,8 @@ def _calculate_people(events: QuerySet, offset: int): entity = Entity({"id": request.GET["entityId"], "type": request.GET["type"]}) if entity.type == TREND_FILTER_TYPE_EVENTS: - filtered_events = process_entity_for_events(entity, team_id=team.pk, order_by=None).filter( - filter_events(team.pk, filter, entity) + filtered_events = base.process_entity_for_events(entity, team_id=team.pk, order_by=None).filter( + base.filter_events(team.pk, filter, entity) ) elif entity.type == TREND_FILTER_TYPE_ACTIONS: actions = super().get_queryset() @@ -279,8 +275,8 @@ def _calculate_people(events: QuerySet, offset: int): action = actions.get(pk=entity.id) except Action.DoesNotExist: return Response([]) - filtered_events = process_entity_for_events(entity, team_id=team.pk, order_by=None).filter( - filter_events(team.pk, filter, entity) + filtered_events = base.process_entity_for_events(entity, team_id=team.pk, order_by=None).filter( + base.filter_events(team.pk, filter, entity) ) people = _calculate_people(events=filtered_events, offset=offset) @@ -301,401 +297,6 @@ def _calculate_people(events: QuerySet, offset: int): return Response({"results": [people], "next": next_url, "previous": current_url[1:]}) -def calculate_trends(filter: Filter, team_id: int, actions: QuerySet) -> List[Dict[str, Any]]: - entities_list = [] - actions = actions.filter(deleted=False) - - if len(filter.entities) == 0: - # If no filters, automatically grab all actions and show those instead - filter.entities = [ - Entity({"id": action.id, "name": action.name, "type": TREND_FILTER_TYPE_ACTIONS,}) for action in actions - ] - - if not filter.date_from: - filter._date_from = ( - Event.objects.filter(team_id=team_id) - .order_by("timestamp")[0] - .timestamp.replace(hour=0, minute=0, second=0, microsecond=0) - .isoformat() - ) - if not filter.date_to: - filter._date_to = now().isoformat() - - compared_filter = None - if filter.compare: - compared_filter = determine_compared_filter(filter) - - for entity in filter.entities: - if entity.type == TREND_FILTER_TYPE_ACTIONS: - try: - db_action = [action for action in actions if action.id == entity.id][0] - entity.name = db_action.name - except IndexError: - continue - trend_entity = serialize_entity(entity=entity, filter=filter, team_id=team_id) - if filter.compare and compared_filter: - trend_entity = 
convert_to_comparison(trend_entity, filter, "{} - {}".format(entity.name, "current")) - entities_list.extend(trend_entity) - - compared_trend_entity = serialize_entity(entity=entity, filter=compared_filter, team_id=team_id) - - compared_trend_entity = convert_to_comparison( - compared_trend_entity, compared_filter, "{} - {}".format(entity.name, "previous"), - ) - entities_list.extend(compared_trend_entity) - else: - entities_list.extend(trend_entity) - return entities_list - - -def calculate_retention(filter: Filter, team: Team, start_entity: Optional[Entity] = None, total_days=11): - date_from: datetime.datetime = filter.date_from # type: ignore - filter._date_to = (date_from + timedelta(days=total_days)).isoformat() - labels_format = "%a. %-d %B" - resultset = Event.objects.query_retention(filter, team, start_entity=start_entity) - - result = { - "data": [ - { - "values": [ - resultset.get((first_day, day), {"count": 0, "people": []}) for day in range(total_days - first_day) - ], - "label": "Day {}".format(first_day), - "date": (date_from + timedelta(days=first_day)).strftime(labels_format), - } - for first_day in range(total_days) - ] - } - - return result - - -def build_dataframe(aggregates: QuerySet, interval: str, breakdown: Optional[str] = None) -> pd.DataFrame: - if breakdown == "cohorts": - cohort_keys = [key for key in aggregates[0].keys() if key.startswith("cohort_")] - # Convert queryset with day, count, cohort_88, cohort_99, ... to multiple rows, for example: - # 2020-01-01..., 1, cohort_88 - # 2020-01-01..., 3, cohort_99 - dataframe = pd.melt( - pd.DataFrame(aggregates), id_vars=[interval, "count"], value_vars=cohort_keys, var_name="breakdown", - ).rename(columns={interval: "date"}) - # Filter out false values - dataframe = dataframe[dataframe["value"] == True] - # Sum dates with same cohort - dataframe = dataframe.groupby(["breakdown", "date"], as_index=False).sum() - else: - dataframe = pd.DataFrame( - [ - {"date": a[interval], "count": a["count"], "breakdown": a[breakdown] if breakdown else "Total",} - for a in aggregates - ] - ) - if interval == "week": - dataframe["date"] = dataframe["date"].apply(lambda x: x - pd.offsets.Week(weekday=6)) - elif interval == "month": - dataframe["date"] = dataframe["date"].apply(lambda x: x - pd.offsets.MonthEnd(n=1)) - return dataframe - - -def group_events_to_date( - date_from: Optional[datetime.datetime], - date_to: Optional[datetime.datetime], - aggregates: QuerySet, - interval: str, - breakdown: Optional[str] = None, -) -> Dict[str, Dict[datetime.datetime, int]]: - response = {} - - if interval == "day": - if date_from: - date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0) - if date_to: - date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0) - - time_index = pd.date_range(date_from, date_to, freq=FREQ_MAP[interval]) - if len(aggregates) > 0: - dataframe = build_dataframe(aggregates, interval, breakdown) - - # extract top 20 if more than 20 breakdowns - if breakdown and dataframe["breakdown"].nunique() > 20: - counts = ( - dataframe.groupby(["breakdown"])["count"] - .sum() - .reset_index(name="total") - .sort_values(by=["total"], ascending=False)[:20] - ) - top_breakdown = counts["breakdown"].to_list() - dataframe = dataframe[dataframe.breakdown.isin(top_breakdown)] - dataframe = dataframe.astype({"breakdown": str}) - for value in dataframe["breakdown"].unique(): - filtered = ( - dataframe.loc[dataframe["breakdown"] == value] - if value - else dataframe.loc[dataframe["breakdown"].isnull()] - ) - 
df_dates = pd.DataFrame(filtered.groupby("date").mean(), index=time_index) - df_dates = df_dates.fillna(0) - response[value] = {key: value[0] if len(value) > 0 else 0 for key, value in df_dates.iterrows()} - else: - dataframe = pd.DataFrame([], index=time_index) - dataframe = dataframe.fillna(0) - response["total"] = {key: value[0] if len(value) > 0 else 0 for key, value in dataframe.iterrows()} - - return response - - -def get_interval_annotation(key: str) -> Dict[str, Any]: - map: Dict[str, Any] = { - "minute": functions.TruncMinute("timestamp"), - "hour": functions.TruncHour("timestamp"), - "day": functions.TruncDay("timestamp"), - "week": functions.TruncWeek("timestamp"), - "month": functions.TruncMonth("timestamp"), - } - func = map.get(key) - if func is None: - return {"day": map.get("day")} # default - - return {key: func} - - -def add_cohort_annotations(team_id: int, breakdown: List[Union[int, str]]) -> Dict[str, Union[Value, Exists]]: - cohorts = Cohort.objects.filter(team_id=team_id, pk__in=[b for b in breakdown if b != "all"]) - annotations: Dict[str, Union[Value, Exists]] = {} - for cohort in cohorts: - annotations["cohort_{}".format(cohort.pk)] = Exists( - CohortPeople.objects.filter(cohort=cohort.pk, person_id=OuterRef("person_id")).only("id") - ) - if "all" in breakdown: - annotations["cohort_all"] = Value(True, output_field=BooleanField()) - return annotations - - -def add_person_properties_annotations(team_id: int, breakdown: str) -> Dict[str, Subquery]: - person_properties = Subquery( - Person.objects.filter(team_id=team_id, id=OuterRef("person_id")).values("properties__{}".format(breakdown)) - ) - annotations = {} - annotations["properties__{}".format(breakdown)] = person_properties - return annotations - - -def aggregate_by_interval( - filtered_events: QuerySet, team_id: int, entity: Entity, filter: Filter, breakdown: Optional[str] = None, -) -> Dict[str, Any]: - interval = filter.interval if filter.interval else "day" - interval_annotation = get_interval_annotation(interval) - values = [interval] - if breakdown: - if filter.breakdown_type == "cohort": - cohort_annotations = add_cohort_annotations( - team_id, json.loads(filter.breakdown) if filter.breakdown else [] - ) - values.extend(cohort_annotations.keys()) - filtered_events = filtered_events.annotate(**cohort_annotations) - breakdown = "cohorts" - elif filter.breakdown_type == "person": - person_annotations = add_person_properties_annotations( - team_id, filter.breakdown if filter.breakdown else "" - ) - filtered_events = filtered_events.annotate(**person_annotations) - values.append(breakdown) - else: - values.append(breakdown) - aggregates = filtered_events.annotate(**interval_annotation).values(*values).annotate(count=Count(1)).order_by() - - if breakdown: - aggregates = aggregates.order_by("-count") - - aggregates = process_math(aggregates, entity) - - dates_filled = group_events_to_date( - date_from=filter.date_from, - date_to=filter.date_to, - aggregates=aggregates, - interval=interval, - breakdown=breakdown, - ) - - return dates_filled - - -def process_math(query: QuerySet, entity: Entity) -> QuerySet: - math_to_aggregate_function = {"sum": Sum, "avg": Avg, "min": Min, "max": Max} - if entity.math == "dau": - # In daily active users mode count only up to 1 event per user per day - query = query.annotate(count=Count("person_id", distinct=True)) - elif entity.math in math_to_aggregate_function: - # Run relevant aggregate function on specified event property, casting it to a double - query = query.annotate( - 
count=math_to_aggregate_function[entity.math]( - Cast(RawSQL('"posthog_event"."properties"->>%s', (entity.math_property,)), output_field=FloatField(),) - ) - ) - # Skip over events where the specified property is not set or not a number - # It may not be ideally clear to the user what events were skipped, - # but in the absence of typing, this is safe, cheap, and frictionless - query = query.extra( - where=['jsonb_typeof("posthog_event"."properties"->%s) = \'number\''], params=[entity.math_property], - ) - return query - - -def execute_custom_sql(query, params): - cursor = connection.cursor() - cursor.execute(query, params) - return cursor.fetchall() - - -def stickiness(filtered_events: QuerySet, entity: Entity, filter: Filter, team_id: int) -> Dict[str, Any]: - if not filter.date_to or not filter.date_from: - raise ValueError("_stickiness needs date_to and date_from set") - range_days = (filter.date_to - filter.date_from).days + 2 - - events = ( - filtered_events.filter(filter_events(team_id, filter, entity)) - .values("person_id") - .annotate(day_count=Count(functions.TruncDay("timestamp"), distinct=True)) - .filter(day_count__lte=range_days) - ) - - events_sql, events_sql_params = events.query.sql_with_params() - aggregated_query = "select count(v.person_id), v.day_count from ({}) as v group by v.day_count".format(events_sql) - aggregated_counts = execute_custom_sql(aggregated_query, events_sql_params) - - response: Dict[int, int] = {} - for result in aggregated_counts: - response[result[1]] = result[0] - - labels = [] - data = [] - - for day in range(1, range_days): - label = "{} day{}".format(day, "s" if day > 1 else "") - labels.append(label) - data.append(response[day] if day in response else 0) - - return { - "labels": labels, - "days": [day for day in range(1, range_days)], - "data": data, - "count": sum(data), - } - - -def breakdown_label(entity: Entity, value: Union[str, int]) -> Dict[str, Optional[Union[str, int]]]: - ret_dict: Dict[str, Optional[Union[str, int]]] = {} - if not value or not isinstance(value, str) or "cohort_" not in value: - ret_dict["label"] = "{} - {}".format( - entity.name, value if value and value != "None" and value != "nan" else "Other", - ) - ret_dict["breakdown_value"] = value if value and not pd.isna(value) else None - else: - if value == "cohort_all": - ret_dict["label"] = "{} - all users".format(entity.name) - ret_dict["breakdown_value"] = "all" - else: - cohort = Cohort.objects.get(pk=value.replace("cohort_", "")) - ret_dict["label"] = "{} - {}".format(entity.name, cohort.name) - ret_dict["breakdown_value"] = cohort.pk - return ret_dict - - -def serialize_entity(entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]: - if filter.interval is None: - filter.interval = "day" - - serialized: Dict[str, Any] = { - "action": entity.to_dict(), - "label": entity.name, - "count": 0, - "data": [], - "labels": [], - "days": [], - } - response = [] - events = process_entity_for_events( - entity=entity, team_id=team_id, order_by=None if filter.shown_as == "Stickiness" else "-timestamp", - ) - events = events.filter(filter_events(team_id, filter, entity)) - if not filter.shown_as or filter.shown_as == "Volume": - items = aggregate_by_interval( - filtered_events=events, - team_id=team_id, - entity=entity, - filter=filter, - breakdown="properties__{}".format(filter.breakdown) if filter.breakdown else None, - ) - for value, item in items.items(): - new_dict = copy.deepcopy(serialized) - if value != "Total": - new_dict.update(breakdown_label(entity, 
value)) - new_dict.update(append_data(dates_filled=list(item.items()), interval=filter.interval)) - if filter.display == TRENDS_CUMULATIVE: - new_dict["data"] = np.cumsum(new_dict["data"]) - response.append(new_dict) - elif filter.shown_as == TRENDS_STICKINESS: - new_dict = copy.deepcopy(serialized) - new_dict.update(stickiness(filtered_events=events, entity=entity, filter=filter, team_id=team_id)) - response.append(new_dict) - - return response - - def serialize_people(people: QuerySet, request: request.Request) -> Dict: people_dict = [PersonSerializer(person, context={"request": request}).data for person in people] return {"people": people_dict, "count": len(people_dict)} - - -def process_entity_for_events(entity: Entity, team_id: int, order_by="-id") -> QuerySet: - if entity.type == TREND_FILTER_TYPE_ACTIONS: - events = Event.objects.filter(action__pk=entity.id).add_person_id(team_id) - if order_by: - events = events.order_by(order_by) - return events - elif entity.type == TREND_FILTER_TYPE_EVENTS: - return Event.objects.filter_by_event_with_people(event=entity.id, team_id=team_id, order_by=order_by) - return QuerySet() - - -def filter_events(team_id: int, filter: Filter, entity: Optional[Entity] = None) -> Q: - filters = Q() - if filter.date_from: - filters &= Q(timestamp__gte=filter.date_from) - if filter.date_to: - relativity = relativedelta(days=1) - if filter.interval == "hour": - relativity = relativedelta(hours=1) - elif filter.interval == "minute": - relativity = relativedelta(minutes=1) - elif filter.interval == "week": - relativity = relativedelta(weeks=1) - elif filter.interval == "month": - relativity = relativedelta(months=1) - relativity # go to last day of month instead of first of next - filters &= Q(timestamp__lte=filter.date_to + relativity) - if filter.properties: - filters &= filter.properties_to_Q(team_id=team_id) - if entity and entity.properties: - filters &= entity.properties_to_Q(team_id=team_id) - return filters - - -def determine_compared_filter(filter): - date_from, date_to = get_compare_period_dates(filter.date_from, filter.date_to) - compared_filter = copy.deepcopy(filter) - compared_filter._date_from = date_from.date().isoformat() - compared_filter._date_to = date_to.date().isoformat() - return compared_filter - - -def convert_to_comparison(trend_entity: List[Dict[str, Any]], filter: Filter, label: str) -> List[Dict[str, Any]]: - for entity in trend_entity: - days = [i for i in range(len(entity["days"]))] - labels = [ - "{} {}".format(filter.interval if filter.interval is not None else "day", i) - for i in range(len(entity["labels"])) - ] - entity.update( - {"labels": labels, "days": days, "label": label, "dates": entity["days"], "compare": True,} - ) - return trend_entity diff --git a/posthog/api/team.py b/posthog/api/team.py index 48aab7219bd4b..541a7407f255b 100644 --- a/posthog/api/team.py +++ b/posthog/api/team.py @@ -1,7 +1,8 @@ from django.db.models import QuerySet -from rest_framework import viewsets, mixins, exceptions, response, status -from posthog.models import User, Team +from rest_framework import exceptions, mixins, response, status, viewsets + from posthog.api.user import UserSerializer +from posthog.models import Team, User class TeamUserViewSet(mixins.DestroyModelMixin, mixins.ListModelMixin, viewsets.GenericViewSet): diff --git a/posthog/api/test/test_action.py b/posthog/api/test/test_action.py index 6da30e9487b78..8ce2a9abe9b00 100644 --- a/posthog/api/test/test_action.py +++ b/posthog/api/test/test_action.py @@ -4,7 +4,6 @@ from 
freezegun import freeze_time -from posthog.api.action import calculate_retention from posthog.constants import TREND_FILTER_TYPE_ACTIONS, TREND_FILTER_TYPE_EVENTS from posthog.models import ( Action, @@ -255,448 +254,6 @@ def _compare_entity_response(self, response1, response2, remove=("action", "labe return False return str(response1[0]) == str(response2[0]) - def test_trends_per_day(self): - self._create_events() - with freeze_time("2020-01-04T13:00:01Z"): - with self.assertNumQueries(16): - action_response = self.client.get("/api/action/trends/?date_from=-7d").json() - event_response = self.client.get( - "/api/action/trends/", - data={"date_from": "-7d", "events": jdumps([{"id": "sign up"}, {"id": "no events"}]),}, - ).json() - - self.assertEqual(action_response[0]["label"], "sign up") - self.assertEqual(action_response[0]["labels"][4], "Wed. 1 January") - self.assertEqual(action_response[0]["data"][4], 3.0) - self.assertEqual(action_response[0]["labels"][5], "Thu. 2 January") - self.assertEqual(action_response[0]["data"][5], 1.0) - self.assertEqual(event_response[0]["label"], "sign up") - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_trends_per_day_48hours(self): - self._create_events() - with freeze_time("2020-01-03T13:00:01Z"): - action_response = self.client.get("/api/action/trends/?date_from=-48h&interval=day").json() - event_response = self.client.get( - "/api/action/trends/", - data={ - "date_from": "-48h", - "events": jdumps([{"id": "sign up"}, {"id": "no events"}]), - "interval": "day", - }, - ).json() - - self.assertEqual(action_response[0]["data"][1], 1.0) - self.assertEqual(action_response[0]["labels"][1], "Thu. 2 January") - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_trends_per_day_cumulative(self): - self._create_events() - with freeze_time("2020-01-04T13:00:01Z"): - with self.assertNumQueries(16): - action_response = self.client.get( - "/api/action/trends/?date_from=-7d&display=ActionsLineGraphCumulative" - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={ - "date_from": "-7d", - "events": jdumps([{"id": "sign up"}, {"id": "no events"}]), - "display": "ActionsLineGraphCumulative", - }, - ).json() - - self.assertEqual(action_response[0]["label"], "sign up") - self.assertEqual(action_response[0]["labels"][4], "Wed. 1 January") - self.assertEqual(action_response[0]["data"][4], 3.0) - self.assertEqual(action_response[0]["labels"][5], "Thu. 
2 January") - self.assertEqual(action_response[0]["data"][5], 4.0) - self.assertEqual(event_response[0]["label"], "sign up") - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_trends_compare(self): - self._create_events() - with freeze_time("2020-01-04T13:00:01Z"): - action_response = self.client.get("/api/action/trends/?date_from=-7d&compare=true").json() - event_response = self.client.get( - "/api/action/trends/", - data={ - "date_from": "-7d", - "events": jdumps([{"id": "sign up"}, {"id": "no events"}]), - "compare": "true", - }, - ).json() - - self.assertEqual(action_response[0]["label"], "sign up - current") - self.assertEqual(action_response[0]["labels"][4], "day 4") - self.assertEqual(action_response[0]["data"][4], 3.0) - self.assertEqual(action_response[0]["labels"][5], "day 5") - self.assertEqual(action_response[0]["data"][5], 1.0) - - self.assertEqual(action_response[1]["label"], "sign up - previous") - self.assertEqual(action_response[1]["labels"][4], "day 4") - self.assertEqual(action_response[1]["data"][4], 1.0) - self.assertEqual(action_response[1]["labels"][5], "day 5") - self.assertEqual(action_response[1]["data"][5], 0.0) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_property_filtering(self): - self._create_events() - with freeze_time("2020-01-04"): - action_response = self.client.get( - "/api/action/trends/", data={"properties": jdumps({"$some_property": "value"}),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={ - "events": jdumps([{"id": "sign up"}, {"id": "no events"}]), - "properties": jdumps({"$some_property": "value"}), - }, - ).json() - self.assertEqual(action_response[0]["labels"][4], "Wed. 1 January") - self.assertEqual(action_response[0]["data"][4], 1.0) - self.assertEqual(action_response[0]["labels"][5], "Thu. 
2 January") - self.assertEqual(action_response[0]["data"][5], 0) - self.assertEqual(action_response[1]["count"], 0) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_filter_events_by_cohort(self): - person1 = Person.objects.create(team=self.team, distinct_ids=["person_1"], properties={"name": "John"}) - person2 = Person.objects.create(team=self.team, distinct_ids=["person_2"], properties={"name": "Jane"}) - - event1 = Event.objects.create( - event="event_name", - team=self.team, - distinct_id="person_1", - properties={"$browser": "Safari"}, - timestamp=datetime.now(), - ) - event2 = Event.objects.create( - event="event_name", - team=self.team, - distinct_id="person_2", - properties={"$browser": "Chrome"}, - timestamp=datetime.now(), - ) - event3 = Event.objects.create( - event="event_name", - team=self.team, - distinct_id="person_2", - properties={"$browser": "Safari"}, - timestamp=datetime.now(), - ) - - cohort = Cohort.objects.create(team=self.team, groups=[{"properties": {"name": "Jane"}}]) - cohort.calculate_people() - - with self.assertNumQueries(6): - response = self.client.get( - "/api/action/trends/", - data={ - "properties": jdumps([{"key": "id", "value": cohort.pk, "type": "cohort"}]), - "events": jdumps([{"id": "event_name"}]), - }, - ).json() - self.assertEqual(response[0]["count"], 2) - self.assertEqual(response[0]["data"][-1], 2) - - def test_date_filtering(self): - self._create_events() - with freeze_time("2020-01-02"): - action_response = self.client.get("/api/action/trends/?date_from=2019-12-21").json() - event_response = self.client.get( - "/api/action/trends/", - data={"date_from": "2019-12-21", "events": jdumps([{"id": "sign up"}, {"id": "no events"}]),}, - ).json() - self.assertEqual(action_response[0]["labels"][3], "Tue. 24 December") - self.assertEqual(action_response[0]["data"][3], 1.0) - self.assertEqual(action_response[0]["data"][12], 1.0) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_interval_filtering(self): - self._create_events(use_time=True) - - # test minute - with freeze_time("2020-01-02"): - action_response = self.client.get("/api/action/trends/?date_from=2020-01-01&interval=minute").json() - self.assertEqual(action_response[0]["labels"][6], "Wed. 1 January, 00:06") - self.assertEqual(action_response[0]["data"][6], 3.0) - - # test hour - with freeze_time("2020-01-02"): - action_response = self.client.get("/api/action/trends/?date_from=2019-12-24&interval=hour").json() - self.assertEqual(action_response[0]["labels"][3], "Tue. 24 December, 03:00") - self.assertEqual(action_response[0]["data"][3], 1.0) - # 217 - 24 - 1 - self.assertEqual(action_response[0]["data"][192], 3.0) - - # test week - with freeze_time("2020-01-02"): - action_response = self.client.get("/api/action/trends/?date_from=2019-11-24&interval=week").json() - self.assertEqual(action_response[0]["labels"][4], "Sun. 22 December") - self.assertEqual(action_response[0]["data"][4], 1.0) - self.assertEqual(action_response[0]["labels"][5], "Sun. 29 December") - self.assertEqual(action_response[0]["data"][5], 4.0) - - # test month - with freeze_time("2020-01-02"): - action_response = self.client.get("/api/action/trends/?date_from=2019-9-24&interval=month").json() - self.assertEqual(action_response[0]["labels"][2], "Sat. 30 November") - self.assertEqual(action_response[0]["data"][2], 1.0) - self.assertEqual(action_response[0]["labels"][3], "Tue. 
31 December") - self.assertEqual(action_response[0]["data"][3], 4.0) - - with freeze_time("2020-01-02 23:30"): - Event.objects.create(team=self.team, event="sign up", distinct_id="blabla") - - # test today + hourly - with freeze_time("2020-01-02T23:31:00Z"): - action_response = self.client.get( - "/api/action/trends/", data={"date_from": "dStart", "interval": "hour",}, - ).json() - self.assertEqual(action_response[0]["labels"][23], "Thu. 2 January, 23:00") - self.assertEqual(action_response[0]["data"][23], 1.0) - - def test_all_dates_filtering(self): - self._create_events(use_time=True) - # automatically sets first day as first day of any events - with freeze_time("2020-01-04T15:01:01Z"): - action_response = self.client.get("/api/action/trends/?date_from=all").json() - event_response = self.client.get( - "/api/action/trends/", - data={"date_from": "all", "events": jdumps([{"id": "sign up"}, {"id": "no events"}]),}, - ).json() - self.assertEqual(action_response[0]["labels"][0], "Tue. 24 December") - self.assertEqual(action_response[0]["data"][0], 1.0) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - # test empty response - with freeze_time("2020-01-04"): - empty = self.client.get( - "/api/action/trends/?date_from=all&events=%s" % jdumps([{"id": "blabla"}, {"id": "sign up"}]) - ).json() - self.assertEqual(empty[0]["data"][0], 0) - - def test_breakdown_filtering(self): - self._create_events() - # test breakdown filtering - with freeze_time("2020-01-04T13:01:01Z"): - action_response = self.client.get("/api/action/trends/?date_from=-14d&breakdown=$some_property").json() - event_response = self.client.get( - "/api/action/trends/?date_from=-14d&properties={}&actions=[]&display=ActionsTable&interval=day&breakdown=$some_property&events=%s" - % jdumps([{"id": "sign up", "name": "sign up", "type": "events", "order": 0,}, {"id": "no events"},]) - ).json() - - self.assertEqual(event_response[0]["label"], "sign up - Other") - self.assertEqual(event_response[1]["label"], "sign up - other_value") - self.assertEqual(event_response[2]["label"], "sign up - value") - self.assertEqual(event_response[3]["label"], "no events - Other") - - self.assertEqual(sum(event_response[0]["data"]), 2) - self.assertEqual(event_response[0]["data"][4 + 7], 2) - self.assertEqual(event_response[0]["breakdown_value"], "None") - - self.assertEqual(sum(event_response[1]["data"]), 1) - self.assertEqual(event_response[1]["data"][5 + 7], 1) - self.assertEqual(event_response[1]["breakdown_value"], "other_value") - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - # check numerical breakdown - with freeze_time("2020-01-04T13:01:01Z"): - action_response = self.client.get( - "/api/action/trends/?date_from=-14d&breakdown=$some_numerical_prop" - ).json() - event_response = self.client.get( - "/api/action/trends/?date_from=-14d&properties={}&actions=[]&display=ActionsTable&interval=day&breakdown=$some_numerical_prop&events=%s" - % jdumps([{"id": "sign up", "name": "sign up", "type": "events", "order": 0,}, {"id": "no events"},]) - ).json() - self.assertEqual(event_response[0]["label"], "sign up - Other") - self.assertEqual(event_response[0]["count"], 4.0) - self.assertEqual(event_response[1]["label"], "sign up - 80.0") - self.assertEqual(event_response[1]["count"], 1.0) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_breakdown_filtering_limit(self): - self._create_breakdown_events() - with 
freeze_time("2020-01-04T13:01:01Z"): - action_response = self.client.get("/api/action/trends/?date_from=-14d&breakdown=$some_property").json() - event_response = self.client.get( - "/api/action/trends/?date_from=-14d&properties={}&actions=[]&display=ActionsTable&interval=day&breakdown=$some_property&events=%s" - % jdumps([{"id": "sign up", "name": "sign up", "type": "events", "order": 0}]) - ).json() - self.assertEqual(len(action_response), 20) - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_action_filtering(self): - sign_up_action, person = self._create_events() - with freeze_time("2020-01-04"): - action_response = self.client.get( - "/api/action/trends/", data={"actions": jdumps([{"id": sign_up_action.id}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", data={"events": jdumps([{"id": "sign up"}]),}, - ).json() - self.assertEqual(len(action_response), 1) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_trends_for_non_existing_action(self): - with freeze_time("2020-01-04"): - response = self.client.get("/api/action/trends/", {"actions": jdumps([{"id": 4000000}])}).json() - self.assertEqual(len(response), 0) - - with freeze_time("2020-01-04"): - response = self.client.get("/api/action/trends/", {"events": jdumps([{"id": "DNE"}])}).json() - - self.assertEqual(response[0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) - - def test_dau_filtering(self): - sign_up_action, person = self._create_events() - with freeze_time("2020-01-02"): - Person.objects.create(team=self.team, distinct_ids=["someone_else"]) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else") - with freeze_time("2020-01-04"): - action_response = self.client.get( - "/api/action/trends/", data={"actions": jdumps([{"id": sign_up_action.id, "math": "dau"}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", data={"events": jdumps([{"id": "sign up", "math": "dau"}]),}, - ).json() - self.assertEqual(action_response[0]["data"][4], 1) - self.assertEqual(action_response[0]["data"][5], 2) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_dau_with_breakdown_filtering(self): - sign_up_action, _ = self._create_events() - with freeze_time("2020-01-02"): - Event.objects.create( - team=self.team, event="sign up", distinct_id="blabla", properties={"$some_property": "other_value"}, - ) - with freeze_time("2020-01-04"): - action_response = self.client.get( - "/api/action/trends/?breakdown=$some_property&actions=%s" - % jdumps([{"id": sign_up_action.id, "math": "dau"}]) - ).json() - event_response = self.client.get( - "/api/action/trends/?breakdown=$some_property&events=%s" % jdumps([{"id": "sign up", "math": "dau"}]) - ).json() - - self.assertEqual(event_response[0]["label"], "sign up - other_value") - self.assertEqual(event_response[1]["label"], "sign up - value") - self.assertEqual(event_response[2]["label"], "sign up - Other") - - self.assertEqual(sum(event_response[0]["data"]), 1) - self.assertEqual(event_response[0]["data"][5], 1) - - self.assertEqual(sum(event_response[2]["data"]), 1) - self.assertEqual(event_response[2]["data"][4], 1) # property not defined - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_sum_filtering(self): - sign_up_action, person = self._create_events() - Person.objects.create(team=self.team, distinct_ids=["someone_else"]) - Event.objects.create(team=self.team, 
event="sign up", distinct_id="someone_else", properties={"some_number": 2}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 3}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 5}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8}) - action_response = self.client.get( - "/api/action/trends/", - data={"actions": jdumps([{"id": sign_up_action.id, "math": "sum", "math_property": "some_number"}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={"events": jdumps([{"id": "sign up", "math": "sum", "math_property": "some_number"}]),}, - ).json() - self.assertEqual(action_response[0]["data"][-1], 18) - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_avg_filtering(self): - sign_up_action, person = self._create_events() - Person.objects.create(team=self.team, distinct_ids=["someone_else"]) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 2}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 3}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 5}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8}) - action_response = self.client.get( - "/api/action/trends/", - data={"actions": jdumps([{"id": sign_up_action.id, "math": "avg", "math_property": "some_number"}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={"events": jdumps([{"id": "sign up", "math": "avg", "math_property": "some_number"}]),}, - ).json() - self.assertEqual(action_response[0]["data"][-1], 4.5) - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_min_filtering(self): - sign_up_action, person = self._create_events() - Person.objects.create(team=self.team, distinct_ids=["someone_else"]) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 2}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 3}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 5}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8}) - action_response = self.client.get( - "/api/action/trends/", - data={"actions": jdumps([{"id": sign_up_action.id, "math": "min", "math_property": "some_number"}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={"events": jdumps([{"id": "sign up", "math": "min", "math_property": "some_number"}]),}, - ).json() - self.assertEqual(action_response[0]["data"][-1], 2) - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_max_filtering(self): - sign_up_action, person = self._create_events() - Person.objects.create(team=self.team, distinct_ids=["someone_else"]) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 2}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 3}) - Event.objects.create(team=self.team, event="sign up", 
distinct_id="someone_else", properties={"some_number": 5}) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8}) - action_response = self.client.get( - "/api/action/trends/", - data={"actions": jdumps([{"id": sign_up_action.id, "math": "max", "math_property": "some_number"}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={"events": jdumps([{"id": "sign up", "math": "max", "math_property": "some_number"}]),}, - ).json() - self.assertEqual(action_response[0]["data"][-1], 8) - self.assertTrue(self._compare_entity_response(action_response, event_response)) - - def test_avg_filtering_non_number_resiliency(self): - sign_up_action, person = self._create_events() - Person.objects.create(team=self.team, distinct_ids=["someone_else"]) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 2}) - Event.objects.create( - team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": "x"} - ) - Event.objects.create( - team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": None} - ) - Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8}) - action_response = self.client.get( - "/api/action/trends/", - data={"actions": jdumps([{"id": sign_up_action.id, "math": "avg", "math_property": "some_number"}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={"events": jdumps([{"id": "sign up", "math": "avg", "math_property": "some_number"}]),}, - ).json() - self.assertEqual(action_response[0]["data"][-1], 5) - self.assertTrue(self._compare_entity_response(action_response, event_response)) - def test_people_endpoint(self): sign_up_action, person = self._create_events() person1 = Person.objects.create(team=self.team, distinct_ids=["person1"]) @@ -961,38 +518,13 @@ def _create_multiple_people(self): ) return (person1, person2, person3, person4) - def test_stickiness(self): + def test_stickiness_people_endpoint(self): person1 = self._create_multiple_people()[0] watched_movie = Action.objects.create(team=self.team) ActionStep.objects.create(action=watched_movie, event="watched movie") watched_movie.calculate_events() - with freeze_time("2020-01-08T13:01:01Z"): - action_response = self.client.get( - "/api/action/trends/", data={"shown_as": "Stickiness", "actions": jdumps([{"id": watched_movie.id}]),}, - ).json() - event_response = self.client.get( - "/api/action/trends/", - data={ - "shown_as": "Stickiness", - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "events": jdumps([{"id": "watched movie"}]), - }, - ).json() - self.assertEqual(action_response[0]["count"], 4) - self.assertEqual(action_response[0]["labels"][0], "1 day") - self.assertEqual(action_response[0]["data"][0], 2) - self.assertEqual(action_response[0]["labels"][1], "2 days") - self.assertEqual(action_response[0]["data"][1], 1) - self.assertEqual(action_response[0]["labels"][2], "3 days") - self.assertEqual(action_response[0]["data"][2], 1) - self.assertEqual(action_response[0]["labels"][6], "7 days") - self.assertEqual(action_response[0]["data"][6], 0) - - self.assertTrue(self._compare_entity_response(action_response, event_response)) - # test people action_response = self.client.get( "/api/action/people/", @@ -1033,7 +565,7 @@ def test_stickiness(self): self.assertEqual(len(response[0]["data"]), 7) - def test_breakdown_by_cohort(self): + def 
test_breakdown_by_cohort_people_endpoint(self): person1, person2, person3, person4 = self._create_multiple_people() cohort = Cohort.objects.create(name="cohort1", team=self.team, groups=[{"properties": {"name": "person1"}}]) cohort2 = Cohort.objects.create(name="cohort2", team=self.team, groups=[{"properties": {"name": "person2"}}]) @@ -1049,41 +581,6 @@ def test_breakdown_by_cohort(self): ActionStep.objects.create(action=action, event="watched movie") action.calculate_events() - with freeze_time("2020-01-04T13:01:01Z"): - event_response = self.client.get( - "/api/action/trends/?date_from=-14d&breakdown=%s&breakdown_type=cohort&events=%s" - % ( - jdumps([cohort.pk, cohort2.pk, cohort3.pk, "all"]), - jdumps([{"id": "watched movie", "name": "watched movie", "type": "events", "order": 0,}]), - ) - ).json() - action_response = self.client.get( - "/api/action/trends/?date_from=-14d&breakdown=%s&breakdown_type=cohort&actions=%s" - % ( - jdumps([cohort.pk, cohort2.pk, cohort3.pk, "all"]), - jdumps([{"id": action.pk, "type": "actions", "order": 0}]), - ) - ).json() - - self.assertEqual(event_response[0]["label"], "watched movie - cohort1") - self.assertEqual(event_response[1]["label"], "watched movie - cohort2") - self.assertEqual(event_response[2]["label"], "watched movie - cohort3") - self.assertEqual(event_response[3]["label"], "watched movie - all users") - - self.assertEqual(sum(event_response[0]["data"]), 1) - self.assertEqual(event_response[0]["breakdown_value"], cohort.pk) - - self.assertEqual(sum(event_response[1]["data"]), 3) - self.assertEqual(event_response[1]["breakdown_value"], cohort2.pk) - - self.assertEqual(sum(event_response[2]["data"]), 4) - self.assertEqual(event_response[2]["breakdown_value"], cohort3.pk) - - self.assertEqual(sum(event_response[3]["data"]), 7) - self.assertEqual(event_response[3]["breakdown_value"], "all") - - self.assertTrue(self._compare_entity_response(event_response, action_response,)) - people = self.client.get( "/api/action/people/", data={ @@ -1115,36 +612,12 @@ def test_breakdown_by_cohort(self): self.assertEqual(len(people["results"][0]["people"]), 4) self.assertEqual(people["results"][0]["people"][0]["id"], person1.pk) - def test_breakdown_by_person_property(self): + def test_breakdown_by_person_property_people_endpoint(self): person1, person2, person3, person4 = self._create_multiple_people() action = Action.objects.create(name="watched movie", team=self.team) ActionStep.objects.create(action=action, event="watched movie") action.calculate_events() - with freeze_time("2020-01-04T13:01:01Z"): - event_response = self.client.get( - "/api/action/trends/?date_from=-14d&breakdown=%s&breakdown_type=person&events=%s" - % ("name", jdumps([{"id": "watched movie", "name": "watched movie", "type": "events", "order": 0,}]),) - ).json() - action_response = self.client.get( - "/api/action/trends/?date_from=-14d&breakdown=%s&breakdown_type=person&actions=%s" - % ("name", jdumps([{"id": action.pk, "type": "actions", "order": 0}]),) - ).json() - - self.assertEqual(event_response[0]["count"], 3) - self.assertEqual(event_response[0]["breakdown_value"], "person2") - - self.assertEqual(event_response[1]["count"], 1) - self.assertEqual(event_response[1]["breakdown_value"], "person1") - - self.assertEqual(event_response[2]["count"], 3) - self.assertEqual(event_response[2]["breakdown_value"], "person3") - - self.assertEqual(event_response[3]["count"], 0) - self.assertEqual(event_response[3]["breakdown_value"], "person4") - - 
self.assertTrue(self._compare_entity_response(event_response, action_response,)) - people = self.client.get( "/api/action/people/", data={ @@ -1159,137 +632,3 @@ def test_breakdown_by_person_property(self): ).json() self.assertEqual(len(people["results"][0]["people"]), 1) self.assertEqual(people["results"][0]["people"][0]["name"], "person3") - - -class TestRetention(TransactionBaseTest): - def test_retention(self): - person1 = Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"]) - person2 = Person.objects.create(team=self.team, distinct_ids=["person2"]) - - self._create_pageviews( - [ - ("person1", self._date(0)), - ("person1", self._date(1)), - ("person1", self._date(2)), - ("person1", self._date(5)), - ("alias1", self._date(5, 9)), - ("person1", self._date(6)), - ("person2", self._date(1)), - ("person2", self._date(2)), - ("person2", self._date(3)), - ("person2", self._date(6)), - ] - ) - - result = calculate_retention(Filter(data={"date_from": self._date(0, hour=0)}), self.team, total_days=7) - - self.assertEqual(len(result["data"]), 7) - self.assertEqual( - self.pluck(result["data"], "label"), ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"], - ) - self.assertEqual(result["data"][0]["date"], "Wed. 10 June") - - self.assertEqual( - self.pluck(result["data"], "values", "count"), - [[1, 1, 1, 0, 0, 1, 1], [2, 2, 1, 0, 1, 2], [2, 1, 0, 1, 2], [1, 0, 0, 1], [0, 0, 0], [1, 1], [2],], - ) - - def test_retention_with_properties(self): - person1 = Person.objects.create( - team=self.team, distinct_ids=["person1", "alias1"], properties={"email": "person1@test.com"} - ) - person2 = Person.objects.create( - team=self.team, distinct_ids=["person2"], properties={"email": "person2@test.com"} - ) - - self._create_pageviews( - [ - ("person1", self._date(0)), - ("person1", self._date(1)), - ("person1", self._date(2)), - ("person1", self._date(5)), - ("alias1", self._date(5, 9)), - ("person1", self._date(6)), - ("person2", self._date(1)), - ("person2", self._date(2)), - ("person2", self._date(3)), - ("person2", self._date(6)), - ] - ) - - result = calculate_retention( - Filter( - data={ - "properties": [{"key": "email", "value": "person1@test.com", "type": "person"}], - "date_from": self._date(0, hour=0), - } - ), - self.team, - total_days=7, - ) - - self.assertEqual(len(result["data"]), 7) - self.assertEqual( - self.pluck(result["data"], "label"), ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"], - ) - self.assertEqual(result["data"][0]["date"], "Wed. 
10 June") - self.assertEqual( - self.pluck(result["data"], "values", "count"), - [[1, 1, 1, 0, 0, 1, 1], [1, 1, 0, 0, 1, 1], [1, 0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0], [1, 1], [1]], - ) - - def test_retention_action_start_point(self): - person1 = Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"]) - person2 = Person.objects.create(team=self.team, distinct_ids=["person2"]) - - action = self._create_signup_actions( - [ - ("person1", self._date(0)), - ("person1", self._date(1)), - ("person1", self._date(2)), - ("person1", self._date(5)), - ("alias1", self._date(5, 9)), - ("person1", self._date(6)), - ("person2", self._date(1)), - ("person2", self._date(2)), - ("person2", self._date(3)), - ("person2", self._date(6)), - ] - ) - - start_entity = Entity({"id": action.pk, "type": TREND_FILTER_TYPE_ACTIONS}) - result = calculate_retention( - Filter(data={"date_from": self._date(0, hour=0)}), self.team, start_entity=start_entity, total_days=7 - ) - - self.assertEqual(len(result["data"]), 7) - self.assertEqual( - self.pluck(result["data"], "label"), ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"], - ) - self.assertEqual(result["data"][0]["date"], "Wed. 10 June") - - self.assertEqual( - self.pluck(result["data"], "values", "count"), - [[1, 1, 1, 0, 0, 1, 1], [2, 2, 1, 0, 1, 2], [2, 1, 0, 1, 2], [1, 0, 0, 1], [0, 0, 0], [1, 1], [2],], - ) - - def _create_pageviews(self, user_and_timestamps): - for distinct_id, timestamp in user_and_timestamps: - Event.objects.create( - team=self.team, event="$pageview", distinct_id=distinct_id, timestamp=timestamp, - ) - - def _create_signup_actions(self, user_and_timestamps): - sign_up_action = Action.objects.create(team=self.team, name="sign up") - ActionStep.objects.create(action=sign_up_action, event="sign up") - for distinct_id, timestamp in user_and_timestamps: - Event.objects.create( - team=self.team, event="sign up", distinct_id=distinct_id, timestamp=timestamp, - ) - return sign_up_action - - def _date(self, day, hour=5): - return datetime(2020, 6, 10 + day, hour).isoformat() - - def pluck(self, list_of_dicts, key, child_key=None): - return [self.pluck(d[key], child_key) if child_key else d[key] for d in list_of_dicts] diff --git a/posthog/api/test/test_dashboard.py b/posthog/api/test/test_dashboard.py index 53cdf71c5cc23..7ce8dd6fec6cc 100644 --- a/posthog/api/test/test_dashboard.py +++ b/posthog/api/test/test_dashboard.py @@ -4,7 +4,6 @@ from django.utils.timezone import now from freezegun import freeze_time -from posthog.api.action import calculate_trends from posthog.decorators import TRENDS_ENDPOINT from posthog.models import Dashboard, DashboardItem, Filter diff --git a/posthog/api/test/test_team.py b/posthog/api/test/test_team.py index 579d0ebc6cc41..d30aaf401e433 100644 --- a/posthog/api/test/test_team.py +++ b/posthog/api/test/test_team.py @@ -1,9 +1,12 @@ -from typing import List, Dict +import random +from typing import Dict, List + from django.db.models import Q from rest_framework import status + +from posthog.models import Team, User + from .base import BaseTest -from posthog.models import User, Team -import random class TestTeamUser(BaseTest): diff --git a/posthog/queries/__init__.py b/posthog/queries/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/posthog/queries/base.py b/posthog/queries/base.py new file mode 100644 index 0000000000000..a0fc7fd5216e8 --- /dev/null +++ b/posthog/queries/base.py @@ -0,0 +1,112 @@ +import copy +from typing import Any, Callable, Dict, List, 
Optional
+
+from dateutil.relativedelta import relativedelta
+from django.db.models import Q, QuerySet
+
+from posthog.constants import TREND_FILTER_TYPE_ACTIONS, TREND_FILTER_TYPE_EVENTS, TRENDS_CUMULATIVE, TRENDS_STICKINESS
+from posthog.models import Entity, Event, Filter, Team
+from posthog.utils import get_compare_period_dates
+
+"""
+process_entity_for_events takes an Entity and a team_id, and returns an Event QuerySet filtered down to the events that match the entity
+"""
+
+
+def process_entity_for_events(entity: Entity, team_id: int, order_by="-id") -> QuerySet:
+    if entity.type == TREND_FILTER_TYPE_ACTIONS:
+        events = Event.objects.filter(action__pk=entity.id).add_person_id(team_id)
+        if order_by:
+            events = events.order_by(order_by)
+        return events
+    elif entity.type == TREND_FILTER_TYPE_EVENTS:
+        return Event.objects.filter_by_event_with_people(event=entity.id, team_id=team_id, order_by=order_by)
+    return QuerySet()
+
+
+def _determine_compared_filter(filter: Filter) -> Filter:
+    if not filter.date_to or not filter.date_from:
+        raise ValueError("You need date_from and date_to to compare")
+    date_from, date_to = get_compare_period_dates(filter.date_from, filter.date_to)
+    compared_filter = copy.deepcopy(filter)
+    compared_filter._date_from = date_from.date().isoformat()
+    compared_filter._date_to = date_to.date().isoformat()
+    return compared_filter
+
+
+def _convert_to_comparison(trend_entity: List[Dict[str, Any]], filter: Filter, label: str) -> List[Dict[str, Any]]:
+    for entity in trend_entity:
+        days = [i for i in range(len(entity["days"]))]
+        labels = [
+            "{} {}".format(filter.interval if filter.interval is not None else "day", i)
+            for i in range(len(entity["labels"]))
+        ]
+        entity.update(
+            {"labels": labels, "days": days, "label": label, "dates": entity["days"], "compare": True,}
+        )
+    return trend_entity
+
+
+"""
+    handle_compare takes an Entity, a Filter and a callable.
+    If filter.compare is set, it automatically creates 'current' and 'previous' series for the entity, picking the right date_from and date_to for each.
+    It then calls func(entity, filter, team_id) once per period.
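+
+    A minimal usage sketch (this mirrors the call made in Stickiness.run further down in this
+    patch; any callable with the signature func(entity, filter, team_id) -> List[Dict] fits):
+
+        entities_list = handle_compare(entity=entity, filter=filter, func=self._serialize_entity, team_id=team.pk)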
+""" + + +def handle_compare(entity: Entity, filter: Filter, func: Callable, team_id: int) -> List: + entities_list = [] + trend_entity = func(entity=entity, filter=filter, team_id=team_id) + if filter.compare: + trend_entity = _convert_to_comparison(trend_entity, filter, "{} - {}".format(entity.name, "current")) + entities_list.extend(trend_entity) + + compared_filter = _determine_compared_filter(filter) + compared_trend_entity = func(entity=entity, filter=compared_filter, team_id=team_id) + + compared_trend_entity = _convert_to_comparison( + compared_trend_entity, compared_filter, "{} - {}".format(entity.name, "previous"), + ) + entities_list.extend(compared_trend_entity) + else: + entities_list.extend(trend_entity) + return entities_list + + +""" +filter_events takes team_id, filter, entity and generates a Q objects that you can use to filter a QuerySet +""" + + +def filter_events(team_id: int, filter: Filter, entity: Optional[Entity] = None) -> Q: + filters = Q() + if filter.date_from: + filters &= Q(timestamp__gte=filter.date_from) + if filter.date_to: + relativity = relativedelta(days=1) + if filter.interval == "hour": + relativity = relativedelta(hours=1) + elif filter.interval == "minute": + relativity = relativedelta(minutes=1) + elif filter.interval == "week": + relativity = relativedelta(weeks=1) + elif filter.interval == "month": + relativity = relativedelta(months=1) - relativity # go to last day of month instead of first of next + filters &= Q(timestamp__lte=filter.date_to + relativity) + if filter.properties: + filters &= filter.properties_to_Q(team_id=team_id) + if entity and entity.properties: + filters &= entity.properties_to_Q(team_id=team_id) + return filters + + +class BaseQuery: + """ + Run needs to be implemented in the individual Query class. It takes in a Filter, Team + and optionally other arguments within kwargs (though use sparingly!) + + The output is a List comprised of Dicts. What those dicts looks like depend on the needs of the frontend. + """ + + def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]: + raise NotImplementedError("You need to implement run") diff --git a/posthog/queries/retention.py b/posthog/queries/retention.py new file mode 100644 index 0000000000000..30e1d3e2d73e5 --- /dev/null +++ b/posthog/queries/retention.py @@ -0,0 +1,35 @@ +import datetime +from datetime import timedelta +from typing import Any, Dict, List, Optional + +from posthog.models import Entity, Event, Filter, Team +from posthog.queries.base import BaseQuery + + +class Retention(BaseQuery): + def calculate_retention(self, filter: Filter, team: Team, start_entity: Optional[Entity] = None, total_days=11): + date_from: datetime.datetime = filter.date_from # type: ignore + filter._date_to = (date_from + timedelta(days=total_days)).isoformat() + labels_format = "%a. 
diff --git a/posthog/queries/stickiness.py b/posthog/queries/stickiness.py
new file mode 100644
index 0000000000000..dba87d8703e30
--- /dev/null
+++ b/posthog/queries/stickiness.py
@@ -0,0 +1,94 @@
+from typing import List, Dict, Any
+from .base import filter_events, handle_compare, process_entity_for_events, BaseQuery
+from posthog.models import Entity, Filter, Team, Event, Action
+from posthog.constants import TREND_FILTER_TYPE_ACTIONS
+from django.db.models import QuerySet, Count, functions
+from django.utils.timezone import now
+from django.db import connection
+import copy
+
+
+def execute_custom_sql(query, params):
+    cursor = connection.cursor()
+    cursor.execute(query, params)
+    return cursor.fetchall()
+
+
+class Stickiness(BaseQuery):
+    def _serialize_entity(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
+        if filter.interval is None:
+            filter.interval = "day"
+
+        serialized: Dict[str, Any] = {
+            "action": entity.to_dict(),
+            "label": entity.name,
+            "count": 0,
+            "data": [],
+            "labels": [],
+            "days": [],
+        }
+        response = []
+        events = process_entity_for_events(entity=entity, team_id=team_id, order_by=None,)
+        events = events.filter(filter_events(team_id, filter, entity))
+        new_dict = copy.deepcopy(serialized)
+        new_dict.update(self.stickiness(filtered_events=events, entity=entity, filter=filter, team_id=team_id))
+        response.append(new_dict)
+        return response
+
+    def stickiness(self, filtered_events: QuerySet, entity: Entity, filter: Filter, team_id: int) -> Dict[str, Any]:
+        if not filter.date_to or not filter.date_from:
+            raise ValueError("stickiness needs date_to and date_from set")
+        range_days = (filter.date_to - filter.date_from).days + 2
+
+        events = (
+            filtered_events.filter(filter_events(team_id, filter, entity))
+            .values("person_id")
+            .annotate(day_count=Count(functions.TruncDay("timestamp"), distinct=True))
+            .filter(day_count__lte=range_days)
+        )
+
+        events_sql, events_sql_params = events.query.sql_with_params()
+        aggregated_query = "select count(v.person_id), v.day_count from ({}) as v group by v.day_count".format(
+            events_sql
+        )
+        aggregated_counts = execute_custom_sql(aggregated_query, events_sql_params)
+
+        response: Dict[int, int] = {}
+        for result in aggregated_counts:
+            response[result[1]] = result[0]
+
+        labels = []
+        data = []
+
+        for day in range(1, range_days):
+            label = "{} day{}".format(day, "s" if day > 1 else "")
+            labels.append(label)
+            data.append(response[day] if day in response else 0)
+
+        return {
+            "labels": labels,
+            "days": [day for day in range(1, range_days)],
+            "data": data,
+            "count": sum(data),
+        }
+
+    def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
+        response = []
+
+        if not filter.date_from:
+            filter._date_from = (
+                Event.objects.filter(team_id=team.pk)
+                .order_by("timestamp")[0]
+                .timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
+                .isoformat()
+            )
+        if not filter.date_to:
+            filter._date_to = now().isoformat()
+
+        for entity in filter.entities:
+            if entity.type == TREND_FILTER_TYPE_ACTIONS:
+                entity.name = Action.objects.only("name").get(team=team, pk=entity.id).name
+
+            entity_resp = handle_compare(entity=entity, filter=filter, func=self._serialize_entity, team_id=team.pk)
+            response.extend(entity_resp)
+        return response
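
Note: conceptually, the raw SQL above buckets people by how many distinct days they were active in the range. The same aggregation in plain Python, for intuition (toy data, not the production code path):

from collections import Counter

# person -> distinct active days
active_days = {"person1": {1}, "person2": {1, 2}, "person3": {1, 2, 3}, "person4": {5}}

histogram = Counter(len(days) for days in active_days.values())
data = [histogram.get(day, 0) for day in range(1, 8)]
assert data == [2, 1, 1, 0, 0, 0, 0]  # same shape the stickiness tests below assert on
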
diff --git a/posthog/queries/test/__init__.py b/posthog/queries/test/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/posthog/queries/test/test_retention.py b/posthog/queries/test/test_retention.py
new file mode 100644
index 0000000000000..083e738330d57
--- /dev/null
+++ b/posthog/queries/test/test_retention.py
@@ -0,0 +1,157 @@
+from datetime import datetime
+
+import pytz
+from freezegun import freeze_time
+
+from posthog.api.test.base import BaseTest
+from posthog.constants import TREND_FILTER_TYPE_ACTIONS
+from posthog.models import Action, ActionStep, Entity, Event, Filter, Person, Team
+from posthog.queries.retention import Retention
+from posthog.queries.stickiness import Stickiness
+
+
+class TestRetention(BaseTest):
+    def test_retention(self):
+        person1 = Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"])
+        person2 = Person.objects.create(team=self.team, distinct_ids=["person2"])
+
+        self._create_pageviews(
+            [
+                ("person1", self._date(0)),
+                ("person1", self._date(1)),
+                ("person1", self._date(2)),
+                ("person1", self._date(5)),
+                ("alias1", self._date(5, 9)),
+                ("person1", self._date(6)),
+                ("person2", self._date(1)),
+                ("person2", self._date(2)),
+                ("person2", self._date(3)),
+                ("person2", self._date(6)),
+            ]
+        )
+
+        result = Retention().run(Filter(data={"date_from": self._date(0, hour=0)}), self.team)
+
+        self.assertEqual(len(result), 11)
+        self.assertEqual(
+            self.pluck(result, "label"),
+            ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6", "Day 7", "Day 8", "Day 9", "Day 10"],
+        )
+        self.assertEqual(result[0]["date"], "Wed. 10 June")
+
+        self.assertEqual(
+            self.pluck(result, "values", "count"),
+            [
+                [1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0],
+                [2, 2, 1, 0, 1, 2, 0, 0, 0, 0],
+                [2, 1, 0, 1, 2, 0, 0, 0, 0],
+                [1, 0, 0, 1, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0],
+                [1, 1, 0, 0, 0, 0],
+                [2, 0, 0, 0, 0],
+                [0, 0, 0, 0],
+                [0, 0, 0],
+                [0, 0],
+                [0],
+            ],
+        )
+
+    def test_retention_with_properties(self):
+        person1 = Person.objects.create(
+            team=self.team, distinct_ids=["person1", "alias1"], properties={"email": "person1@test.com"}
+        )
+        person2 = Person.objects.create(
+            team=self.team, distinct_ids=["person2"], properties={"email": "person2@test.com"}
+        )
+
+        self._create_pageviews(
+            [
+                ("person1", self._date(0)),
+                ("person1", self._date(1)),
+                ("person1", self._date(2)),
+                ("person1", self._date(5)),
+                ("alias1", self._date(5, 9)),
+                ("person1", self._date(6)),
+                ("person2", self._date(1)),
+                ("person2", self._date(2)),
+                ("person2", self._date(3)),
+                ("person2", self._date(6)),
+            ]
+        )
+
+        result = Retention().run(
+            Filter(
+                data={
+                    "properties": [{"key": "email", "value": "person1@test.com", "type": "person"}],
+                    "date_from": self._date(0, hour=0),
+                }
+            ),
+            self.team,
+            total_days=7,
+        )
+
+        self.assertEqual(len(result), 7)
+        self.assertEqual(
+            self.pluck(result, "label"), ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"],
+        )
+        self.assertEqual(result[0]["date"], "Wed. 10 June")
+        self.assertEqual(
+            self.pluck(result, "values", "count"),
+            [[1, 1, 1, 0, 0, 1, 1], [1, 1, 0, 0, 1, 1], [1, 0, 0, 1, 1], [0, 0, 0, 0], [0, 0, 0], [1, 1], [1]],
+        )
+
+    def test_retention_action_start_point(self):
+        person1 = Person.objects.create(team=self.team, distinct_ids=["person1", "alias1"])
+        person2 = Person.objects.create(team=self.team, distinct_ids=["person2"])
+
+        action = self._create_signup_actions(
+            [
+                ("person1", self._date(0)),
+                ("person1", self._date(1)),
+                ("person1", self._date(2)),
+                ("person1", self._date(5)),
+                ("alias1", self._date(5, 9)),
+                ("person1", self._date(6)),
+                ("person2", self._date(1)),
+                ("person2", self._date(2)),
+                ("person2", self._date(3)),
+                ("person2", self._date(6)),
+            ]
+        )
+
+        start_entity = Entity({"id": action.pk, "type": TREND_FILTER_TYPE_ACTIONS})
+        result = Retention().run(
+            Filter(data={"date_from": self._date(0, hour=0), "entities": [start_entity]}), self.team, total_days=7
+        )
+
+        self.assertEqual(len(result), 7)
+        self.assertEqual(
+            self.pluck(result, "label"), ["Day 0", "Day 1", "Day 2", "Day 3", "Day 4", "Day 5", "Day 6"],
+        )
+        self.assertEqual(result[0]["date"], "Wed. 10 June")
+
+        self.assertEqual(
+            self.pluck(result, "values", "count"),
+            [[1, 1, 1, 0, 0, 1, 1], [2, 2, 1, 0, 1, 2], [2, 1, 0, 1, 2], [1, 0, 0, 1], [0, 0, 0], [1, 1], [2],],
+        )
+
+    def _create_pageviews(self, user_and_timestamps):
+        for distinct_id, timestamp in user_and_timestamps:
+            Event.objects.create(
+                team=self.team, event="$pageview", distinct_id=distinct_id, timestamp=timestamp,
+            )
+
+    def _create_signup_actions(self, user_and_timestamps):
+        sign_up_action = Action.objects.create(team=self.team, name="sign up")
+        ActionStep.objects.create(action=sign_up_action, event="sign up")
+        for distinct_id, timestamp in user_and_timestamps:
+            Event.objects.create(
+                team=self.team, event="sign up", distinct_id=distinct_id, timestamp=timestamp,
+            )
+        return sign_up_action
+
+    def _date(self, day, hour=5):
+        return datetime(2020, 6, 10 + day, hour, tzinfo=pytz.UTC).isoformat()
+
+    def pluck(self, list_of_dicts, key, child_key=None):
+        return [self.pluck(d[key], child_key) if child_key else d[key] for d in list_of_dicts]
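
Note: the pluck helper above flattens one level of nesting when child_key is given; a quick standalone illustration of what the retention assertions are comparing against:

def pluck(list_of_dicts, key, child_key=None):
    return [pluck(d[key], child_key) if child_key else d[key] for d in list_of_dicts]

rows = [{"values": [{"count": 2}, {"count": 1}]}, {"values": [{"count": 1}]}]
assert pluck(rows, "values", "count") == [[2, 1], [1]]
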
diff --git a/posthog/queries/test/test_stickiness.py b/posthog/queries/test/test_stickiness.py
new file mode 100644
index 0000000000000..295e8184c85a6
--- /dev/null
+++ b/posthog/queries/test/test_stickiness.py
@@ -0,0 +1,86 @@
+from posthog.queries.stickiness import Stickiness
+from posthog.api.test.base import BaseTest
+from posthog.models import Action, Person, Event, ActionStep, Team, Filter
+from freezegun import freeze_time
+
+
+class TestStickiness(BaseTest):
+    def _create_multiple_people(self):
+        person1 = Person.objects.create(team=self.team, distinct_ids=["person1"], properties={"name": "person1"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person1", timestamp="2020-01-01T12:00:00Z",
+        )
+
+        person2 = Person.objects.create(team=self.team, distinct_ids=["person2"], properties={"name": "person2"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person2", timestamp="2020-01-01T12:00:00Z",
+        )
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person2", timestamp="2020-01-02T12:00:00Z",
+        )
+        # same day as above, so it should only count as one active day
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person2", timestamp="2020-01-02T12:00:00Z",
+        )
+
+        person3 = Person.objects.create(team=self.team, distinct_ids=["person3"], properties={"name": "person3"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person3", timestamp="2020-01-01T12:00:00Z",
+        )
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person3", timestamp="2020-01-02T12:00:00Z",
+        )
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person3", timestamp="2020-01-03T12:00:00Z",
+        )
+
+        person4 = Person.objects.create(team=self.team, distinct_ids=["person4"], properties={"name": "person4"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person4", timestamp="2020-01-05T12:00:00Z",
+        )
+        return (person1, person2, person3, person4)
+
+    def test_stickiness(self):
+        person1 = self._create_multiple_people()[0]
+
+        with freeze_time("2020-01-08T13:01:01Z"):
+            filter = Filter(
+                data={
+                    "shown_as": "Stickiness",
+                    "date_from": "2020-01-01",
+                    "date_to": "2020-01-08",
+                    "events": [{"id": "watched movie"}],
+                }
+            )
+            response = Stickiness().run(filter, self.team)
+
+        self.assertEqual(response[0]["count"], 4)
+        self.assertEqual(response[0]["labels"][0], "1 day")
+        self.assertEqual(response[0]["data"][0], 2)
+        self.assertEqual(response[0]["labels"][1], "2 days")
+        self.assertEqual(response[0]["data"][1], 1)
+        self.assertEqual(response[0]["labels"][2], "3 days")
+        self.assertEqual(response[0]["data"][2], 1)
+        self.assertEqual(response[0]["labels"][6], "7 days")
+        self.assertEqual(response[0]["data"][6], 0)
+
+    def test_stickiness_action(self):
+        person1 = self._create_multiple_people()[0]
+
+        watched_movie = Action.objects.create(team=self.team, name="watch movie action")
+        ActionStep.objects.create(action=watched_movie, event="watched movie")
+        watched_movie.calculate_events()
+
+        with freeze_time("2020-01-08T13:01:01Z"):
+            filter = Filter(
+                data={
+                    "shown_as": "Stickiness",
+                    "date_from": "2020-01-01",
+                    "date_to": "2020-01-08",
+                    "actions": [{"id": watched_movie.pk}],
+                }
+            )
+            response = Stickiness().run(filter, self.team)
+        self.assertEqual(response[0]["label"], "watch movie action")
+        self.assertEqual(response[0]["count"], 4)
+        self.assertEqual(response[0]["labels"][0], "1 day")
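
Note: these tests lean heavily on freezegun: anything date-relative (a "-7d" date_from, a missing date_to defaulting to now()) resolves against the frozen clock, which keeps the expected buckets deterministic. The basic pattern:

from datetime import datetime

from freezegun import freeze_time

with freeze_time("2020-01-08T13:01:01Z"):
    # inside the block, now() and datetime.now() return the frozen moment
    assert datetime.now().strftime("%Y-%m-%d") == "2020-01-08"
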
diff --git a/posthog/queries/test/test_trends.py b/posthog/queries/test/test_trends.py
new file mode 100644
index 0000000000000..468a2991c8244
--- /dev/null
+++ b/posthog/queries/test/test_trends.py
@@ -0,0 +1,587 @@
+from freezegun import freeze_time
+from posthog.queries.trends import Trends
+from posthog.api.test.base import BaseTest
+from posthog.models import Action, Person, Event, ActionStep, Team, Filter, Cohort
+import json
+
+
+class TestTrends(BaseTest):
+    def _create_events(self, use_time=False):
+        no_events = Action.objects.create(team=self.team, name="no events")
+        ActionStep.objects.create(action=no_events, event="no events")
+
+        sign_up_action = Action.objects.create(team=self.team, name="sign up")
+        ActionStep.objects.create(action=sign_up_action, event="sign up")
+
+        person = Person.objects.create(team=self.team, distinct_ids=["blabla", "anonymous_id"])
+        second_team = Team.objects.create(api_token="token123")
+
+        freeze_without_time = ["2019-12-24", "2020-01-01", "2020-01-02"]
+        freeze_with_time = [
+            "2019-12-24 03:45:34",
+            "2020-01-01 00:06:34",
+            "2020-01-02 16:34:34",
+        ]
+
+        freeze_args = freeze_without_time
+        if use_time:
+            freeze_args = freeze_with_time
+
+        with freeze_time(freeze_args[0]):
+            Event.objects.create(
+                team=self.team, event="sign up", distinct_id="blabla", properties={"$some_property": "value"},
+            )
+
+        with freeze_time(freeze_args[1]):
+            Event.objects.create(
+                team=self.team, event="sign up", distinct_id="blabla", properties={"$some_property": "value"},
+            )
+            Event.objects.create(team=self.team, event="sign up", distinct_id="anonymous_id")
+            Event.objects.create(team=self.team, event="sign up", distinct_id="blabla")
+        with freeze_time(freeze_args[2]):
+            Event.objects.create(
+                team=self.team,
+                event="sign up",
+                distinct_id="blabla",
+                properties={"$some_property": "other_value", "$some_numerical_prop": 80,},
+            )
+            Event.objects.create(team=self.team, event="no events", distinct_id="blabla")
+
+            # the second team's events should have no effect on the results
+            Event.objects.create(
+                team=second_team, event="sign up", distinct_id="blabla", properties={"$some_property": "other_value"},
+            )
+        return sign_up_action, person
+
+    def _create_breakdown_events(self):
+        freeze_without_time = ["2020-01-02"]
+
+        sign_up_action = Action.objects.create(team=self.team, name="sign up")
+        ActionStep.objects.create(action=sign_up_action, event="sign up")
+
+        with freeze_time(freeze_without_time[0]):
+            for i in range(25):
+                Event.objects.create(
+                    team=self.team, event="sign up", distinct_id="blabla", properties={"$some_property": i},
+                )
+
+    def _compare_entity_response(self, response1, response2, remove=("action", "label")):
+        if len(response1):
+            for attr in remove:
+                response1[0].pop(attr)
+        else:
+            return False
+        if len(response2):
+            for attr in remove:
+                response2[0].pop(attr)
+        else:
+            return False
+        return str(response1[0]) == str(response2[0])
+
+    def test_trends_per_day(self):
+        self._create_events()
+        with freeze_time("2020-01-04T13:00:01Z"):
+            # with self.assertNumQueries(16):
+            response = Trends().run(
+                Filter(data={"date_from": "-7d", "events": [{"id": "sign up"}, {"id": "no events"}],}), self.team,
+            )
+        self.assertEqual(response[0]["label"], "sign up")
+        self.assertEqual(response[0]["labels"][4], "Wed. 1 January")
+        self.assertEqual(response[0]["data"][4], 3.0)
+        self.assertEqual(response[0]["labels"][5], "Thu. 2 January")
+        self.assertEqual(response[0]["data"][5], 1.0)
+
+    def test_trends_per_day_48hours(self):
+        self._create_events()
+        with freeze_time("2020-01-03T13:00:01Z"):
+            response = Trends().run(
+                Filter(
+                    data={"date_from": "-48h", "interval": "day", "events": [{"id": "sign up"}, {"id": "no events"}],}
+                ),
+                self.team,
+            )
+
+        self.assertEqual(response[0]["data"][1], 1.0)
+        self.assertEqual(response[0]["labels"][1], "Thu. 2 January")
+
+    def test_trends_per_day_cumulative(self):
+        self._create_events()
+        with freeze_time("2020-01-04T13:00:01Z"):
+            with self.assertNumQueries(4):
+                response = Trends().run(
+                    Filter(data={"date_from": "-7d", "display": "ActionsLineGraphCumulative"}), self.team,
+                )
+
+        self.assertEqual(response[0]["label"], "sign up")
+        self.assertEqual(response[0]["labels"][4], "Wed. 1 January")
+        self.assertEqual(response[0]["data"][4], 3.0)
+        self.assertEqual(response[0]["labels"][5], "Thu. 2 January")
+        self.assertEqual(response[0]["data"][5], 4.0)
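
Note: for display == "ActionsLineGraphCumulative" the series is just the running total of the per-day counts (Trends applies np.cumsum, see trends.py further down), which is why day 5 reads 4.0 here rather than the 1.0 from the plain per-day test:

import numpy as np

daily = [0, 0, 0, 0, 3, 1, 0, 0]
assert list(np.cumsum(daily)) == [0, 0, 0, 0, 3, 4, 4, 4]
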
+
+    def test_trends_compare(self):
+        self._create_events()
+        with freeze_time("2020-01-04T13:00:01Z"):
+            response = Trends().run(Filter(data={"compare": True}), self.team)
+
+        self.assertEqual(response[0]["label"], "sign up - current")
+        self.assertEqual(response[0]["labels"][4], "day 4")
+        self.assertEqual(response[0]["data"][4], 3.0)
+        self.assertEqual(response[0]["labels"][5], "day 5")
+        self.assertEqual(response[0]["data"][5], 1.0)
+
+        self.assertEqual(response[1]["label"], "sign up - previous")
+        self.assertEqual(response[1]["labels"][4], "day 4")
+        self.assertEqual(response[1]["data"][4], 1.0)
+        self.assertEqual(response[1]["labels"][5], "day 5")
+        self.assertEqual(response[1]["data"][5], 0.0)
+
+    def test_property_filtering(self):
+        self._create_events()
+        with freeze_time("2020-01-04"):
+            response = Trends().run(
+                Filter(data={"properties": [{"key": "$some_property", "value": "value"}]}), self.team
+            )
+        self.assertEqual(response[0]["labels"][4], "Wed. 1 January")
+        self.assertEqual(response[0]["data"][4], 1.0)
+        self.assertEqual(response[0]["labels"][5], "Thu. 2 January")
+        self.assertEqual(response[0]["data"][5], 0)
+        self.assertEqual(response[1]["count"], 0)
+
+    def test_filter_events_by_cohort(self):
+        person1 = Person.objects.create(team=self.team, distinct_ids=["person_1"], properties={"name": "John"})
+        person2 = Person.objects.create(team=self.team, distinct_ids=["person_2"], properties={"name": "Jane"})
+
+        event1 = Event.objects.create(
+            event="event_name", team=self.team, distinct_id="person_1", properties={"$browser": "Safari"},
+        )
+        event2 = Event.objects.create(
+            event="event_name", team=self.team, distinct_id="person_2", properties={"$browser": "Chrome"},
+        )
+        event3 = Event.objects.create(
+            event="event_name", team=self.team, distinct_id="person_2", properties={"$browser": "Safari"},
+        )
+
+        cohort = Cohort.objects.create(team=self.team, groups=[{"properties": {"name": "Jane"}}])
+        cohort.calculate_people()
+
+        with self.assertNumQueries(1):
+            response = Trends().run(
+                Filter(
+                    data={
+                        "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}],
+                        "events": [{"id": "event_name"}],
+                    }
+                ),
+                self.team,
+            )
+        self.assertEqual(response[0]["count"], 2)
+        self.assertEqual(response[0]["data"][-1], 2)
+
+    def test_date_filtering(self):
+        self._create_events()
+        with freeze_time("2020-01-02"):
+            response = Trends().run(Filter(data={"date_from": "2019-12-21"}), self.team)
+        self.assertEqual(response[0]["labels"][3], "Tue. 24 December")
+        self.assertEqual(response[0]["data"][3], 1.0)
+        self.assertEqual(response[0]["data"][12], 1.0)
+
+    def test_interval_filtering(self):
+        self._create_events(use_time=True)
+
+        # test minute
+        with freeze_time("2020-01-02"):
+            response = Trends().run(Filter(data={"date_from": "2020-01-01", "interval": "minute"}), self.team)
+        self.assertEqual(response[0]["labels"][6], "Wed. 1 January, 00:06")
+        self.assertEqual(response[0]["data"][6], 3.0)
+
+        # test hour
+        with freeze_time("2020-01-02"):
+            response = Trends().run(Filter(data={"date_from": "2019-12-24", "interval": "hour"}), self.team)
+        self.assertEqual(response[0]["labels"][3], "Tue. 24 December, 03:00")
+        self.assertEqual(response[0]["data"][3], 1.0)
+        # bucket 192 of the 217 hourly buckets is 2020-01-01 00:00, which holds the three sign ups
+        self.assertEqual(response[0]["data"][192], 3.0)
+
+        # test week
+        with freeze_time("2020-01-02"):
+            response = Trends().run(Filter(data={"date_from": "2019-11-24", "interval": "week"}), self.team)
+        self.assertEqual(response[0]["labels"][4], "Sun. 22 December")
+        self.assertEqual(response[0]["data"][4], 1.0)
+        self.assertEqual(response[0]["labels"][5], "Sun. 29 December")
+        self.assertEqual(response[0]["data"][5], 4.0)
+
+        # test month
+        with freeze_time("2020-01-02"):
+            response = Trends().run(Filter(data={"date_from": "2019-9-24", "interval": "month"}), self.team)
+        self.assertEqual(response[0]["labels"][2], "Sat. 30 November")
+        self.assertEqual(response[0]["data"][2], 1.0)
+        self.assertEqual(response[0]["labels"][3], "Tue. 31 December")
+        self.assertEqual(response[0]["data"][3], 4.0)
+
+        with freeze_time("2020-01-02 23:30"):
+            Event.objects.create(team=self.team, event="sign up", distinct_id="blabla")
+
+        # test today + hourly
+        with freeze_time("2020-01-02T23:31:00Z"):
+            response = Trends().run(Filter(data={"date_from": "dStart", "interval": "hour"}), self.team)
+        self.assertEqual(response[0]["labels"][23], "Thu. 2 January, 23:00")
+        self.assertEqual(response[0]["data"][23], 1.0)
+
+    def test_all_dates_filtering(self):
+        self._create_events(use_time=True)
+        # `date_from: all` automatically uses the day of the first event as the start date
+        with freeze_time("2020-01-04T15:01:01Z"):
+            response = Trends().run(Filter(data={"date_from": "all"}), self.team)
+        self.assertEqual(response[0]["labels"][0], "Tue. 24 December")
+        self.assertEqual(response[0]["data"][0], 1.0)
+
+        # test empty response
+        with freeze_time("2020-01-04"):
+            empty = Trends().run(
+                Filter(data={"date_from": "all", "events": [{"id": "blabla"}, {"id": "sign up"}]}), self.team
+            )
+        self.assertEqual(empty[0]["data"][0], 0)
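
Note: every interval maps to a pandas frequency (FREQ_MAP in trends.py further down), and the response is a zero-filled pd.date_range over it; that is where the magic index 192 in the hourly test comes from:

import pandas as pd

FREQ_MAP = {"minute": "60S", "hour": "H", "day": "D", "week": "W", "month": "M"}

index = pd.date_range("2019-12-24", "2020-01-02", freq=FREQ_MAP["hour"])
assert len(index) == 217  # 9 days * 24 hours + 1 endpoint
assert str(index[192]) == "2020-01-01 00:00:00"  # 8 days in: the bucket holding the three sign ups
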
+
+    def test_breakdown_filtering(self):
+        self._create_events()
+        # test breakdown filtering
+        with freeze_time("2020-01-04T13:01:01Z"):
+            response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": "$some_property",
+                        "events": [
+                            {"id": "sign up", "name": "sign up", "type": "events", "order": 0,},
+                            {"id": "no events"},
+                        ],
+                    }
+                ),
+                self.team,
+            )
+
+        self.assertEqual(response[0]["label"], "sign up - Other")
+        self.assertEqual(response[1]["label"], "sign up - other_value")
+        self.assertEqual(response[2]["label"], "sign up - value")
+        self.assertEqual(response[3]["label"], "no events - Other")
+
+        self.assertEqual(sum(response[0]["data"]), 2)
+        self.assertEqual(response[0]["data"][4 + 7], 2)
+        self.assertEqual(response[0]["breakdown_value"], "None")
+
+        self.assertEqual(sum(response[1]["data"]), 1)
+        self.assertEqual(response[1]["data"][5 + 7], 1)
+        self.assertEqual(response[1]["breakdown_value"], "other_value")
+
+        # check numerical breakdown
+        with freeze_time("2020-01-04T13:01:01Z"):
+            response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": "$some_numerical_prop",
+                        "events": [
+                            {"id": "sign up", "name": "sign up", "type": "events", "order": 0,},
+                            {"id": "no events"},
+                        ],
+                    }
+                ),
+                self.team,
+            )
+        self.assertEqual(response[0]["label"], "sign up - Other")
+        self.assertEqual(response[0]["count"], 4.0)
+        self.assertEqual(response[1]["label"], "sign up - 80.0")
+        self.assertEqual(response[1]["count"], 1.0)
+
+    def test_breakdown_filtering_limit(self):
+        self._create_breakdown_events()
+        with freeze_time("2020-01-04T13:01:01Z"):
+            response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": "$some_property",
+                        "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0,}],
+                    }
+                ),
+                self.team,
+            )
+        self.assertEqual(len(response), 20)
+
+    def test_action_filtering(self):
+        sign_up_action, person = self._create_events()
+        with freeze_time("2020-01-04"):
+            action_response = Trends().run(Filter(data={"actions": [{"id": sign_up_action.id}]}), self.team)
+            event_response = Trends().run(Filter(data={"events": [{"id": "sign up"}]}), self.team)
+        self.assertEqual(len(action_response), 1)
+
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
+
+    def test_trends_for_non_existing_action(self):
+        with freeze_time("2020-01-04"):
+            response = Trends().run(Filter(data={"actions": [{"id": 50000000}]}), self.team)
+        self.assertEqual(len(response), 0)
+
+        with freeze_time("2020-01-04"):
+            response = Trends().run(Filter(data={"events": [{"id": "DNE"}]}), self.team)
+        self.assertEqual(response[0]["data"], [0, 0, 0, 0, 0, 0, 0, 0])
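
Note: math: "dau" counts each person at most once per bucket — a distinct count of person_id after truncating timestamps to the interval (see process_math in trends.py further down). In miniature:

events = [("2020-01-01", "u1"), ("2020-01-01", "u1"), ("2020-01-01", "u2"), ("2020-01-02", "u1")]

people_per_day = {}
for day, person in events:
    people_per_day.setdefault(day, set()).add(person)
assert {day: len(people) for day, people in people_per_day.items()} == {"2020-01-01": 2, "2020-01-02": 1}
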
+
+    def test_dau_filtering(self):
+        sign_up_action, person = self._create_events()
+        with freeze_time("2020-01-02"):
+            Person.objects.create(team=self.team, distinct_ids=["someone_else"])
+            Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else")
+        with freeze_time("2020-01-04"):
+            action_response = Trends().run(
+                Filter(data={"actions": [{"id": sign_up_action.id, "math": "dau"}]}), self.team
+            )
+            response = Trends().run(Filter(data={"events": [{"id": "sign up", "math": "dau"}]}), self.team)
+
+        self.assertEqual(response[0]["data"][4], 1)
+        self.assertEqual(response[0]["data"][5], 2)
+
+        self.assertTrue(self._compare_entity_response(action_response, response))
+
+    def test_dau_with_breakdown_filtering(self):
+        sign_up_action, _ = self._create_events()
+        with freeze_time("2020-01-02"):
+            Event.objects.create(
+                team=self.team, event="sign up", distinct_id="blabla", properties={"$some_property": "other_value"},
+            )
+        with freeze_time("2020-01-04"):
+            action_response = Trends().run(
+                Filter(data={"breakdown": "$some_property", "actions": [{"id": sign_up_action.id, "math": "dau"}]}),
+                self.team,
+            )
+            event_response = Trends().run(
+                Filter(data={"breakdown": "$some_property", "events": [{"id": "sign up", "math": "dau"}]}), self.team
+            )
+
+        self.assertEqual(event_response[0]["label"], "sign up - other_value")
+        self.assertEqual(event_response[1]["label"], "sign up - value")
+        self.assertEqual(event_response[2]["label"], "sign up - Other")
+
+        self.assertEqual(sum(event_response[0]["data"]), 1)
+        self.assertEqual(event_response[0]["data"][5], 1)
+
+        self.assertEqual(sum(event_response[2]["data"]), 1)
+        self.assertEqual(event_response[2]["data"][4], 1)  # property not defined
+
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
+
+    def _create_maths_events(self):
+        sign_up_action, person = self._create_events()
+        Person.objects.create(team=self.team, distinct_ids=["someone_else"])
+        Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 2})
+        Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 3})
+        Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 5})
+        Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8})
+        return sign_up_action
+
+    def test_sum_filtering(self):
+        sign_up_action = self._create_maths_events()
+
+        action_response = Trends().run(
+            Filter(data={"actions": [{"id": sign_up_action.id, "math": "sum", "math_property": "some_number"}]}),
+            self.team,
+        )
+        event_response = Trends().run(
+            Filter(data={"events": [{"id": "sign up", "math": "sum", "math_property": "some_number"}]}), self.team
+        )
+        self.assertEqual(action_response[0]["data"][-1], 18)
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
+
+    def test_avg_filtering(self):
+        sign_up_action = self._create_maths_events()
+
+        action_response = Trends().run(
+            Filter(data={"actions": [{"id": sign_up_action.id, "math": "avg", "math_property": "some_number"}]}),
+            self.team,
+        )
+        event_response = Trends().run(
+            Filter(data={"events": [{"id": "sign up", "math": "avg", "math_property": "some_number"}]}), self.team
+        )
+        self.assertEqual(action_response[0]["data"][-1], 4.5)
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
+
+    def test_min_filtering(self):
+        sign_up_action = self._create_maths_events()
+        action_response = Trends().run(
+            Filter(data={"actions": [{"id": sign_up_action.id, "math": "min", "math_property": "some_number"}]}),
+            self.team,
+        )
+        event_response = Trends().run(
+            Filter(data={"events": [{"id": "sign up", "math": "min", "math_property": "some_number"}]}), self.team
+        )
+        self.assertEqual(action_response[0]["data"][-1], 2)
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
+
+    def test_max_filtering(self):
+        sign_up_action = self._create_maths_events()
+        action_response = Trends().run(
+            Filter(data={"actions": [{"id": sign_up_action.id, "math": "max", "math_property": "some_number"}]}),
+            self.team,
+        )
+        event_response = Trends().run(
+            Filter(data={"events": [{"id": "sign up", "math": "max", "math_property": "some_number"}]}), self.team
+        )
+        self.assertEqual(action_response[0]["data"][-1], 8)
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
+
+    def test_avg_filtering_non_number_resiliency(self):
+        sign_up_action, person = self._create_events()
+        Person.objects.create(team=self.team, distinct_ids=["someone_else"])
+        Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 2})
+        Event.objects.create(
+            team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": "x"}
+        )
+        Event.objects.create(
+            team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": None}
+        )
+        Event.objects.create(team=self.team, event="sign up", distinct_id="someone_else", properties={"some_number": 8})
+        action_response = Trends().run(
+            Filter(data={"actions": [{"id": sign_up_action.id, "math": "avg", "math_property": "some_number"}]}),
+            self.team,
+        )
+        event_response = Trends().run(
+            Filter(data={"events": [{"id": "sign up", "math": "avg", "math_property": "some_number"}]}), self.team
+        )
+        self.assertEqual(action_response[0]["data"][-1], 5)
+        self.assertTrue(self._compare_entity_response(action_response, event_response))
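
Note: the cohort breakdowns tested below work by annotating every event row with one boolean column per cohort (see add_cohort_annotations in trends.py). A trimmed sketch of that annotation, assuming it runs inside a PostHog dev environment (cohort_pk is a placeholder):

from django.db.models import Exists, OuterRef

from posthog.models import CohortPeople, Event


def annotate_cohort(team_id: int, cohort_pk: int):
    # Tag each event with whether its person belongs to the cohort
    in_cohort = Exists(CohortPeople.objects.filter(cohort=cohort_pk, person_id=OuterRef("person_id")).only("id"))
    return Event.objects.filter(team_id=team_id).add_person_id(team_id).annotate(**{"cohort_{}".format(cohort_pk): in_cohort})
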
+
+    def _create_multiple_people(self):
+        person1 = Person.objects.create(team=self.team, distinct_ids=["person1"], properties={"name": "person1"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person1", timestamp="2020-01-01T12:00:00Z",
+        )
+
+        person2 = Person.objects.create(team=self.team, distinct_ids=["person2"], properties={"name": "person2"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person2", timestamp="2020-01-01T12:00:00Z",
+        )
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person2", timestamp="2020-01-02T12:00:00Z",
+        )
+        # same day as above, so it should only count as one active day
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person2", timestamp="2020-01-02T12:00:00Z",
+        )
+
+        person3 = Person.objects.create(team=self.team, distinct_ids=["person3"], properties={"name": "person3"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person3", timestamp="2020-01-01T12:00:00Z",
+        )
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person3", timestamp="2020-01-02T12:00:00Z",
+        )
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person3", timestamp="2020-01-03T12:00:00Z",
+        )
+
+        person4 = Person.objects.create(team=self.team, distinct_ids=["person4"], properties={"name": "person4"})
+        Event.objects.create(
+            team=self.team, event="watched movie", distinct_id="person4", timestamp="2020-01-05T12:00:00Z",
+        )
+        return (person1, person2, person3, person4)
+
+    def test_breakdown_by_cohort(self):
+        person1, person2, person3, person4 = self._create_multiple_people()
+        cohort = Cohort.objects.create(name="cohort1", team=self.team, groups=[{"properties": {"name": "person1"}}])
+        cohort2 = Cohort.objects.create(name="cohort2", team=self.team, groups=[{"properties": {"name": "person2"}}])
+        cohort3 = Cohort.objects.create(
+            name="cohort3",
+            team=self.team,
+            groups=[{"properties": {"name": "person1"}}, {"properties": {"name": "person2"}},],
+        )
+        cohort.calculate_people()
+        cohort2.calculate_people()
+        cohort3.calculate_people()
+        action = Action.objects.create(name="watched movie", team=self.team)
+        ActionStep.objects.create(action=action, event="watched movie")
+        action.calculate_events()
+
+        with freeze_time("2020-01-04T13:01:01Z"):
+            action_response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": json.dumps([cohort.pk, cohort2.pk, cohort3.pk, "all"]),
+                        "breakdown_type": "cohort",
+                        "actions": [{"id": action.pk, "type": "actions", "order": 0}],
+                    }
+                ),
+                self.team,
+            )
+            event_response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": json.dumps([cohort.pk, cohort2.pk, cohort3.pk, "all"]),
+                        "breakdown_type": "cohort",
+                        "events": [{"id": "watched movie", "name": "watched movie", "type": "events", "order": 0,}],
+                    }
+                ),
+                self.team,
+            )
+
+        self.assertEqual(event_response[1]["label"], "watched movie - cohort2")
+        self.assertEqual(event_response[2]["label"], "watched movie - cohort3")
+        self.assertEqual(event_response[3]["label"], "watched movie - all users")
+
+        self.assertEqual(sum(event_response[0]["data"]), 1)
+        self.assertEqual(event_response[0]["breakdown_value"], cohort.pk)
+
+        self.assertEqual(sum(event_response[1]["data"]), 3)
+        self.assertEqual(event_response[1]["breakdown_value"], cohort2.pk)
+
+        self.assertEqual(sum(event_response[2]["data"]), 4)
+        self.assertEqual(event_response[2]["breakdown_value"], cohort3.pk)
+
+        self.assertEqual(sum(event_response[3]["data"]), 7)
+        self.assertEqual(event_response[3]["breakdown_value"], "all")
+
+        self.assertTrue(self._compare_entity_response(event_response, action_response,))
+
+    def test_breakdown_by_person_property(self):
+        person1, person2, person3, person4 = self._create_multiple_people()
+        action = Action.objects.create(name="watched movie", team=self.team)
+        ActionStep.objects.create(action=action, event="watched movie")
+        action.calculate_events()
+
+        with freeze_time("2020-01-04T13:01:01Z"):
+            action_response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": "name",
+                        "breakdown_type": "person",
+                        "actions": [{"id": action.pk, "type": "actions", "order": 0}],
+                    }
+                ),
+                self.team,
+            )
+            event_response = Trends().run(
+                Filter(
+                    data={
+                        "date_from": "-14d",
+                        "breakdown": "name",
+                        "breakdown_type": "person",
+                        "events": [{"id": "watched movie", "name": "watched movie", "type": "events", "order": 0,}],
+                    }
+                ),
+                self.team,
+            )
+
+        self.assertEqual(event_response[0]["count"], 3)
+        self.assertEqual(event_response[0]["breakdown_value"], "person2")
+
+        self.assertEqual(event_response[1]["count"], 1)
+        self.assertEqual(event_response[1]["breakdown_value"], "person1")
+
+        self.assertEqual(event_response[2]["count"], 3)
+        self.assertEqual(event_response[2]["breakdown_value"], "person3")
+
+        self.assertEqual(event_response[3]["count"], 0)
+        self.assertEqual(event_response[3]["breakdown_value"], "person4")
+
+        self.assertTrue(self._compare_entity_response(event_response, action_response,))
diff --git a/posthog/queries/trends.py b/posthog/queries/trends.py
new file mode 100644
index 0000000000000..e8feedad8f42e
--- /dev/null
+++ b/posthog/queries/trends.py
@@ -0,0 +1,322 @@
+from django.db.models.expressions import Subquery
+from .base import filter_events, process_entity_for_events, handle_compare, BaseQuery
+from posthog.models import (
+    Event,
+    Team,
+    Action,
+    ActionStep,
+    DashboardItem,
+    User,
+    Person,
+    Filter,
+    Entity,
+    Cohort,
+    CohortPeople,
+)
+from posthog.utils import (
+    append_data,
+    TemporaryTokenAuthentication,
+)
+from posthog.constants import (
+    TREND_FILTER_TYPE_ACTIONS,
+    TREND_FILTER_TYPE_EVENTS,
+    TRENDS_CUMULATIVE,
+    TRENDS_STICKINESS,
+)
+from posthog.tasks.calculate_action import calculate_action
+from rest_framework import request, serializers, viewsets, authentication
+from rest_framework.response import Response
+from rest_framework.decorators import action
+from django.db.models import (
+    Q,
+    Count,
+    Sum,
+    Avg,
+    Min,
+    Max,
+    Prefetch,
+    functions,
+    QuerySet,
+    OuterRef,
+    Exists,
+    Value,
+    BooleanField,
+    FloatField,
+)
+from django.db.models.expressions import RawSQL
+from django.db.models.functions import Cast
+from django.utils.timezone import now
+from typing import Any, List, Dict, Optional, Tuple, Union
+from datetime import timedelta
+import pandas as pd
+import datetime
+import json
+import copy
+import numpy as np
+from posthog.decorators import cached_function, TRENDS_ENDPOINT
+
+
+FREQ_MAP = {"minute": "60S", "hour": "H", "day": "D", "week": "W", "month": "M"}
+
+
+def build_dataframe(aggregates: QuerySet, interval: str, breakdown: Optional[str] = None) -> pd.DataFrame:
+    if breakdown == "cohorts":
+        cohort_keys = [key for key in aggregates[0].keys() if key.startswith("cohort_")]
+        # Convert a queryset with day, count, cohort_88, cohort_99, ... to multiple rows, for example:
+        # 2020-01-01..., 1, cohort_88
+        # 2020-01-01..., 3, cohort_99
+        dataframe = pd.melt(
+            pd.DataFrame(aggregates), id_vars=[interval, "count"], value_vars=cohort_keys, var_name="breakdown",
+        ).rename(columns={interval: "date"})
+        # Filter out rows where the person was not in the cohort
+        dataframe = dataframe[dataframe["value"] == True]
+        # Sum dates with the same cohort
+        dataframe = dataframe.groupby(["breakdown", "date"], as_index=False).sum()
+    else:
+        dataframe = pd.DataFrame(
+            [
+                {"date": a[interval], "count": a["count"], "breakdown": a[breakdown] if breakdown else "Total",}
+                for a in aggregates
+            ]
+        )
+    if interval == "week":
+        dataframe["date"] = dataframe["date"].apply(lambda x: x - pd.offsets.Week(weekday=6))
+    elif interval == "month":
+        dataframe["date"] = dataframe["date"].apply(lambda x: x - pd.offsets.MonthEnd(n=1))
+    return dataframe
+
+
+def group_events_to_date(
+    date_from: Optional[datetime.datetime],
+    date_to: Optional[datetime.datetime],
+    aggregates: QuerySet,
+    interval: str,
+    breakdown: Optional[str] = None,
+) -> Dict[str, Dict[datetime.datetime, int]]:
+    response = {}
+
+    if interval == "day":
+        if date_from:
+            date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0)
+        if date_to:
+            date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0)
+
+    time_index = pd.date_range(date_from, date_to, freq=FREQ_MAP[interval])
+    if len(aggregates) > 0:
+        dataframe = build_dataframe(aggregates, interval, breakdown)
+
+        # extract the top 20 values if there are more than 20 breakdowns
+        if breakdown and dataframe["breakdown"].nunique() > 20:
+            counts = (
+                dataframe.groupby(["breakdown"])["count"]
+                .sum()
+                .reset_index(name="total")
+                .sort_values(by=["total"], ascending=False)[:20]
+            )
+            top_breakdown = counts["breakdown"].to_list()
+            dataframe = dataframe[dataframe.breakdown.isin(top_breakdown)]
+        dataframe = dataframe.astype({"breakdown": str})
+        for value in dataframe["breakdown"].unique():
+            filtered = (
+                dataframe.loc[dataframe["breakdown"] == value]
+                if value
+                else dataframe.loc[dataframe["breakdown"].isnull()]
+            )
+            df_dates = pd.DataFrame(filtered.groupby("date").mean(), index=time_index)
+            df_dates = df_dates.fillna(0)
+            response[value] = {key: value[0] if len(value) > 0 else 0 for key, value in df_dates.iterrows()}
+    else:
+        dataframe = pd.DataFrame([], index=time_index)
+        dataframe = dataframe.fillna(0)
+        response["total"] = {key: value[0] if len(value) > 0 else 0 for key, value in dataframe.iterrows()}
+
+    return response
+
+
+def get_interval_annotation(key: str) -> Dict[str, Any]:
+    map: Dict[str, Any] = {
+        "minute": functions.TruncMinute("timestamp"),
+        "hour": functions.TruncHour("timestamp"),
+        "day": functions.TruncDay("timestamp"),
+        "week": functions.TruncWeek("timestamp"),
+        "month": functions.TruncMonth("timestamp"),
+    }
+    func = map.get(key)
+    if func is None:
+        return {"day": map.get("day")}  # default
+
+    return {key: func}
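
Note: the pd.melt call in build_dataframe is what turns the wide per-cohort boolean columns into long rows; standalone, the reshape looks like this:

import pandas as pd

wide = pd.DataFrame([{"day": "2020-01-01", "count": 3, "cohort_88": True, "cohort_99": False}])
long = pd.melt(wide, id_vars=["day", "count"], value_vars=["cohort_88", "cohort_99"], var_name="breakdown")
long = long[long["value"] == True]  # keep only rows where the person was in the cohort
assert list(long["breakdown"]) == ["cohort_88"] and list(long["count"]) == [3]
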
+
+
+def add_cohort_annotations(team_id: int, breakdown: List[Union[int, str]]) -> Dict[str, Union[Value, Exists]]:
+    cohorts = Cohort.objects.filter(team_id=team_id, pk__in=[b for b in breakdown if b != "all"])
+    annotations: Dict[str, Union[Value, Exists]] = {}
+    for cohort in cohorts:
+        annotations["cohort_{}".format(cohort.pk)] = Exists(
+            CohortPeople.objects.filter(cohort=cohort.pk, person_id=OuterRef("person_id")).only("id")
+        )
+    if "all" in breakdown:
+        annotations["cohort_all"] = Value(True, output_field=BooleanField())
+    return annotations
+
+
+def add_person_properties_annotations(team_id: int, breakdown: str) -> Dict[str, Subquery]:
+    person_properties = Subquery(
+        Person.objects.filter(team_id=team_id, id=OuterRef("person_id")).values("properties__{}".format(breakdown))
+    )
+    annotations = {}
+    annotations["properties__{}".format(breakdown)] = person_properties
+    return annotations
+
+
+def aggregate_by_interval(
+    filtered_events: QuerySet, team_id: int, entity: Entity, filter: Filter, breakdown: Optional[str] = None,
+) -> Dict[str, Any]:
+    interval = filter.interval if filter.interval else "day"
+    interval_annotation = get_interval_annotation(interval)
+    values = [interval]
+    if breakdown:
+        if filter.breakdown_type == "cohort":
+            cohort_annotations = add_cohort_annotations(
+                team_id, json.loads(filter.breakdown) if filter.breakdown else []
+            )
+            values.extend(cohort_annotations.keys())
+            filtered_events = filtered_events.annotate(**cohort_annotations)
+            breakdown = "cohorts"
+        elif filter.breakdown_type == "person":
+            person_annotations = add_person_properties_annotations(
+                team_id, filter.breakdown if filter.breakdown else ""
+            )
+            filtered_events = filtered_events.annotate(**person_annotations)
+            values.append(breakdown)
+        else:
+            values.append(breakdown)
+    aggregates = filtered_events.annotate(**interval_annotation).values(*values).annotate(count=Count(1)).order_by()
+
+    if breakdown:
+        aggregates = aggregates.order_by("-count")
+
+    aggregates = process_math(aggregates, entity)
+
+    dates_filled = group_events_to_date(
+        date_from=filter.date_from,
+        date_to=filter.date_to,
+        aggregates=aggregates,
+        interval=interval,
+        breakdown=breakdown,
+    )
+
+    return dates_filled
+
+
+def process_math(query: QuerySet, entity: Entity) -> QuerySet:
+    math_to_aggregate_function = {"sum": Sum, "avg": Avg, "min": Min, "max": Max}
+    if entity.math == "dau":
+        # In daily active users mode count only up to 1 event per user per day
+        query = query.annotate(count=Count("person_id", distinct=True))
+    elif entity.math in math_to_aggregate_function:
+        # Run relevant aggregate function on specified event property, casting it to a double
+        query = query.annotate(
+            count=math_to_aggregate_function[entity.math](
+                Cast(RawSQL('"posthog_event"."properties"->>%s', (entity.math_property,)), output_field=FloatField(),)
+            )
+        )
+        # Skip over events where the specified property is not set or not a number
+        # It may not be ideally clear to the user what events were skipped,
+        # but in the absence of typing, this is safe, cheap, and frictionless
+        query = query.extra(
+            where=['jsonb_typeof("posthog_event"."properties"->%s) = \'number\''], params=[entity.math_property],
+        )
+    return query
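
Note: the jsonb_typeof guard above is why non-numeric property values are skipped instead of crashing the cast; it is exactly the behaviour test_avg_filtering_non_number_resiliency (earlier in this patch) pins down. In plain Python terms:

values = [2, "x", None, 8]

numeric = [v for v in values if isinstance(v, (int, float))]
assert sum(numeric) / len(numeric) == 5  # "x" and None are ignored, matching the test's expected average
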
+
+
+def breakdown_label(entity: Entity, value: Union[str, int]) -> Dict[str, Optional[Union[str, int]]]:
+    ret_dict: Dict[str, Optional[Union[str, int]]] = {}
+    if not value or not isinstance(value, str) or "cohort_" not in value:
+        ret_dict["label"] = "{} - {}".format(
+            entity.name, value if value and value != "None" and value != "nan" else "Other",
+        )
+        ret_dict["breakdown_value"] = value if value and not pd.isna(value) else None
+    else:
+        if value == "cohort_all":
+            ret_dict["label"] = "{} - all users".format(entity.name)
+            ret_dict["breakdown_value"] = "all"
+        else:
+            cohort = Cohort.objects.get(pk=value.replace("cohort_", ""))
+            ret_dict["label"] = "{} - {}".format(entity.name, cohort.name)
+            ret_dict["breakdown_value"] = cohort.pk
+    return ret_dict
+
+
+class Trends(BaseQuery):
+    def _serialize_entity(self, entity: Entity, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
+        if filter.interval is None:
+            filter.interval = "day"
+
+        serialized: Dict[str, Any] = {
+            "action": entity.to_dict(),
+            "label": entity.name,
+            "count": 0,
+            "data": [],
+            "labels": [],
+            "days": [],
+        }
+        response = []
+        events = process_entity_for_events(entity=entity, team_id=team_id, order_by="-timestamp",)
+        events = events.filter(filter_events(team_id, filter, entity))
+        items = aggregate_by_interval(
+            filtered_events=events,
+            team_id=team_id,
+            entity=entity,
+            filter=filter,
+            breakdown="properties__{}".format(filter.breakdown) if filter.breakdown else None,
+        )
+        for value, item in items.items():
+            new_dict = copy.deepcopy(serialized)
+            if value != "Total":
+                new_dict.update(breakdown_label(entity, value))
+            new_dict.update(append_data(dates_filled=list(item.items()), interval=filter.interval))
+            if filter.display == TRENDS_CUMULATIVE:
+                new_dict["data"] = np.cumsum(new_dict["data"])
+            response.append(new_dict)
+
+        return response
+
+    def calculate_trends(self, filter: Filter, team_id: int) -> List[Dict[str, Any]]:
+        actions = Action.objects.filter(team_id=team_id).order_by("-id")
+        if len(filter.actions) > 0:
+            actions = Action.objects.filter(pk__in=[entity.id for entity in filter.actions], team_id=team_id)
+        actions = actions.prefetch_related(Prefetch("steps", queryset=ActionStep.objects.order_by("id")))
+        entities_list = []
+
+        if len(filter.entities) == 0:
+            # If no filters, automatically grab all actions and show those instead
+            filter.entities = [
+                Entity({"id": action.id, "name": action.name, "type": TREND_FILTER_TYPE_ACTIONS,}) for action in actions
+            ]
+
+        if not filter.date_from:
+            filter._date_from = (
+                Event.objects.filter(team_id=team_id)
+                .order_by("timestamp")[0]
+                .timestamp.replace(hour=0, minute=0, second=0, microsecond=0)
+                .isoformat()
+            )
+        if not filter.date_to:
+            filter._date_to = now().isoformat()
+
+        for entity in filter.entities:
+            if entity.type == TREND_FILTER_TYPE_ACTIONS:
+                try:
+                    db_action = [action for action in actions if action.id == entity.id][0]
+                    entity.name = db_action.name
+                except IndexError:
+                    continue
+            entities_list.extend(
+                handle_compare(entity=entity, filter=filter, func=self._serialize_entity, team_id=team_id)
+            )
+
+        return entities_list
+
+    def run(self, filter: Filter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
+        return self.calculate_trends(filter, team.pk)
diff --git a/posthog/tasks/update_cache.py b/posthog/tasks/update_cache.py
index f13ba9ebda560..0dcaa5c97cdaf 100644
--- a/posthog/tasks/update_cache.py
+++ b/posthog/tasks/update_cache.py
@@ -9,11 +9,11 @@
 from django.db.models import Prefetch, Q
 from django.utils import timezone
 
-from posthog.api.action import calculate_trends, get_actions
 from posthog.api.funnel import FunnelSerializer
 from posthog.celery import app, update_cache_item_task
 from posthog.decorators import FUNNEL_ENDPOINT, TRENDS_ENDPOINT
-from posthog.models import Action, ActionStep, DashboardItem, Entity, Filter, Funnel
+from posthog.models import Action, ActionStep, DashboardItem, Entity, Filter, Funnel, Team
+from posthog.queries.trends import Trends
 from posthog.utils import generate_cache_key
 
 logger = logging.getLogger(__name__)
@@ -64,7 +64,7 @@ def _calculate_trends(filter: Filter, team_id: int) -> List[Dict[str, Any]]:
     actions = actions.prefetch_related(Prefetch("steps", queryset=ActionStep.objects.order_by("id")))
     dashboard_items = DashboardItem.objects.filter(team_id=team_id, filters=filter.to_dict())
     dashboard_items.update(refreshing=True)
-    result = calculate_trends(filter, team_id, actions)
+    result = Trends().run(filter, Team(pk=team_id))
     dashboard_items.update(last_refresh=timezone.now(), refreshing=False)
     return result
 
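
Note: one subtlety in the update_cache change above: Trends().run(filter, Team(pk=team_id)) passes an unsaved Team instance. That is safe here because Trends only reads team.pk before delegating to calculate_trends; no other Team field is touched. Hypothetical usage, assuming an existing team id:

from posthog.models import Filter, Team
from posthog.queries.trends import Trends

filter = Filter(data={"events": [{"id": "$pageview"}], "date_from": "-7d"})
result = Trends().run(filter, Team(pk=1))  # no DB fetch of the Team row is needed
# result: [{"label": ..., "count": ..., "data": [...], "labels": [...], "days": [...]}, ...]
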
diff --git a/tsconfig.json b/tsconfig.json
index 3b09546d208f0..091db705ec7c0 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -1,31 +1,31 @@
 {
-  "compilerOptions": {
-    "baseUrl": "./frontend",
-    "paths": {
-      "lib/*": ["./src/lib/*"],
-      "scenes/*": ["./src/scenes/*"],
-      "~/*": ["./src/*"]
+    "compilerOptions": {
+        "baseUrl": "./frontend",
+        "paths": {
+            "lib/*": ["./src/lib/*"],
+            "scenes/*": ["./src/scenes/*"],
+            "~/*": ["./src/*"]
+        },
+        // https://www.sitepoint.com/react-with-typescript-best-practices/
+        "allowJs": true, // Allow JavaScript files to be compiled
+        "skipLibCheck": true, // Skip type checking of all declaration files
+        "esModuleInterop": true, // Disables namespace imports (import * as fs from "fs") and enables CJS/AMD/UMD style imports (import fs from "fs")
+        "allowSyntheticDefaultImports": true, // Allow default imports from modules with no default export
+        "strict": false, // Enable all strict type checking options
+        "forceConsistentCasingInFileNames": true, // Disallow inconsistently-cased references to the same file.
+        "module": "esnext", // Specify module code generation
+        "moduleResolution": "node", // Resolve modules using Node.js style
+        "resolveJsonModule": true, // Include modules imported with .json extension
+        "noEmit": true, // Do not emit output (meaning do not compile code, only perform type checking)
+        "jsx": "react", // Support JSX in .tsx files
+        "sourceMap": true, // Generate corresponding .map file
+        "declaration": true, // Generate corresponding .d.ts file
+        "noUnusedLocals": true, // Report errors on unused locals
+        "noUnusedParameters": true, // Report errors on unused parameters
+        "experimentalDecorators": true, // Enables experimental support for ES decorators
+        "incremental": true, // Enable incremental compilation by reading/writing information from prior compilations to a file on disk
+        "noFallthroughCasesInSwitch": true // Report errors for fallthrough cases in switch statement
     },
-    // https://www.sitepoint.com/react-with-typescript-best-practices/
-    "allowJs": true, // Allow JavaScript files to be compiled
-    "skipLibCheck": true, // Skip type checking of all declaration files
-    "esModuleInterop": true, // Disables namespace imports (import * as fs from "fs") and enables CJS/AMD/UMD style imports (import fs from "fs")
-    "allowSyntheticDefaultImports": true, // Allow default imports from modules with no default export
-    "strict": false, // Enable all strict type checking options
-    "forceConsistentCasingInFileNames": true, // Disallow inconsistently-cased references to the same file.
-    "module": "esnext", // Specify module code generation
-    "moduleResolution": "node", // Resolve modules using Node.js style
-    "resolveJsonModule": true, // Include modules imported with .json extension
-    "noEmit": true, // Do not emit output (meaning do not compile code, only perform type checking)
-    "jsx": "react", // Support JSX in .tsx files
-    "sourceMap": true, // Generate corrresponding .map file
-    "declaration": true, // Generate corresponding .d.ts file
-    "noUnusedLocals": true, // Report errors on unused locals
-    "noUnusedParameters": true, // Report errors on unused parameters
-    "experimentalDecorators": true, // Enables experimental support for ES decorators
-    "incremental": true, // Enable incremental compilation by reading/writing information from prior compilations to a file on disk
-    "noFallthroughCasesInSwitch": true // Report errors for fallthrough cases in switch statement
-  },
-  "include": ["frontend/**/*"],
-  "exclude": ["node_modules/**/*", "staticfiles/**/*", "frontend/dist/**/*"]
+    "include": ["frontend/**/*"],
+    "exclude": ["node_modules/**/*", "staticfiles/**/*", "frontend/dist/**/*"]
 }