From 9f64611b0a3c9f2e2ce599ec60c1efe9d48fb0ce Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 22 Jan 2021 15:00:42 +0000 Subject: [PATCH 1/3] add missing logic --- ee/clickhouse/models/cohort.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/ee/clickhouse/models/cohort.py b/ee/clickhouse/models/cohort.py index 16b4cc149080c..64a80a287a138 100644 --- a/ee/clickhouse/models/cohort.py +++ b/ee/clickhouse/models/cohort.py @@ -1,7 +1,9 @@ import uuid -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, Dict, List, Optional, Tuple +from django.utils import timezone + from ee.clickhouse.client import sync_execute from ee.clickhouse.models.action import format_action_filter from ee.clickhouse.sql.cohort import CALCULATE_COHORT_PEOPLE_SQL @@ -30,10 +32,15 @@ def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]: if group.get("action_id"): action = Action.objects.get(pk=group["action_id"], team_id=cohort.team.pk) action_filter_query, action_params = format_action_filter(action, prepend="_{}_action".format(group_idx)) - extract_person = "SELECT distinct_id FROM events WHERE team_id = %(team_id)s AND {query}".format( - query=action_filter_query + + date_query, date_params = "", {} + if group.get("days"): + date_query, date_params = parse_action_timestamps(int(group.get("days"))) + + extract_person = "SELECT distinct_id FROM events WHERE team_id = %(team_id)s {date_query} AND {query}".format( + query=action_filter_query, date_query=date_query ) - params = {**params, **action_params} + params = {**params, **action_params, **date_params} filters.append("distinct_id IN (" + extract_person + ")") elif group.get("properties"): @@ -53,6 +60,16 @@ def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]: return joined_filter, params +def parse_action_timestamps(days: int) -> Tuple[str, Dict[str, str]]: + curr_time = timezone.now() + start_time = curr_time - timedelta(days=days) + + return ( + "and timestamp >= %(date_from)s AND timestamp <= %(date_to)s", + {"date_from": start_time.strftime("%Y-%m-%d %H:%M:%S"), "date_to": curr_time.strftime("%Y-%m-%d %H:%M:%S")}, + ) + + def format_filter_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]: person_query, params = format_person_query(cohort) person_id_query = CALCULATE_COHORT_PEOPLE_SQL.format(query=person_query) From 3ca51fe99b8d52a5976b495807cf2726b09b4354 Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 22 Jan 2021 12:57:47 -0500 Subject: [PATCH 2/3] add test --- ee/clickhouse/models/test/test_cohort.py | 50 ++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/ee/clickhouse/models/test/test_cohort.py b/ee/clickhouse/models/test/test_cohort.py index f6b2c6407f7ac..ecb4c2671e1ff 100644 --- a/ee/clickhouse/models/test/test_cohort.py +++ b/ee/clickhouse/models/test/test_cohort.py @@ -1,6 +1,8 @@ from datetime import datetime from uuid import uuid4 +from freezegun import freeze_time + from ee.clickhouse.client import sync_execute from ee.clickhouse.models.cohort import format_filter_query, get_person_ids_by_cohort_id from ee.clickhouse.models.event import create_event @@ -106,6 +108,54 @@ def test_prop_cohort_basic_action(self): result = sync_execute(final_query, {**params, "team_id": self.team.pk}) self.assertEqual(len(result), 1) + def test_prop_cohort_basic_action_days(self): + + _create_person(distinct_ids=["some_other_id"], team_id=self.team.pk, properties={"$some_prop": "something"}) + + _create_person( + distinct_ids=["some_id"], + team_id=self.team.pk, + properties={"$some_prop": "something", "$another_prop": "something"}, + ) + + action = _create_action(team=self.team, name="$pageview") + _create_event( + event="$pageview", + team=self.team, + distinct_id="some_id", + properties={"attr": "some_val"}, + timestamp=datetime(2020, 1, 9, 12, 0, 1), + ) + + _create_event( + event="$pageview", + team=self.team, + distinct_id="some_other_id", + properties={"attr": "some_val"}, + timestamp=datetime(2020, 1, 5, 12, 0, 1), + ) + + with freeze_time("2020-01-10"): + cohort1 = Cohort.objects.create( + team=self.team, groups=[{"action_id": action.pk, "days": 1}], name="cohort1", + ) + + filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}],}) + query, params = parse_prop_clauses(filter.properties, self.team.pk) + final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) + result = sync_execute(final_query, {**params, "team_id": self.team.pk}) + self.assertEqual(len(result), 1) + + cohort2 = Cohort.objects.create( + team=self.team, groups=[{"action_id": action.pk, "days": 7}], name="cohort2", + ) + + filter = Filter(data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}],}) + query, params = parse_prop_clauses(filter.properties, self.team.pk) + final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) + result = sync_execute(final_query, {**params, "team_id": self.team.pk}) + self.assertEqual(len(result), 2) + def test_prop_cohort_multiple_groups(self): _create_person(distinct_ids=["some_other_id"], team_id=self.team.pk, properties={"$some_prop": "something"}) From 5ca9bb62ff0671368df3b71a6c86d010aeeb94a8 Mon Sep 17 00:00:00 2001 From: Eric Date: Fri, 22 Jan 2021 13:01:47 -0500 Subject: [PATCH 3/3] fix type --- ee/clickhouse/models/cohort.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ee/clickhouse/models/cohort.py b/ee/clickhouse/models/cohort.py index 64a80a287a138..3f7edf464b839 100644 --- a/ee/clickhouse/models/cohort.py +++ b/ee/clickhouse/models/cohort.py @@ -33,7 +33,8 @@ def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]: action = Action.objects.get(pk=group["action_id"], team_id=cohort.team.pk) action_filter_query, action_params = format_action_filter(action, prepend="_{}_action".format(group_idx)) - date_query, date_params = "", {} + date_query: str = "" + date_params: Dict[str, str] = {} if group.get("days"): date_query, date_params = parse_action_timestamps(int(group.get("days")))