Skip to content

Commit

Permalink
Introduce Path Persons API endpoint (#6070)
Browse files Browse the repository at this point in the history
* introduce path persons API endpoint

* docs in query class
  • Loading branch information
neilkakkar authored and pauldambra committed Sep 23, 2021
1 parent bb0826c commit 9e970d3
Show file tree
Hide file tree
Showing 6 changed files with 225 additions and 11 deletions.
5 changes: 4 additions & 1 deletion ee/clickhouse/queries/paths/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ee.clickhouse.queries.funnels.funnel_persons import ClickhouseFunnelPersons
from ee.clickhouse.queries.paths.path_event_query import PathEventQuery
from ee.clickhouse.sql.paths.path import PATH_ARRAY_QUERY
from posthog.constants import FUNNEL_PATH_BETWEEN_STEPS
from posthog.constants import FUNNEL_PATH_BETWEEN_STEPS, LIMIT
from posthog.models import Filter, Team
from posthog.models.filters.path_filter import PathFilter

Expand Down Expand Up @@ -35,6 +35,9 @@ def __init__(self, filter: PathFilter, team: Team, funnel_filter: Optional[Filte
if self._filter.include_all_custom_events and self._filter.custom_events:
raise ValidationError("Cannot include all custom events and specific custom events in the same query")

if not self._filter.limit:
self._filter = self._filter.with_data({LIMIT: 100})

if self._filter.path_groupings:
regex_groupings = []
for grouping in self._filter.path_groupings:
Expand Down
24 changes: 21 additions & 3 deletions ee/clickhouse/queries/paths/paths_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,21 @@


class ClickhousePathsPersons(ClickhousePaths):
"""
`path_start_key` and `path_end_key` are two new params for this class.
These determine the start and end point of Paths you want. Both of these are optional.
Not specifying them means "get me all users on this path query".
Only specifying `path_start_key` means "get me all users whose paths start at this key"
Only specifying `path_end_key` means "get me all users whose paths end at this key"
Specifying both means "get me all users whose path starts at `start_key` and ends at `end_key`."
Note that:
Persons are calculated only between direct paths. There should not be any
other path item between start and end key.
"""

def get_query(self):

paths_per_person_query = self.get_paths_per_person_query()
Expand All @@ -16,12 +31,12 @@ def get_query(self):
if self.should_query_funnel():
paths_funnel_cte = self.get_path_query_funnel_cte(cast(Filter, self._funnel_filter))

self.params["limit"] = self._filter.limit or 100
self.params["limit"] = self._filter.limit
self.params["offset"] = self._filter.offset

return f"""
{paths_funnel_cte}
SELECT person_id
SELECT DISTINCT person_id
FROM (
{paths_per_person_query}
)
Expand All @@ -41,7 +56,10 @@ def get_person_path_filter(self) -> str:
conditions.append("path_key = %(path_end_key)s")
self.params["path_end_key"] = self._filter.path_end_key

return " AND ".join(conditions)
if conditions:
return " AND ".join(conditions)

return "1=1"

def _format_results(self, results):
people = Person.objects.filter(team_id=self._team.pk, uuid__in=[val[0] for val in results])
Expand Down
7 changes: 4 additions & 3 deletions ee/clickhouse/queries/test/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ def test_path_removes_duplicates(self):

@test_with_materialized_columns(["$current_url"])
def test_paths_start_and_end(self):
Person.objects.create(team_id=self.team.pk, distinct_ids=["person_1"])
p1 = Person.objects.create(team_id=self.team.pk, distinct_ids=["person_1"])
_create_event(
properties={"$current_url": "/1"},
distinct_id="person_1",
Expand Down Expand Up @@ -1136,7 +1136,7 @@ def test_paths_start_and_end(self):
timestamp="2021-05-01 00:07:00",
)

Person.objects.create(team_id=self.team.pk, distinct_ids=["person_2"])
p2 = Person.objects.create(team_id=self.team.pk, distinct_ids=["person_2"])
_create_event(
properties={"$current_url": "/5"},
distinct_id="person_2",
Expand All @@ -1152,7 +1152,7 @@ def test_paths_start_and_end(self):
timestamp="2021-05-01 00:02:00",
)

Person.objects.create(team_id=self.team.pk, distinct_ids=["person_3"])
p3 = Person.objects.create(team_id=self.team.pk, distinct_ids=["person_3"])
_create_event(
properties={"$current_url": "/3"},
distinct_id="person_3",
Expand Down Expand Up @@ -1195,6 +1195,7 @@ def test_paths_start_and_end(self):
self.assertEqual(
response, [{"source": "1_/5", "target": "2_/about", "value": 2, "average_conversion_time": 60000.0}]
)
self.assertCountEqual(self._get_people_at_path(filter, "1_/5", "2_/about"), [p1.uuid, p2.uuid])

def test_properties_queried_using_path_filter(self):
def should_query_list(filter) -> Tuple[bool, bool]:
Expand Down
46 changes: 43 additions & 3 deletions ee/clickhouse/views/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
from ee.clickhouse.queries.clickhouse_retention import ClickhouseRetention
from ee.clickhouse.queries.clickhouse_stickiness import ClickhouseStickiness
from ee.clickhouse.queries.funnels import ClickhouseFunnelPersons, ClickhouseFunnelTrendsPersons
from ee.clickhouse.queries.paths import ClickhousePathsPersons
from ee.clickhouse.queries.trends.lifecycle import ClickhouseLifecycle
from ee.clickhouse.sql.person import GET_PERSON_PROPERTIES_COUNT
from posthog.api.person import PersonViewSet
from posthog.api.utils import format_next_url, format_offset_absolute_url
from posthog.constants import INSIGHT_FUNNELS, FunnelVizType
from posthog.api.utils import format_offset_absolute_url
from posthog.constants import INSIGHT_FUNNELS, INSIGHT_PATHS, FunnelVizType
from posthog.decorators import cached_function
from posthog.models import Event, Filter, Person
from posthog.models.filters.path_filter import PathFilter


class ClickhousePersonViewSet(PersonViewSet):
Expand Down Expand Up @@ -53,7 +55,7 @@ def calculate_funnel_persons(self, request: Request) -> Dict[str, Tuple[list, Op
return {"result": ([], None, None)}

team = request.user.team
filter = Filter(request=request)
filter = Filter(request=request, data={"insight": INSIGHT_FUNNELS})
funnel_class: Callable = ClickhouseFunnelPersons

if filter.funnel_viz_type == FunnelVizType.TRENDS:
Expand All @@ -71,6 +73,44 @@ def get_properties(self, request: Request):
rows = sync_execute(GET_PERSON_PROPERTIES_COUNT, {"team_id": self.team.pk})
return [{"name": name, "count": count} for name, count in rows]

@action(methods=["GET", "POST"], detail=False)
def path(self, request: Request, **kwargs) -> Response:
if request.user.is_anonymous or not request.user.team:
return Response(data=[])

results_package = self.calculate_path_persons(request)

if not results_package:
return Response(data=[])

people, next_url, initial_url = results_package["result"]

return Response(
data={
"results": [{"people": people, "count": len(people)}],
"next": next_url,
"initial": initial_url,
"is_cached": results_package.get("is_cached"),
"last_refresh": results_package.get("last_refresh"),
}
)

@cached_function
def calculate_path_persons(self, request: Request) -> Dict[str, Tuple[list, Optional[str], Optional[str]]]:
if request.user.is_anonymous or not request.user.team:
return {"result": ([], None, None)}

team = request.user.team
filter = PathFilter(request=request, data={"insight": INSIGHT_PATHS})

people, should_paginate = ClickhousePathsPersons(filter, team).run()
limit = filter.limit or 100
next_url = format_offset_absolute_url(request, filter.offset + limit) if should_paginate else None
initial_url = format_offset_absolute_url(request, 0)

# cached_function expects a dict with the key result
return {"result": (people, next_url, initial_url)}

def destroy(self, request: Request, pk=None, **kwargs): # type: ignore
try:
person = Person.objects.get(team=self.team, pk=pk)
Expand Down
151 changes: 151 additions & 0 deletions ee/clickhouse/views/test/test_clickhouse_path_person.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from unittest.mock import patch
from uuid import uuid4

from django.core.cache import cache
from rest_framework import status

from ee.clickhouse.models.event import create_event
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.constants import INSIGHT_PATHS
from posthog.models.person import Person
from posthog.test.base import APIBaseTest


def _create_person(**kwargs):
person = Person.objects.create(**kwargs)
return person


def _create_event(**kwargs):
kwargs.update({"event_uuid": uuid4()})
create_event(**kwargs)


class TestPathPerson(ClickhouseTestMixin, APIBaseTest):
def _create_sample_data(self, num, delete=False):
for i in range(num):
person = _create_person(distinct_ids=[f"user_{i}"], team=self.team)
_create_event(
event="step one",
distinct_id=f"user_{i}",
team=self.team,
timestamp="2021-05-01 00:00:00",
properties={"$browser": "Chrome"},
)
if i % 2 == 0:
_create_event(
event="step two",
distinct_id=f"user_{i}",
team=self.team,
timestamp="2021-05-01 00:10:00",
properties={"$browser": "Chrome"},
)
_create_event(
event="step three",
distinct_id=f"user_{i}",
team=self.team,
timestamp="2021-05-01 00:20:00",
properties={"$browser": "Chrome"},
)
if delete:
person.delete()

def test_basic_format(self):
self._create_sample_data(5)
request_data = {
"insight": INSIGHT_PATHS,
"filter_test_accounts": "false",
"date_from": "2021-05-01",
"date_to": "2021-05-10",
}

response = self.client.get("/api/person/path/", data=request_data)
self.assertEqual(response.status_code, status.HTTP_200_OK)
j = response.json()
first_person = j["results"][0]["people"][0]
self.assertEqual(5, len(j["results"][0]["people"]))
self.assertTrue("id" in first_person and "name" in first_person and "distinct_ids" in first_person)
self.assertEqual(5, j["results"][0]["count"])

def test_basic_format_with_path_start_key_constraints(self):
self._create_sample_data(5)
request_data = {
"insight": INSIGHT_PATHS,
"filter_test_accounts": "false",
"date_from": "2021-05-01",
"date_to": "2021-05-10",
"path_start_key": "2_step two",
}

response = self.client.get("/api/person/path/", data=request_data)
self.assertEqual(response.status_code, status.HTTP_200_OK)
j = response.json()
first_person = j["results"][0]["people"][0]
self.assertEqual(3, len(j["results"][0]["people"]))
self.assertTrue("id" in first_person and "name" in first_person and "distinct_ids" in first_person)
self.assertEqual(3, j["results"][0]["count"])

def test_basic_format_with_start_point_constraints(self):
self._create_sample_data(7)
request_data = {
"insight": INSIGHT_PATHS,
"filter_test_accounts": "false",
"date_from": "2021-05-01",
"date_to": "2021-05-10",
"path_start_key": "1_step two",
"start_point": "step two",
}

response = self.client.get("/api/person/path/", data=request_data)
self.assertEqual(response.status_code, status.HTTP_200_OK)
j = response.json()
first_person = j["results"][0]["people"][0]
self.assertEqual(4, len(j["results"][0]["people"]))
self.assertTrue("id" in first_person and "name" in first_person and "distinct_ids" in first_person)
self.assertEqual(4, j["results"][0]["count"])

def test_basic_pagination(self):
self._create_sample_data(20)
request_data = {
"insight": INSIGHT_PATHS,
"filter_test_accounts": "false",
"date_from": "2021-05-01",
"date_to": "2021-05-10",
"limit": 15,
}

response = self.client.get("/api/person/path/", data=request_data)
self.assertEqual(response.status_code, status.HTTP_200_OK)
j = response.json()
people = j["results"][0]["people"]
next = j["next"]

self.assertEqual(15, len(people))
self.assertNotEqual(None, next)

response = self.client.get(next)
self.assertEqual(response.status_code, status.HTTP_200_OK)
j = response.json()
people = j["results"][0]["people"]
next = j["next"]
self.assertEqual(5, len(people))
self.assertEqual(None, j["next"])

@patch("ee.clickhouse.models.person.delete_person")
def test_basic_pagination_with_deleted(self, delete_person_patch):
cache.clear()
self._create_sample_data(110, delete=True)
request_data = {
"insight": INSIGHT_PATHS,
"filter_test_accounts": "false",
"date_from": "2021-05-01",
"date_to": "2021-05-10",
}

response = self.client.get("/api/person/path/", data=request_data)
self.assertEqual(response.status_code, status.HTTP_200_OK)
j = response.json()
people = j["results"][0]["people"]
next = j["next"]
self.assertEqual(0, len(people))
self.assertIsNone(next)
3 changes: 2 additions & 1 deletion posthog/models/filters/mixins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ def offset_to_dict(self):
class LimitMixin(BaseParamMixin):
@cached_property
def limit(self) -> Optional[int]:
return self._data.get(LIMIT, None)
limit_raw = self._data.get(LIMIT, None)
return int(limit_raw) if limit_raw else 0

@include_dict
def limit_to_dict(self):
Expand Down

0 comments on commit 9e970d3

Please sign in to comment.