Skip to content

Commit

Permalink
Add Paths Person API dropoff functionality (#6124)
Browse files Browse the repository at this point in the history
  • Loading branch information
neilkakkar authored Sep 27, 2021
1 parent 6b8d0de commit 2fc8a9f
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 13 deletions.
25 changes: 17 additions & 8 deletions ee/clickhouse/queries/paths/paths_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@

class ClickhousePathsPersons(ClickhousePaths):
"""
`path_start_key` and `path_end_key` are two new params for this class.
These determine the start and end point of Paths you want. Both of these are optional.
`path_start_key`, `path_end_key`, and `path_dropoff_key` are three new params for this class.
These determine the start and end point of Paths you want. All of these are optional.
Not specifying them means "get me all users on this path query".
Only specifying `path_start_key` means "get me all users whose paths start at this key"
Only specifying `path_end_key` means "get me all users whose paths end at this key"
Specifying both means "get me all users whose path starts at `start_key` and ends at `end_key`."
Specifying `path_dropoff_key` means "get me users who dropped off after this key". If you specify
this key, the other two keys are ignored.
Note that:
Persons are calculated only between direct paths. There should not be any
other path item between start and end key.
Expand Down Expand Up @@ -48,13 +52,18 @@ def get_query(self):

def get_person_path_filter(self) -> str:
conditions = []
if self._filter.path_start_key:
conditions.append("last_path_key = %(path_start_key)s")
self.params["path_start_key"] = self._filter.path_start_key

if self._filter.path_end_key:
conditions.append("path_key = %(path_end_key)s")
self.params["path_end_key"] = self._filter.path_end_key
if self._filter.path_dropoff_key:
conditions.append("path_dropoff_key = %(path_dropoff_key)s")
self.params["path_dropoff_key"] = self._filter.path_dropoff_key
else:
if self._filter.path_start_key:
conditions.append("last_path_key = %(path_start_key)s")
self.params["path_start_key"] = self._filter.path_start_key

if self._filter.path_end_key:
conditions.append("path_key = %(path_end_key)s")
self.params["path_end_key"] = self._filter.path_end_key

if conditions:
return " AND ".join(conditions)
Expand Down
54 changes: 52 additions & 2 deletions ee/clickhouse/queries/test/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath

maxDiff = None

def _get_people_at_path(self, filter, path_start, path_end, funnel_filter=None):
person_filter = filter.with_data({"path_start_key": path_start, "path_end_key": path_end})
def _get_people_at_path(self, filter, path_start=None, path_end=None, funnel_filter=None, path_dropoff=None):
    """Run the persons query for `filter` narrowed by the given path keys.

    The three path keys are merged into the filter with `with_data`; keys left
    as None are still passed through with a None value. Returns the first
    column of every row produced by `ClickhousePathsPersons(...)._exec_query()`.
    """
    path_keys = {
        "path_start_key": path_start,
        "path_end_key": path_end,
        "path_dropoff_key": path_dropoff,
    }
    persons_query = ClickhousePathsPersons(filter.with_data(path_keys), self.team, funnel_filter)
    return [columns[0] for columns in persons_query._exec_query()]

Expand Down Expand Up @@ -1494,3 +1496,51 @@ def test_path_grouping_with_evil_input(self):
{"source": "2_/2/bar/aaa", "target": "3_/3*", "value": 1, "average_conversion_time": 2 * ONE_MINUTE},
],
)

def test_paths_person_dropoffs(self):
    """Check dropoff, end-key and start-key person counts along a four-step path.

    Three cohorts are created: 5 users stop after "step two", 10 stop after
    "step three" and 20 continue through "step four".
    """
    # Every user performs a leading slice of these events, at these timestamps.
    event_times = (
        ("step one", "2021-05-01 00:00:00"),
        ("step two", "2021-05-01 00:04:00"),
        ("step three", "2021-05-01 00:05:00"),
        ("step four", "2021-05-01 00:06:00"),
    )
    # (user index range, number of leading events each user in the cohort performs)
    cohorts = (
        (range(5), 2),  # 5 people do 2 events
        (range(5, 15), 3),  # 10 people do 3 events
        (range(15, 35), 4),  # 20 people do 4 events
    )
    for user_indexes, events_done in cohorts:
        for idx in user_indexes:
            distinct_id = f"user_{idx}"
            Person.objects.create(distinct_ids=[distinct_id], team=self.team)
            for event_name, timestamp in event_times[:events_done]:
                _create_event(event=event_name, distinct_id=distinct_id, team=self.team, timestamp=timestamp)

    path_filter = PathFilter(
        data={
            "include_event_types": ["custom_event"],
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
        }
    )

    # (path key, people who dropped off at it, people who reached it, people who continued past it)
    expected_counts = (
        ("2_step two", 5, 35, 30),
        ("3_step three", 10, 30, 20),
        ("4_step four", 20, 20, 0),
    )
    for path_key, dropped_off, reached, continued in expected_counts:
        self.assertEqual(dropped_off, len(self._get_people_at_path(path_filter, path_dropoff=path_key)))
        self.assertEqual(reached, len(self._get_people_at_path(path_filter, path_end=path_key)))
        self.assertEqual(continued, len(self._get_people_at_path(path_filter, path_start=path_key)))
4 changes: 3 additions & 1 deletion ee/clickhouse/sql/paths/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
conversion_time,
event_in_session_index,
concat(toString(event_in_session_index), '_', path) as path_key,
if(event_in_session_index > 1, neighbor(path_key, -1), null) AS last_path_key
if(event_in_session_index > 1, neighbor(path_key, -1), null) AS last_path_key,
path_dropoff_key
FROM (
SELECT person_id
Expand All @@ -19,6 +20,7 @@
{target_clause}
, arrayDifference(limited_timings) as timings_diff
, arrayZip(limited_path, timings_diff) as limited_path_timings
, concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */
FROM (
SELECT person_id
, path_time_tuple.1 as path_basic
Expand Down
1 change: 1 addition & 0 deletions posthog/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ class AvailableFeature(str, Enum):
PATH_GROUPINGS = "path_groupings"
PATH_START_KEY = "path_start_key"
PATH_END_KEY = "path_end_key"
PATH_DROPOFF_KEY = "path_dropoff_key"


class FunnelOrderType(str, Enum):
Expand Down
12 changes: 10 additions & 2 deletions posthog/models/filters/mixins/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
END_POINT,
FUNNEL_PATHS,
PAGEVIEW_EVENT,
PATH_DROPOFF_KEY,
PATH_END_KEY,
PATH_GROUPINGS,
PATH_START_KEY,
Expand Down Expand Up @@ -165,11 +166,15 @@ def path_groupings_to_dict(self):
class PathPersonsMixin(BaseParamMixin):
@cached_property
def path_start_key(self) -> Optional[str]:
return self._data.get(PATH_START_KEY, None)
return self._data.get(PATH_START_KEY)

@cached_property
def path_end_key(self) -> Optional[str]:
return self._data.get(PATH_END_KEY, None)
return self._data.get(PATH_END_KEY)

@cached_property
def path_dropoff_key(self) -> Optional[str]:
    """Return the `PATH_DROPOFF_KEY` entry of `self._data`.

    The lookup uses `.get` with no default, so a missing key yields None
    (assuming `_data` is a dict-like mapping).
    """
    dropoff_key = self._data.get(PATH_DROPOFF_KEY)
    return dropoff_key

@include_dict
def path_start_end_to_dict(self):
Expand All @@ -180,4 +185,7 @@ def path_start_end_to_dict(self):
if self.path_end_key:
result[PATH_END_KEY] = self.path_end_key

if self.path_dropoff_key:
result[PATH_DROPOFF_KEY] = self.path_dropoff_key

return result

0 comments on commit 2fc8a9f

Please sign in to comment.