Skip to content

Commit

Permalink
Funnel persons per step and dropoff (#4883)
Browse files Browse the repository at this point in the history
* wip: pagination for persons on clickhouse funnels

* wip: added offset support for getting a list of persons; added support for conversion window;

* fixed mypy exception

* helper function to insert data for local testing

* moved generate code into separate class for more functionality later

* corrected person_distinct_id to use the person id from postgres

* minor corrections to generate local class along with addition of data cleanup via destroy() method

* reduce the number of persons who make it to each step

* moved funnel queries to a new folder for better organization; separated funnel_persons and funnel_trends_persons into individual classes;

* funnel persons and tests

* initial implementation

* invoke the funnel or funnel trends class respectively

* add a test

* add breakdown handling and first test

* add test stubs

* remove repeats

* mypy corrections and PR feedback

* run funnel test suite on new query implementation

* remove imports

* corrected tests

* minor test updates

* correct func name

* fix types

* func name change

* move builder functions to funnel base

* add test classe for new funnel

* Handle multiple same events in the funnel (#4863)

* dedup + tests

* deep equality. Tests to come

* write test for entity equality

* finish testing funnels

* clean up comments

* add ability to specify per step or dropoff persons

* remove defaults

* remove funnel_window parameter unless it's needed

* add param to filters

* test api

* remove print

* fix tests

* change distribution

* add none condition for funnel step

* add order by

* remove funnel window days

Co-authored-by: Buddy Williams <buddy@posthog.com>
Co-authored-by: Neil Kakkar <neilkakkar@gmail.com>
  • Loading branch information
3 people committed Jun 29, 2021
1 parent 41754f9 commit 562e3ba
Show file tree
Hide file tree
Showing 13 changed files with 195 additions and 28 deletions.
3 changes: 1 addition & 2 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ee.clickhouse.sql.person import GET_LATEST_PERSON_DISTINCT_ID_SQL
from posthog.constants import TREND_FILTER_TYPE_ACTIONS
from posthog.models import Action, Entity, Filter, Team
from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin
from posthog.models.filters.mixins.funnel import FunnelWindowDaysMixin
from posthog.queries.funnel import Funnel
from posthog.utils import relative_date_parse

Expand Down Expand Up @@ -90,7 +90,6 @@ def _exec_query(self) -> List[Tuple]:
}

query = self.get_query(format_properties)

return sync_execute(query, self.params)

def _get_steps_per_person_query(self):
Expand Down
25 changes: 22 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_persons.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,34 @@
from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase
from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_SQL
from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_BY_STEP_SQL
from posthog.models import Person


class ClickhouseFunnelPersons(ClickhouseFunnelBase):
def get_query(self, format_properties):
return FUNNEL_PERSONS_SQL.format(**format_properties)
steps_per_person_query = self._get_steps_per_person_query()
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
steps_per_person_query=steps_per_person_query,
persons_steps=self._get_funnel_person_step_condition()
)

def _get_funnel_person_step_condition(self):
step_num = self._filter.funnel_step
max_steps = len(self._filter.entities)

if step_num is None:
raise ValueError("funnel_step should not be none")

if step_num >= 0:
self.params.update({"step_num": [i for i in range(step_num, max_steps + 1)]})
return "steps IN %(step_num)s"
else:
self.params.update({"step_num": abs(step_num) - 1})
return "steps = %(step_num)s"

def _format_results(self, results):
formatted_results = []
for row in results:
distinct_ids, email = Person.get_distinct_ids_and_email_by_id(row[1], self._team.id)
distinct_ids, email = Person.get_distinct_ids_and_email_by_id(row[0], self._team.id)
formatted_results.append({"max_step": row[0], "distinct_ids": distinct_ids, "email": email})
return formatted_results
6 changes: 6 additions & 0 deletions ee/clickhouse/queries/funnels/test/test_funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def test_basic_funnel_with_repeat_steps(self):
{"id": "user signed up", "type": "events", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
}

filter = Filter(data=filters)
Expand Down Expand Up @@ -76,6 +77,7 @@ def test_advanced_funnel_with_repeat_steps(self):
{"id": "$pageview", "type": "events", "order": 4},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
}

filter = Filter(data=filters)
Expand Down Expand Up @@ -135,6 +137,7 @@ def test_advanced_funnel_with_repeat_steps_out_of_order_events(self):
{"id": "$pageview", "type": "events", "order": 4},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
}

filter = Filter(data=filters)
Expand Down Expand Up @@ -212,6 +215,7 @@ def test_funnel_with_actions(self):
{"id": sign_up_action.id, "math": "wau", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
}

filter = Filter(data=filters)
Expand Down Expand Up @@ -249,6 +253,7 @@ def test_funnel_with_actions_and_events(self):
{"id": sign_up_action.id, "math": "wau", "order": 3},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
}

filter = Filter(data=filters)
Expand Down Expand Up @@ -302,6 +307,7 @@ def test_funnel_with_matching_properties(self):
}, # TODO(nk): does this supercede the above event? i.e. order 3 is subset of order 4? doesn't make sense to allow this in a funnel
],
"insight": INSIGHT_FUNNELS,
"funnel_window_days": 14,
}

filter = Filter(data=filters)
Expand Down
102 changes: 96 additions & 6 deletions ee/clickhouse/queries/funnels/test/test_funnel_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.constants import INSIGHT_FUNNELS, TRENDS_FUNNEL
from posthog.models import Filter
from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin
from posthog.models.filters.mixins.funnel import FunnelWindowDaysMixin
from posthog.models.person import Person
from posthog.test.base import APIBaseTest

Expand All @@ -27,9 +27,97 @@ def _create_event(**kwargs):


class TestFunnel(ClickhouseTestMixin, APIBaseTest):
def setUp(self):
self._create_sample_data()
super().setUp()
def _create_sample_data_multiple_dropoffs(self):
for i in range(5):
_create_person(distinct_ids=[f"user_{i}"], team=self.team)
_create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00")
_create_event(event="step two", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-03 00:00:00")
_create_event(event="step three", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-05 00:00:00")

for i in range(5, 15):
_create_person(distinct_ids=[f"user_{i}"], team=self.team)
_create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00")
_create_event(event="step two", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-03 00:00:00")

for i in range(15, 35):
_create_person(distinct_ids=[f"user_{i}"], team=self.team)
_create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00")

def test_first_step(self):
self._create_sample_data_multiple_dropoffs()
data = {
"insight": INSIGHT_FUNNELS,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-07 00:00:00",
"funnel_window_days": 7,
"funnel_step": 1,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}
filter = Filter(data=data)
results = ClickhouseFunnelPersons(filter, self.team)._exec_query()
self.assertEqual(35, len(results))

def test_last_step(self):
self._create_sample_data_multiple_dropoffs()
data = {
"insight": INSIGHT_FUNNELS,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-07 00:00:00",
"funnel_window_days": 7,
"funnel_step": 3,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}
filter = Filter(data=data)
results = ClickhouseFunnelPersons(filter, self.team)._exec_query()
self.assertEqual(5, len(results))

def test_second_step_dropoff(self):
self._create_sample_data_multiple_dropoffs()
data = {
"insight": INSIGHT_FUNNELS,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-07 00:00:00",
"funnel_window_days": 7,
"funnel_step": -2,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}
filter = Filter(data=data)
results = ClickhouseFunnelPersons(filter, self.team)._exec_query()
self.assertEqual(20, len(results))

def test_last_step_dropoff(self):
self._create_sample_data_multiple_dropoffs()
data = {
"insight": INSIGHT_FUNNELS,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-07 00:00:00",
"funnel_window_days": 7,
"funnel_step": -3,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}
filter = Filter(data=data)
results = ClickhouseFunnelPersons(filter, self.team)._exec_query()
self.assertEqual(10, len(results))

def _create_sample_data(self):
for i in range(250):
Expand All @@ -39,13 +127,14 @@ def _create_sample_data(self):
_create_event(event="step three", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-05 00:00:00")

def test_basic_offset(self):
self._create_sample_data()
data = {
"insight": INSIGHT_FUNNELS,
"display": TRENDS_FUNNEL,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-07 00:00:00",
"funnel_window_days": 7,
"funnel_step": 1,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
Expand Down Expand Up @@ -79,13 +168,14 @@ def test_funnel_window_days_to_milliseconds(self):
self.assertEqual(one_day, 86_400_000)

def test_basic_conversion_window(self):
self._create_sample_data()
data = {
"insight": INSIGHT_FUNNELS,
"display": TRENDS_FUNNEL,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-07 00:00:00",
"funnel_window_days": 7,
"funnel_step": 1,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
Expand Down
11 changes: 11 additions & 0 deletions ee/clickhouse/sql/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@
;
"""

FUNNEL_PERSONS_BY_STEP_SQL = """
SELECT person_id
FROM
({steps_per_person_query})
WHERE {persons_steps}
ORDER BY person_id
LIMIT 100
OFFSET {offset}
SETTINGS allow_experimental_window_functions = 1
"""

FUNNEL_INNER_EVENT_STEPS_QUERY = """
SELECT
person_id,
Expand Down
30 changes: 23 additions & 7 deletions ee/clickhouse/views/test/test_clickhouse_funnel_person.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,49 @@
import json
from uuid import uuid4

from rest_framework import status

from ee.clickhouse.generate_local import GenerateLocal
from ee.clickhouse.models.event import create_event
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.constants import INSIGHT_FUNNELS, TRENDS_FUNNEL
from posthog.models.person import Person
from posthog.test.base import APIBaseTest


def _create_person(**kwargs):
person = Person.objects.create(**kwargs)
return Person(id=person.uuid, uuid=person.uuid)


def _create_event(**kwargs):
kwargs.update({"event_uuid": uuid4()})
create_event(**kwargs)


class TestFunnelPerson(ClickhouseTestMixin, APIBaseTest):
def setUp(self):
super().setUp()
GenerateLocal(self.team).generate()
def _create_sample_data(self):
for i in range(250):
_create_person(distinct_ids=[f"user_{i}"], team=self.team)
_create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00")
_create_event(event="step two", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-03 00:00:00")
_create_event(event="step three", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-05 00:00:00")

def test_basic_pagination(self):
self._create_sample_data()
request_data = {
"insight": INSIGHT_FUNNELS,
"display": TRENDS_FUNNEL,
"interval": "day",
"actions": json.dumps([]),
"events": json.dumps(
[{"id": "step one", "order": 0}, {"id": "step two", "order": 1}, {"id": "step three", "order": 2},]
),
"properties": json.dumps([]),
"funnel_window_days": 14,
"funnel_step": 1,
"filter_test_accounts": "false",
"new_entity": json.dumps([]),
"date_from": "2010-01-01",
"date_to": "2010-01-10",
"date_from": "2021-05-01",
"date_to": "2021-05-10",
}

response = self.client.get("/api/person/funnel/", data=request_data)
Expand Down
2 changes: 0 additions & 2 deletions ee/tasks/test/test_calculate_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def test_create_trends_cohort(self, _insert_cohort_from_query):
],
"entity_id": "$pageview",
"entity_type": "events",
"funnel_window_days": 14,
"insight": "TRENDS",
"interval": "day",
},
Expand Down Expand Up @@ -184,7 +183,6 @@ def test_create_trends_cohort(self, _insert_cohort_from_query):
],
"entity_id": "$pageview",
"entity_type": "events",
"funnel_window_days": 14,
"insight": "TRENDS",
"interval": "day",
},
Expand Down
5 changes: 3 additions & 2 deletions posthog/api/test/test_insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ def test_insight_funnels_basic_post(self):
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "user did things", "type": "events", "order": 1},
]
],
"funnel_window_days": 14,
},
).json()

Expand All @@ -200,7 +201,7 @@ def test_insight_funnels_basic_get(self):
event_factory(team=self.team, event="user signed up", distinct_id="1")
event_factory(team=self.team, event="user did things", distinct_id="1")
response = self.client.get(
"/api/insight/funnel/?events={}".format(
"/api/insight/funnel/?funnel_window_days=14&events={}".format(
json.dumps(
[
{"id": "user signed up", "type": "events", "order": 0},
Expand Down
1 change: 1 addition & 0 deletions posthog/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
ENTITY_TYPE = "entity_type"
ENTITY_MATH = "entity_math"
FUNNEL_WINDOW_DAYS = "funnel_window_days"
FUNNEL_STEP = "funnel_step"

RETENTION_RECURRING = "retention_recurring"
RETENTION_FIRST_TIME = "retention_first_time"
Expand Down
3 changes: 2 additions & 1 deletion posthog/models/filters/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
SessionMixin,
ShownAsMixin,
)
from posthog.models.filters.mixins.funnel_window_days import FunnelWindowDaysMixin
from posthog.models.filters.mixins.funnel import FunnelStep, FunnelWindowDaysMixin
from posthog.models.filters.mixins.property import PropertyMixin


Expand All @@ -50,6 +50,7 @@ class Filter(
BaseFilter,
FormulaMixin,
FunnelWindowDaysMixin,
FunnelStep,
):
"""
Filters allow us to describe what events to show/use in various places in the system, for example Trends or Funnels.
Expand Down
Loading

0 comments on commit 562e3ba

Please sign in to comment.