Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hog): autocomplete #23332

Merged
merged 8 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -227,26 +227,97 @@
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.1
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
/* user_id:0 request:_snapshot_ */
SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.2
'''
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
/* user_id:0 request:_snapshot_ */
SELECT countIf(steps = 1) step_1,
countIf(steps = 2) step_2,
avg(step_1_average_conversion_time_inner) step_1_average_conversion_time,
median(step_1_median_conversion_time_inner) step_1_median_conversion_time,
prop
FROM
(SELECT aggregation_target,
steps,
avg(step_1_conversion_time) step_1_average_conversion_time_inner,
median(step_1_conversion_time) step_1_median_conversion_time_inner ,
prop
FROM
(SELECT aggregation_target,
steps,
max(steps) over (PARTITION BY aggregation_target,
prop) as max_steps,
step_1_conversion_time ,
prop
FROM
(SELECT *,
if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
prop
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
min(latest_1) over (PARTITION by aggregation_target,
prop
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 ,
if(has([['test'], ['control']], prop), prop, ['Other']) as prop
FROM
(SELECT *,
if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop
FROM
(SELECT e.timestamp as timestamp,
pdi.person_id as aggregation_target,
pdi.person_id as person_id,
if(event = '$pageview', 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = '$pageleave', 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1,
array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic,
prop_basic as prop,
argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
AND (step_0 = 1
OR step_1 = 1) )))
WHERE step_0 = 1 ))
GROUP BY aggregation_target,
steps,
prop
HAVING steps = max_steps)
GROUP BY prop
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.3
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions frontend/src/lib/monaco/CodeEditor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function initEditor(
monaco.languages.register({ id: 'hog', extensions: ['.hog'], mimetypes: ['application/hog'] })
monaco.languages.setLanguageConfiguration('hog', hog.conf())
monaco.languages.setMonarchTokensProvider('hog', hog.language())
monaco.languages.registerCompletionItemProvider('hog', hogQLAutocompleteProvider(HogLanguage.hog))
monaco.languages.registerCodeActionProvider('hog', hogQLMetadataProvider())
}
}
Expand Down
103 changes: 85 additions & 18 deletions posthog/hogql/autocomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from posthog.hogql.filters import replace_filters
from posthog.hogql.functions.mapping import ALL_EXPOSED_FUNCTION_NAMES
from posthog.hogql.parser import parse_select, parse_expr, parse_string_template
from posthog.hogql.parser import parse_select, parse_expr, parse_string_template, parse_program
from posthog.hogql import ast
from posthog.hogql.base import AST, CTE, ConstantType
from posthog.hogql.resolver import resolve_types
Expand All @@ -40,6 +40,8 @@
from hogvm.python.stl import STL

# Every key of `STL` (imported from hogvm.python.stl); offered as function-kind completions for Hog programs.
ALL_HOG_FUNCTIONS = list(STL.keys())
# NOTE(review): presumably a placeholder token spliced into the query at the cursor — usage is not visible here; confirm.
MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$"
# NOTE(review): presumably an upper bound on how many property definitions are fetched for suggestions — confirm against usage.
PROPERTY_DEFINITION_LIMIT = 220


class GetNodeAtPositionTraverser(TraversingVisitor):
Expand Down Expand Up @@ -293,8 +295,52 @@ def extend_responses(
)


MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$"
PROPERTY_DEFINITION_LIMIT = 220
class VariableFinder(TraversingVisitor):
    """Collect the names of Hog variables visible at a given AST node.

    The visitor walks a tree and keeps one set of declared names per open
    scope (``ast.Block``, ``ast.Program`` or ``ast.Function``). When the walk
    reaches the target node, every name recorded so far in the currently open
    scopes is copied into ``node_vars``. Declarations visited after that point
    in a still-open scope are not included unless the target is matched again.
    """

    # Node whose visible variables are being collected.
    node: AST | None = None
    # Chain of nodes from the root down to the node currently being visited.
    stack: list[AST]
    # Scope-opening nodes that are currently open, outermost first.
    blocks: list[AST]
    # Declared names per open scope; parallel to `blocks`.
    vars: list[set[str]]
    # Result: names visible at `node`.
    node_vars: set[str]

    def __init__(self, node: ast.AST):
        super().__init__()
        self.node = node
        self.stack, self.blocks, self.vars = [], [], []
        self.node_vars = set()

    def visit(self, node: ast.AST | None):
        """Visit ``node``, tracking scopes, and stop descending once the target is hit."""
        if node is None:
            return
        if node == self.node:
            # Target reached: snapshot everything declared in the open scopes.
            self.node_vars.update(*self.vars)
            return

        opens_scope = isinstance(node, (ast.Block, ast.Program, ast.Function))
        if opens_scope:
            self.blocks.append(node)
            self.vars.append(set())
        self.stack.append(node)

        super().visit(node)

        self.stack.pop()
        if opens_scope:
            # Leaving the scope: its declarations are no longer visible.
            self.blocks.pop()
            self.vars.pop()

    def visit_variable_declaration(self, node: ast.VariableDeclaration):
        """Record the declared name in the innermost open scope, then keep traversing."""
        if self.vars:
            self.vars[-1].add(node.name)
        super().visit_variable_declaration(node)


def gather_hog_variables_in_scope(root_node, node) -> list[str]:
    """Return the names of the Hog variables visible at ``node``.

    Walks ``root_node`` with a ``VariableFinder`` targeting ``node`` and
    returns the collected names.

    The names are returned in sorted order. They are gathered in a set, whose
    iteration order for strings can differ between interpreter runs, so
    sorting keeps the resulting suggestion order stable.
    """
    finder = VariableFinder(node)
    finder.visit(root_node)
    return sorted(finder.node_vars)


def get_hogql_autocomplete(
Expand Down Expand Up @@ -326,44 +372,38 @@ def get_hogql_autocomplete(
query_to_try = query.query[: query.endPosition] + extra_characters + query.query[query.endPosition :]
query_start = query.startPosition
query_end = query.endPosition + length_to_add
node_ast: ast.AST

if query.language == HogLanguage.HOG_QL:
with timings.measure("parse_select"):
select_ast = parse_select(query_to_try)
select_ast = parse_select(query_to_try, timings=timings)
root_node: ast.AST = select_ast
elif query.language == HogLanguage.HOG_QL_EXPR:
with timings.measure("parse_expr"):
node_ast = parse_expr(query_to_try)
node_ast = parse_expr(query_to_try, timings=timings)
select_ast = cast(ast.SelectQuery, clone_expr(source_query, clear_locations=True))
select_ast.select = [node_ast]
root_node = node_ast
elif query.language == HogLanguage.HOG_TEMPLATE:
with timings.measure("parse_template"):
node_ast = parse_string_template(query_to_try)
node_ast = parse_string_template(query_to_try, timings=timings)
select_ast = cast(ast.SelectQuery, clone_expr(source_query, clear_locations=True))
select_ast.select = [node_ast]
root_node = node_ast
elif query.language == HogLanguage.HOG:
with timings.measure("parse_program"):
node_ast = parse_program(query_to_try, timings=timings)
select_ast = cast(ast.SelectQuery, clone_expr(source_query, clear_locations=True))
root_node = node_ast
else:
raise ValueError(f"Unsupported autocomplete language: {query.language}")

if query.filters:
try:
select_ast = cast(ast.SelectQuery, replace_filters(select_ast, query.filters, team))
except Exception:
pass

if isinstance(select_ast, ast.SelectQuery):
ctes = select_ast.ctes
elif isinstance(select_ast, ast.SelectUnionQuery):
ctes = select_ast.select_queries[0].ctes

with timings.measure("find_node"):
# to account for the magic F' symbol we append to change antlr's mode
extra = 2 if query.language == HogLanguage.HOG_TEMPLATE else 0
find_node = GetNodeAtPositionTraverser(root_node, query_start + extra, query_end + extra)
node = find_node.node
parent_node = find_node.parent_node
nearest_select = find_node.nearest_select_query or select_ast

if isinstance(query.globals, dict) and isinstance(node, ast.Field):
for index, key in enumerate(node.chain):
Expand Down Expand Up @@ -397,6 +437,33 @@ def get_hogql_autocomplete(
details=values,
)

if query.language == HogLanguage.HOG:
hog_vars = gather_hog_variables_in_scope(root_node, node)
extend_responses(
keys=hog_vars,
suggestions=response.suggestions,
kind=Kind.VARIABLE,
)
extend_responses(
ALL_HOG_FUNCTIONS,
response.suggestions,
Kind.FUNCTION,
insert_text=lambda key: f"{key}()",
)
break

if query.filters:
try:
select_ast = cast(ast.SelectQuery, replace_filters(select_ast, query.filters, team))
except Exception:
pass

if isinstance(select_ast, ast.SelectQuery):
ctes = select_ast.ctes
elif isinstance(select_ast, ast.SelectUnionQuery):
ctes = select_ast.select_queries[0].ctes
nearest_select = find_node.nearest_select_query or select_ast

table_has_alias = (
nearest_select is not None
and isinstance(nearest_select, ast.SelectQuery)
Expand Down
28 changes: 27 additions & 1 deletion posthog/hogql/test/test_autocomplete.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from posthog.hogql.database.schema.events import EventsTable
from posthog.hogql.database.schema.persons import PERSONS_FIELDS
from posthog.models.property_definition import PropertyDefinition
from posthog.schema import HogQLAutocomplete, HogQLAutocompleteResponse, HogLanguage, HogQLQuery
from posthog.schema import HogQLAutocomplete, HogQLAutocompleteResponse, HogLanguage, HogQLQuery, Kind
from posthog.test.base import APIBaseTest, ClickhouseTestMixin


Expand Down Expand Up @@ -56,6 +56,19 @@ def _template(
)
return get_hogql_autocomplete(query=autocomplete, team=self.team, database_arg=database)

def _program(
    self, query: str, start: int, end: int, database: Optional[Database] = None
) -> HogQLAutocompleteResponse:
    """Request autocomplete for ``query`` treated as a Hog program, between ``start`` and ``end``."""
    request = HogQLAutocomplete(
        kind="HogQLAutocomplete",
        language=HogLanguage.HOG,
        query=query,
        sourceQuery=HogQLQuery(query="select * from events"),
        startPosition=start,
        endPosition=end,
    )
    return get_hogql_autocomplete(query=request, team=self.team, database_arg=database)

def test_autocomplete(self):
query = "select * from events"
results = self._select(query=query, start=0, end=0)
Expand Down Expand Up @@ -310,3 +323,16 @@ def test_autocomplete_template_strings(self):
assert suggestion is not None
assert suggestion.label == "event"
assert suggestion.insertText == "event"

def test_autocomplete_hog(self):
    """A Hog program suggests the variables declared so far as well as Hog functions."""
    database = create_hogql_database(team_id=self.team.pk, team_arg=self.team)

    program = "let var1 := 3; let otherVar := 5; print(v)"
    # Offset 41 is the position immediately after the `v` inside print(...).
    response = self._program(query=program, start=41, end=41, database=database)

    variable_suggestions = [item for item in response.suggestions if item.kind == Kind.VARIABLE]
    assert len(variable_suggestions) == 2
    variable_labels = [item.label for item in variable_suggestions]
    assert sorted(variable_labels) == ["otherVar", "var1"]

    function_suggestions = [item for item in response.suggestions if item.kind == Kind.FUNCTION]
    assert len(function_suggestions) > 0
assert len(suggestions) > 0
Loading