diff --git a/posthog/api/capture.py b/posthog/api/capture.py index cf2a2fb3cbf6f..b68834c579888 100644 --- a/posthog/api/capture.py +++ b/posthog/api/capture.py @@ -8,6 +8,7 @@ from datetime import datetime from dateutil import parser from sentry_sdk import push_scope +import lzstring # type: ignore import re import json import secrets @@ -19,9 +20,6 @@ def _load_data(request) -> Optional[Union[Dict, List]]: if request.method == "POST": if request.content_type == "application/json": data = request.body - - if request.headers.get("content-encoding", "").lower() == "gzip": - data = gzip.decompress(data) else: data = request.POST.get("data") else: @@ -33,6 +31,20 @@ def _load_data(request) -> Optional[Union[Dict, List]]: with push_scope() as scope: scope.set_context("data", data) + compression = ( + request.GET.get("compression") or request.POST.get("compression") or request.headers.get("content-encoding", "") + ) + compression = compression.lower() + + if compression == "gzip": + data = gzip.decompress(data) + + if compression == "lz64": + if isinstance(data, str): + data = lzstring.LZString().decompressFromBase64(data.replace(" ", "+")) + else: + data = lzstring.LZString().decompressFromBase64(data.decode().replace(" ", "+")) + # Is it plain json? try: data = json.loads(data) diff --git a/posthog/api/decide.py b/posthog/api/decide.py index 91a17c67de2eb..cb96d13bd6402 100644 --- a/posthog/api/decide.py +++ b/posthog/api/decide.py @@ -42,6 +42,7 @@ def get_decide(request: HttpRequest): "config": {"enable_collect_everything": True}, "editorParams": {}, "isAuthenticated": False, + "supportedCompression": ["gzip", "lz64"], } if request.user.is_authenticated: diff --git a/posthog/api/test/test_capture.py b/posthog/api/test/test_capture.py index eead41224271e..443734a1dfd3e 100644 --- a/posthog/api/test/test_capture.py +++ b/posthog/api/test/test_capture.py @@ -8,6 +8,7 @@ import base64 import json import gzip +import lzstring # type: ignore class TestCapture(BaseTest): @@ -133,7 +134,7 @@ def test_batch(self, patch_process_event): @patch("posthog.models.team.TEAM_CACHE", {}) @patch("posthog.tasks.process_event.process_event.delay") - def test_batch_gzip(self, patch_process_event): + def test_batch_gzip_header(self, patch_process_event): data = { "api_key": self.team.api_token, "batch": [{"type": "capture", "event": "user signed up", "distinct_id": "2"}], @@ -161,6 +162,65 @@ def test_batch_gzip(self, patch_process_event): }, ) + @patch("posthog.models.team.TEAM_CACHE", {}) + @patch("posthog.tasks.process_event.process_event.delay") + def test_batch_gzip_param(self, patch_process_event): + data = { + "api_key": self.team.api_token, + "batch": [{"type": "capture", "event": "user signed up", "distinct_id": "2"}], + } + + response = self.client.generic( + "POST", + "/batch/?compression=gzip", + data=gzip.compress(json.dumps(data).encode()), + content_type="application/json", + ) + + arguments = patch_process_event.call_args[1] + arguments.pop("now") # can't compare fakedate + arguments.pop("sent_at") # can't compare fakedate + self.assertDictEqual( + arguments, + { + "distinct_id": "2", + "ip": "127.0.0.1", + "site_url": "http://testserver", + "data": data["batch"][0], + "team_id": self.team.pk, + }, + ) + + @patch("posthog.models.team.TEAM_CACHE", {}) + @patch("posthog.tasks.process_event.process_event.delay") + def test_batch_lzstring(self, patch_process_event): + data = { + "api_key": self.team.api_token, + "batch": [{"type": "capture", "event": "user signed up", "distinct_id": "2"}], + } + + response = self.client.generic( + "POST", + "/batch/", + data=lzstring.LZString().compressToBase64(json.dumps(data)).encode(), + content_type="application/json", + HTTP_CONTENT_ENCODING="lz64", + ) + + arguments = patch_process_event.call_args[1] + arguments.pop("now") # can't compare fakedate + arguments.pop("sent_at") # can't compare fakedate + self.assertDictEqual( + arguments, + { + "distinct_id": "2", + "ip": "127.0.0.1", + "site_url": "http://testserver", + "data": data["batch"][0], + "team_id": self.team.pk, + }, + ) + def test_batch_incorrect_token(self): response = self.client.post( "/batch/", diff --git a/requirements.txt b/requirements.txt index a11180ffde889..ab66782863f34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,7 @@ django-loginas==0.3.8 django-redis==4.12.1 djangorestframework==3.11.0 djangorestframework-csv==2.1.0 +future==0.18.2 gunicorn==20.0.4 idna==2.8 importlib-metadata==1.6.0 @@ -29,6 +30,7 @@ inflection==0.3.1 itypes==1.1.0 Jinja2==2.11.1 kombu==4.6.8 +lzstring==1.0.4 MarkupSafe==1.1.1 monotonic==1.5 numpy==1.18.1