From 3814ffdc2106b6dbd0d238e4c0478b5e42775022 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Sat, 12 Apr 2025 23:13:53 +0200 Subject: [PATCH 01/14] first draft of downloading and storing the pretalx data --- intbot/core/integrations/pretalx.py | 67 +++++++ intbot/core/models.py | 29 +++ intbot/intbot/settings.py | 5 + .../tests/test_integrations/test_pretalx.py | 165 ++++++++++++++++++ 4 files changed, 266 insertions(+) create mode 100644 intbot/core/integrations/pretalx.py create mode 100644 intbot/tests/test_integrations/test_pretalx.py diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py new file mode 100644 index 0000000..a318cb9 --- /dev/null +++ b/intbot/core/integrations/pretalx.py @@ -0,0 +1,67 @@ +from typing import Any + +import httpx +from core.models import PretalxData +from django.conf import settings + +PRETALX_EVENT = "ep2025" +base_url = f"https://pretalx.com/api/events/{PRETALX_EVENT}/" + +RESOURCES = { + # Questions need to be passed to include answers in the same endpoint, + # saving us later time with joining the answers. + PretalxData.PretalxEndpoints.submissions: "submissions?questions=all", + PretalxData.PretalxEndpoints.speakers: "speakers?questions=all", +} + + +JsonType = dict[str, Any] + + +def fetch_pretalx_data(resource) -> list[JsonType]: + headers = { + "Authorization": f"Token {settings.PRETALX_API_TOKEN}", + "Content-Type": "application/json", + } + + endpoint = RESOURCES[resource] + url = base_url + f"{endpoint}" + + # Pretalx paginates the output, so we will need to do multiple requests and + # then merge mutliple pages to one big dictionary + res0 = [] + data = {"next": url} + n = 0 + while url := data["next"]: + n += 1 + response = httpx.get(url, headers=headers) + + if response.status_code != 200: + raise Exception(f"Error {response.status_code}: {response.text}") + + data = response.json() + res0 += data["results"] + + return res0 + + +def download_latest_submissions() -> PretalxData: + data = fetch_pretalx_data(PretalxData.PretalxEndpoints.submissions) + + pretalx_data = PretalxData.objects.create( + endpoint=PretalxData.PretalxEndpoints.submissions, + content=data, + ) + + return pretalx_data + + +def download_latest_speakers() -> PretalxData: + data = fetch_pretalx_data(PretalxData.PretalxEndpoints.speakers) + + pretalx_data = PretalxData.objects.create( + endpoint=PretalxData.PretalxEndpoints.speakers, + content=data, + ) + + return pretalx_data diff --git a/intbot/core/models.py b/intbot/core/models.py index 5a09f63..27724d6 100644 --- a/intbot/core/models.py +++ b/intbot/core/models.py @@ -81,3 +81,32 @@ def summary(self) -> str: def __str__(self): return f"{self.uuid} {self.author}: {self.content[:30]}" + + +class PretalxData(models.Model): + """ + Table to store raw data download from pretalx for later parsing. + + We first download data from pretalx to this table, and then fire a separate + background task that pulls data from this table and stores in separate + "business" tables, like "Proposal" or "Speaker". + """ + + class PretalxEndpoints(models.TextChoices): + submissions = "submissions", "Submissions" + speakers = "speakers", "Speakers" + schedule = "schedule", "Schedule" + + uuid = models.UUIDField(default=uuid.uuid4) + endpoint = models.CharField( + max_length=255, + choices=PretalxEndpoints.choices, + ) + content = models.JSONField() + + created_at = models.DateTimeField(auto_now_add=True) + modified_at = models.DateTimeField(auto_now=True) + processed_at = models.DateTimeField(blank=True, null=True) + + def __str__(self): + return f"{self.uuid}" diff --git a/intbot/intbot/settings.py b/intbot/intbot/settings.py index 1a7b907..a1d555a 100644 --- a/intbot/intbot/settings.py +++ b/intbot/intbot/settings.py @@ -197,6 +197,9 @@ def get(name) -> str: ZAMMAD_GROUP_SPONSORS = get("ZAMMAD_GROUP_SPONSORS") ZAMMAD_GROUP_GRANTS = get("ZAMMAD_GROUP_GRANTS") +# Pretalx +PRETALX_API_TOKEN = get("PRETALX_API_TOKEN") + if DJANGO_ENV == "dev": DEBUG = True @@ -282,6 +285,8 @@ def get(name) -> str: ZAMMAD_GROUP_HELPDESK = "TestZammad Helpdesk" ZAMMAD_GROUP_BILLING = "TestZammad Billing" + PRETALX_API_TOKEN = "Test-Pretalx-API-token" + elif DJANGO_ENV == "local_container": DEBUG = False diff --git a/intbot/tests/test_integrations/test_pretalx.py b/intbot/tests/test_integrations/test_pretalx.py new file mode 100644 index 0000000..7e2e04c --- /dev/null +++ b/intbot/tests/test_integrations/test_pretalx.py @@ -0,0 +1,165 @@ +import respx +import pytest +from core.integrations import pretalx +from core.models import PretalxData +from httpx import Response + + +@respx.mock +def test_fetch_submissions_from_pretalx(): + endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.submissions] + url = pretalx.base_url + endpoint + respx.get(url).mock( + return_value=Response( + 200, + json={ + "results": [ + {"hello": "world"}, + ], + "next": f"{url}&page=2", + }, + ) + ) + respx.get(url + "&page=2").mock( + return_value=Response( + 200, + json={ + "results": [ + {"foo": "bar"}, + ], + # It's important to make it last page in tests. + # Otherwise it will be infinite loop :) + "next": None, + }, + ) + ) + + submissions = pretalx.fetch_pretalx_data( + PretalxData.PretalxEndpoints.submissions, + ) + + assert submissions == [ + {"hello": "world"}, + {"foo": "bar"}, + ] + + +@respx.mock +def test_fetch_speakers_from_pretalx(): + endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.speakers] + url = pretalx.base_url + endpoint + respx.get(url).mock( + return_value=Response( + 200, + json={ + "results": [ + {"hello": "world"}, + ], + "next": f"{url}&page=2", + }, + ) + ) + respx.get(url + "&page=2").mock( + return_value=Response( + 200, + json={ + "results": [ + {"foo": "bar"}, + ], + # It's important to make it last page in tests. + # Otherwise it will be infinite loop :) + "next": None, + }, + ) + ) + + submissions = pretalx.fetch_pretalx_data( + PretalxData.PretalxEndpoints.speakers, + ) + + assert submissions == [ + {"hello": "world"}, + {"foo": "bar"}, + ] + + +@respx.mock +@pytest.mark.django_db +def test_download_latest_submissions(): + endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.submissions] + url = pretalx.base_url + endpoint + respx.get(url).mock( + return_value=Response( + 200, + json={ + "results": [ + {"hello": "world"}, + ], + "next": f"{url}&page=2", + }, + ) + ) + respx.get(url + "&page=2").mock( + return_value=Response( + 200, + json={ + "results": [ + {"foo": "bar"}, + ], + # It's important to make it last page in tests. + # Otherwise it will be infinite loop :) + "next": None, + }, + ) + ) + + pretalx.download_latest_submissions() + + pd = PretalxData.objects.get(endpoint=PretalxData.PretalxEndpoints.submissions) + + assert pd.endpoint == "submissions" + assert pd.content == [ + {"hello": "world"}, + {"foo": "bar"}, + ] + +@respx.mock +@pytest.mark.django_db +def test_download_latest_speakers(): + endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.speakers] + url = pretalx.base_url + endpoint + respx.get(url).mock( + return_value=Response( + 200, + json={ + "results": [ + {"hello": "world"}, + ], + "next": f"{url}&page=2", + }, + ) + ) + respx.get(url + "&page=2").mock( + return_value=Response( + 200, + json={ + "results": [ + {"foo": "bar"}, + ], + # It's important to make it last page in tests. + # Otherwise it will be infinite loop :) + "next": None, + }, + ) + ) + + pretalx.download_latest_speakers() + + pd = PretalxData.objects.get(endpoint=PretalxData.PretalxEndpoints.speakers) + + assert pd.endpoint == "speakers" + assert pd.content == [ + {"hello": "world"}, + {"foo": "bar"}, + ] + From 9672021ae8a6b741599311daf108837a3a46f517 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Sat, 12 Apr 2025 23:35:21 +0200 Subject: [PATCH 02/14] add basic admin support --- intbot/core/admin.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/intbot/core/admin.py b/intbot/core/admin.py index 25ada2e..b35f6ba 100644 --- a/intbot/core/admin.py +++ b/intbot/core/admin.py @@ -1,6 +1,6 @@ import json -from core.models import DiscordMessage, Webhook +from core.models import DiscordMessage, PretalxData, Webhook from django.contrib import admin from django.utils.html import format_html @@ -67,5 +67,29 @@ def content_short(self, obj): return f"{obj.content[:10]}...{obj.content[-10:]}" +class PretalxDataAdmin(admin.ModelAdmin): + list_display = [ + "uuid", + "endpoint", + "created_at", + "modified_at", + ] + list_filter = ["created_at"] + readonly_fields = fields = [ + "uuid", + "endpoint", + "pretty_content", + "created_at", + "modified_at", + "processed_at", + ] + + def pretty_content(self, obj): + return format_html("
{}
", json.dumps(obj.content, indent=4)) + + pretty_content.short_description = "Content" + + admin.site.register(Webhook, WebhookAdmin) admin.site.register(DiscordMessage, DiscordMessageAdmin) +admin.site.register(PretalxData, PretalxDataAdmin) From 7265cec730fab0c30692fdd736bfaabc180f5f59 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 13:13:45 +0200 Subject: [PATCH 03/14] small refactoring and review feedback --- intbot/core/admin.py | 8 +- intbot/core/integrations/pretalx.py | 57 +++-- intbot/core/models.py | 6 +- .../tests/test_integrations/test_pretalx.py | 201 ++++++++---------- 4 files changed, 130 insertions(+), 142 deletions(-) diff --git a/intbot/core/admin.py b/intbot/core/admin.py index b35f6ba..ec34cc2 100644 --- a/intbot/core/admin.py +++ b/intbot/core/admin.py @@ -26,12 +26,12 @@ class WebhookAdmin(admin.ModelAdmin): "processed_at", ] - def pretty_meta(self, obj): + def pretty_meta(self, obj: Webhook): return format_html("
{}
", json.dumps(obj.meta, indent=4)) pretty_meta.short_description = "Meta" - def pretty_content(self, obj): + def pretty_content(self, obj: Webhook): return format_html("
{}
", json.dumps(obj.content, indent=4)) pretty_content.short_description = "Content" @@ -61,7 +61,7 @@ class DiscordMessageAdmin(admin.ModelAdmin): "sent_at", ] - def content_short(self, obj): + def content_short(self, obj: DiscordMessage): # NOTE(artcz) This can create false shortcuts, but for most messages is # good enough, because most of them are longer than 20 chars return f"{obj.content[:10]}...{obj.content[-10:]}" @@ -84,7 +84,7 @@ class PretalxDataAdmin(admin.ModelAdmin): "processed_at", ] - def pretty_content(self, obj): + def pretty_content(self, obj: PretalxData): return format_html("
{}
", json.dumps(obj.content, indent=4)) pretty_content.short_description = "Content" diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index a318cb9..e8af5ab 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -1,66 +1,85 @@ +import logging from typing import Any import httpx from core.models import PretalxData from django.conf import settings -PRETALX_EVENT = "ep2025" -base_url = f"https://pretalx.com/api/events/{PRETALX_EVENT}/" +logger = logging.getLogger(__name__) -RESOURCES = { +PRETALX_EVENTS = [ + "ep2023", + "ep2024", + "ep2025", +] + +ENDPOINTS = { # Questions need to be passed to include answers in the same endpoint, # saving us later time with joining the answers. - PretalxData.PretalxEndpoints.submissions: "submissions?questions=all", - PretalxData.PretalxEndpoints.speakers: "speakers?questions=all", + PretalxData.PretalxResources.submissions: "submissions?questions=all", + PretalxData.PretalxResources.speakers: "speakers?questions=all", } JsonType = dict[str, Any] -def fetch_pretalx_data(resource) -> list[JsonType]: +def get_event_url(event): + assert event in PRETALX_EVENTS + + return f"https://pretalx.com/api/events/{event}/" + + +def fetch_pretalx_data(event: str, resource: PretalxData.PretalxResources) -> list[JsonType]: headers = { "Authorization": f"Token {settings.PRETALX_API_TOKEN}", "Content-Type": "application/json", } - endpoint = RESOURCES[resource] - url = base_url + f"{endpoint}" + base_url = get_event_url(event) + endpoint = ENDPOINTS[resource] + url = f"{base_url}{endpoint}" # Pretalx paginates the output, so we will need to do multiple requests and # then merge mutliple pages to one big dictionary - res0 = [] + results = [] data = {"next": url} - n = 0 + page = 0 + + # This takes advantage of the fact that "next" will contain a url to the + # next page, until there is more data to fetch. If this is the last page, + # then the data["next"] will be None (falsy), and thus stop the while loop. while url := data["next"]: - n += 1 + page += 1 response = httpx.get(url, headers=headers) if response.status_code != 200: raise Exception(f"Error {response.status_code}: {response.text}") + logger.info("Fetching data from %s, page %s", url, page) + data = response.json() - res0 += data["results"] + results += data["results"] - return res0 + return results -def download_latest_submissions() -> PretalxData: - data = fetch_pretalx_data(PretalxData.PretalxEndpoints.submissions) +def download_latest_submissions(event: str) -> PretalxData: + data = fetch_pretalx_data(event, PretalxData.PretalxResources.submissions) pretalx_data = PretalxData.objects.create( - endpoint=PretalxData.PretalxEndpoints.submissions, + resource=PretalxData.PretalxResources.submissions, content=data, ) return pretalx_data -def download_latest_speakers() -> PretalxData: - data = fetch_pretalx_data(PretalxData.PretalxEndpoints.speakers) +def download_latest_speakers(event: str) -> PretalxData: + data = fetch_pretalx_data(event, PretalxData.PretalxResources.speakers) pretalx_data = PretalxData.objects.create( - endpoint=PretalxData.PretalxEndpoints.speakers, + resource=PretalxData.PretalxResources.speakers, content=data, ) diff --git a/intbot/core/models.py b/intbot/core/models.py index 27724d6..8bb4de4 100644 --- a/intbot/core/models.py +++ b/intbot/core/models.py @@ -92,15 +92,15 @@ class PretalxData(models.Model): "business" tables, like "Proposal" or "Speaker". """ - class PretalxEndpoints(models.TextChoices): + class PretalxResources(models.TextChoices): submissions = "submissions", "Submissions" speakers = "speakers", "Speakers" schedule = "schedule", "Schedule" uuid = models.UUIDField(default=uuid.uuid4) - endpoint = models.CharField( + resource= models.CharField( max_length=255, - choices=PretalxEndpoints.choices, + choices=PretalxResources.choices, ) content = models.JSONField() diff --git a/intbot/tests/test_integrations/test_pretalx.py b/intbot/tests/test_integrations/test_pretalx.py index 7e2e04c..60d791a 100644 --- a/intbot/tests/test_integrations/test_pretalx.py +++ b/intbot/tests/test_integrations/test_pretalx.py @@ -1,41 +1,77 @@ -import respx import pytest +import respx from core.integrations import pretalx from core.models import PretalxData from httpx import Response -@respx.mock -def test_fetch_submissions_from_pretalx(): - endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.submissions] - url = pretalx.base_url + endpoint - respx.get(url).mock( - return_value=Response( - 200, - json={ - "results": [ - {"hello": "world"}, - ], - "next": f"{url}&page=2", - }, - ) +def submissions_pages_generator(url): + """ + Generator to simulate pagination. + + Extracted to a generator because we use it in multiple places + """ + yield Response( + 200, + json={ + "results": [ + {"hello": "world"}, + ], + "next": f"{url}&page=2", + }, + ) + + yield Response( + 200, + json={ + "results": [ + {"foo": "bar"}, + ], + # It's important to make it last page in tests. + # Otherwise it will be infinite loop :) + "next": None, + }, ) - respx.get(url + "&page=2").mock( - return_value=Response( - 200, - json={ - "results": [ - {"foo": "bar"}, - ], - # It's important to make it last page in tests. - # Otherwise it will be infinite loop :) - "next": None, - }, - ) + +def speaker_pages_generator(url): + """ + Generator to simulate pagination. + + Extracted to a generator because we use it in multiple places + """ + yield Response( + 200, + json={ + "results": [ + {"hello": "world"}, + ], + "next": f"{url}&page=2", + }, + ) + + yield Response( + 200, + json={ + "results": [ + {"foo": "bar"}, + ], + # It's important to make it last page in tests. + # Otherwise it will be infinite loop :) + "next": None, + }, ) + +@respx.mock +def test_fetch_submissions_from_pretalx(): + url = "https://pretalx.com/api/events/ep2025/submissions?questions=all" + data = submissions_pages_generator(url) + respx.get(url).mock(return_value=next(data)) + respx.get(url + "&page=2").mock(return_value=next(data)) + submissions = pretalx.fetch_pretalx_data( - PretalxData.PretalxEndpoints.submissions, + "ep2025", + PretalxData.PretalxResources.submissions, ) assert submissions == [ @@ -46,35 +82,14 @@ def test_fetch_submissions_from_pretalx(): @respx.mock def test_fetch_speakers_from_pretalx(): - endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.speakers] - url = pretalx.base_url + endpoint - respx.get(url).mock( - return_value=Response( - 200, - json={ - "results": [ - {"hello": "world"}, - ], - "next": f"{url}&page=2", - }, - ) - ) - respx.get(url + "&page=2").mock( - return_value=Response( - 200, - json={ - "results": [ - {"foo": "bar"}, - ], - # It's important to make it last page in tests. - # Otherwise it will be infinite loop :) - "next": None, - }, - ) - ) + url = "https://pretalx.com/api/events/ep2025/speakers?questions=all" + data = speaker_pages_generator(url) + respx.get(url).mock(return_value=next(data)) + respx.get(url + "&page=2").mock(return_value=next(data)) submissions = pretalx.fetch_pretalx_data( - PretalxData.PretalxEndpoints.speakers, + "ep2025", + PretalxData.PretalxResources.speakers, ) assert submissions == [ @@ -86,80 +101,34 @@ def test_fetch_speakers_from_pretalx(): @respx.mock @pytest.mark.django_db def test_download_latest_submissions(): - endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.submissions] - url = pretalx.base_url + endpoint - respx.get(url).mock( - return_value=Response( - 200, - json={ - "results": [ - {"hello": "world"}, - ], - "next": f"{url}&page=2", - }, - ) - ) - respx.get(url + "&page=2").mock( - return_value=Response( - 200, - json={ - "results": [ - {"foo": "bar"}, - ], - # It's important to make it last page in tests. - # Otherwise it will be infinite loop :) - "next": None, - }, - ) - ) - - pretalx.download_latest_submissions() + url = "https://pretalx.com/api/events/ep2025/submissions?questions=all" + data = submissions_pages_generator(url) + respx.get(url).mock(return_value=next(data)) + respx.get(url + "&page=2").mock(return_value=next(data)) - pd = PretalxData.objects.get(endpoint=PretalxData.PretalxEndpoints.submissions) + pretalx.download_latest_submissions("ep2025") - assert pd.endpoint == "submissions" + pd = PretalxData.objects.get(resource=PretalxData.PretalxResources.submissions) + assert pd.resource == "submissions" assert pd.content == [ {"hello": "world"}, {"foo": "bar"}, ] + @respx.mock @pytest.mark.django_db def test_download_latest_speakers(): - endpoint = pretalx.RESOURCES[PretalxData.PretalxEndpoints.speakers] - url = pretalx.base_url + endpoint - respx.get(url).mock( - return_value=Response( - 200, - json={ - "results": [ - {"hello": "world"}, - ], - "next": f"{url}&page=2", - }, - ) - ) - respx.get(url + "&page=2").mock( - return_value=Response( - 200, - json={ - "results": [ - {"foo": "bar"}, - ], - # It's important to make it last page in tests. - # Otherwise it will be infinite loop :) - "next": None, - }, - ) - ) + url = "https://pretalx.com/api/events/ep2025/speakers?questions=all" + data = speaker_pages_generator(url) + respx.get(url).mock(return_value=next(data)) + respx.get(url + "&page=2").mock(return_value=next(data)) - pretalx.download_latest_speakers() + pretalx.download_latest_speakers("ep2025") - pd = PretalxData.objects.get(endpoint=PretalxData.PretalxEndpoints.speakers) - - assert pd.endpoint == "speakers" + pd = PretalxData.objects.get(resource=PretalxData.PretalxResources.speakers) + assert pd.resource == "speakers" assert pd.content == [ {"hello": "world"}, {"foo": "bar"}, ] - From 8e7fac91f07db7b3dd8bfe0f9ba0d306e2eac2ee Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 13:32:21 +0200 Subject: [PATCH 04/14] PretalxData admin sanity check --- intbot/core/admin.py | 9 ++++++--- intbot/tests/test_admin.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/intbot/core/admin.py b/intbot/core/admin.py index ec34cc2..5047065 100644 --- a/intbot/core/admin.py +++ b/intbot/core/admin.py @@ -70,14 +70,17 @@ def content_short(self, obj: DiscordMessage): class PretalxDataAdmin(admin.ModelAdmin): list_display = [ "uuid", - "endpoint", + "resource", "created_at", "modified_at", ] - list_filter = ["created_at"] + list_filter = [ + "created_at", + "resource", + ] readonly_fields = fields = [ "uuid", - "endpoint", + "resource", "pretty_content", "created_at", "modified_at", diff --git a/intbot/tests/test_admin.py b/intbot/tests/test_admin.py index d731edc..70252f4 100644 --- a/intbot/tests/test_admin.py +++ b/intbot/tests/test_admin.py @@ -2,7 +2,7 @@ Sanity checks (mostly) if the admin resources are available """ -from core.models import DiscordMessage, Webhook +from core.models import DiscordMessage, PretalxData, Webhook def test_admin_for_webhooks_sanity_check(admin_client): @@ -32,3 +32,32 @@ def test_admin_for_discordmessages_sanity_check(admin_client): assert str(dm.uuid).encode() in response.content assert dm.channel_id.encode() in response.content assert dm.channel_name.encode() in response.content + + +def test_admin_for_pretalx_data_sanity_check(admin_client): + url = "/admin/core/pretalxdata/" + pd = PretalxData.objects.create( + resource=PretalxData.PretalxResources.speakers, + content={}, + ) + assert pd.uuid + + response = admin_client.get(url) + + assert response.status_code == 200 + assert str(pd.uuid).encode() in response.content + assert pd.get_resource_display().encode() in response.content + +def test_admin_for_pretalx_data_sanity_check(admin_client): + url = "/admin/core/pretalxdata/" + pd = PretalxData.objects.create( + resource=PretalxData.PretalxResources.speakers, + content={}, + ) + assert pd.uuid + + response = admin_client.get(f"{url}{pd.pk}/change/") + + assert response.status_code == 200 + assert str(pd.uuid).encode() in response.content + assert pd.get_resource_display().encode() in response.content From 47f90246bd5e38ea3a82a75a23395c3758565bd9 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 13:42:38 +0200 Subject: [PATCH 05/14] fix urls --- intbot/core/integrations/pretalx.py | 12 +++++++----- intbot/tests/test_integrations/test_pretalx.py | 16 ++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index e8af5ab..d4a3b22 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -8,16 +8,17 @@ logger = logging.getLogger(__name__) PRETALX_EVENTS = [ - "ep2023", - "ep2024", - "ep2025", + "europython-2022", + "europython-2023", + "europython-2024", + "europython-2025", ] ENDPOINTS = { # Questions need to be passed to include answers in the same endpoint, # saving us later time with joining the answers. - PretalxData.PretalxResources.submissions: "submissions?questions=all", - PretalxData.PretalxResources.speakers: "speakers?questions=all", + PretalxData.PretalxResources.submissions: "submissions/?questions=all", + PretalxData.PretalxResources.speakers: "speakers/?questions=all", } @@ -54,6 +55,7 @@ def fetch_pretalx_data(event: str, resource: PretalxData.PretalxResources) -> li response = httpx.get(url, headers=headers) if response.status_code != 200: + breakpoint() raise Exception(f"Error {response.status_code}: {response.text}") logger.info("Fetching data from %s, page %s", url, page) diff --git a/intbot/tests/test_integrations/test_pretalx.py b/intbot/tests/test_integrations/test_pretalx.py index 60d791a..0576cf7 100644 --- a/intbot/tests/test_integrations/test_pretalx.py +++ b/intbot/tests/test_integrations/test_pretalx.py @@ -64,13 +64,13 @@ def speaker_pages_generator(url): @respx.mock def test_fetch_submissions_from_pretalx(): - url = "https://pretalx.com/api/events/ep2025/submissions?questions=all" + url = "https://pretalx.com/api/events/europython-2025/submissions/?questions=all" data = submissions_pages_generator(url) respx.get(url).mock(return_value=next(data)) respx.get(url + "&page=2").mock(return_value=next(data)) submissions = pretalx.fetch_pretalx_data( - "ep2025", + "europython-2025", PretalxData.PretalxResources.submissions, ) @@ -82,13 +82,13 @@ def test_fetch_submissions_from_pretalx(): @respx.mock def test_fetch_speakers_from_pretalx(): - url = "https://pretalx.com/api/events/ep2025/speakers?questions=all" + url = "https://pretalx.com/api/events/europython-2025/speakers/?questions=all" data = speaker_pages_generator(url) respx.get(url).mock(return_value=next(data)) respx.get(url + "&page=2").mock(return_value=next(data)) submissions = pretalx.fetch_pretalx_data( - "ep2025", + "europython-2025", PretalxData.PretalxResources.speakers, ) @@ -101,12 +101,12 @@ def test_fetch_speakers_from_pretalx(): @respx.mock @pytest.mark.django_db def test_download_latest_submissions(): - url = "https://pretalx.com/api/events/ep2025/submissions?questions=all" + url = "https://pretalx.com/api/events/europython-2025/submissions/?questions=all" data = submissions_pages_generator(url) respx.get(url).mock(return_value=next(data)) respx.get(url + "&page=2").mock(return_value=next(data)) - pretalx.download_latest_submissions("ep2025") + pretalx.download_latest_submissions("europython-2025") pd = PretalxData.objects.get(resource=PretalxData.PretalxResources.submissions) assert pd.resource == "submissions" @@ -119,12 +119,12 @@ def test_download_latest_submissions(): @respx.mock @pytest.mark.django_db def test_download_latest_speakers(): - url = "https://pretalx.com/api/events/ep2025/speakers?questions=all" + url = "https://pretalx.com/api/events/europython-2025/speakers/?questions=all" data = speaker_pages_generator(url) respx.get(url).mock(return_value=next(data)) respx.get(url + "&page=2").mock(return_value=next(data)) - pretalx.download_latest_speakers("ep2025") + pretalx.download_latest_speakers("europython-2025") pd = PretalxData.objects.get(resource=PretalxData.PretalxResources.speakers) assert pd.resource == "speakers" From 8dbb56179cbeea071e27359a0481adfdda455464 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 13:43:21 +0200 Subject: [PATCH 06/14] add schema migration --- .../migrations/0005_add_pretalx_data_model.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 intbot/core/migrations/0005_add_pretalx_data_model.py diff --git a/intbot/core/migrations/0005_add_pretalx_data_model.py b/intbot/core/migrations/0005_add_pretalx_data_model.py new file mode 100644 index 0000000..16a2b73 --- /dev/null +++ b/intbot/core/migrations/0005_add_pretalx_data_model.py @@ -0,0 +1,26 @@ +# Generated by Django 5.1.4 on 2025-04-18 11:43 + +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0004_add_inbox_item_model'), + ] + + operations = [ + migrations.CreateModel( + name='PretalxData', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('uuid', models.UUIDField(default=uuid.uuid4)), + ('resource', models.CharField(choices=[('submissions', 'Submissions'), ('speakers', 'Speakers'), ('schedule', 'Schedule')], max_length=255)), + ('content', models.JSONField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('modified_at', models.DateTimeField(auto_now=True)), + ('processed_at', models.DateTimeField(blank=True, null=True)), + ], + ), + ] From f8e1aa79ab59663796bc4855b69a40b9e3dd4b6f Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 13:44:22 +0200 Subject: [PATCH 07/14] fix lint --- intbot/tests/test_admin.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/intbot/tests/test_admin.py b/intbot/tests/test_admin.py index 70252f4..fd537ce 100644 --- a/intbot/tests/test_admin.py +++ b/intbot/tests/test_admin.py @@ -34,7 +34,8 @@ def test_admin_for_discordmessages_sanity_check(admin_client): assert dm.channel_name.encode() in response.content -def test_admin_for_pretalx_data_sanity_check(admin_client): +def test_admin_list_for_pretalx_data(admin_client): + """Simple sanity check if the page loads correctly""" url = "/admin/core/pretalxdata/" pd = PretalxData.objects.create( resource=PretalxData.PretalxResources.speakers, @@ -48,7 +49,9 @@ def test_admin_for_pretalx_data_sanity_check(admin_client): assert str(pd.uuid).encode() in response.content assert pd.get_resource_display().encode() in response.content -def test_admin_for_pretalx_data_sanity_check(admin_client): + +def test_admin_change_for_pretalx_data(admin_client): + """Simple sanity check if the page loads correctly""" url = "/admin/core/pretalxdata/" pd = PretalxData.objects.create( resource=PretalxData.PretalxResources.speakers, From 5bf441c2307609d11e296be858d966930342ac9f Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 13:44:48 +0200 Subject: [PATCH 08/14] fix format --- intbot/core/integrations/pretalx.py | 4 +- .../migrations/0005_add_pretalx_data_model.py | 37 ++++++++++++++----- intbot/core/models.py | 2 +- .../tests/test_integrations/test_pretalx.py | 1 + 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index d4a3b22..1b19af6 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -31,7 +31,9 @@ def get_event_url(event): return f"https://pretalx.com/api/events/{event}/" -def fetch_pretalx_data(event: str, resource: PretalxData.PretalxResources) -> list[JsonType]: +def fetch_pretalx_data( + event: str, resource: PretalxData.PretalxResources +) -> list[JsonType]: headers = { "Authorization": f"Token {settings.PRETALX_API_TOKEN}", "Content-Type": "application/json", diff --git a/intbot/core/migrations/0005_add_pretalx_data_model.py b/intbot/core/migrations/0005_add_pretalx_data_model.py index 16a2b73..e3d1a9c 100644 --- a/intbot/core/migrations/0005_add_pretalx_data_model.py +++ b/intbot/core/migrations/0005_add_pretalx_data_model.py @@ -5,22 +5,39 @@ class Migration(migrations.Migration): - dependencies = [ - ('core', '0004_add_inbox_item_model'), + ("core", "0004_add_inbox_item_model"), ] operations = [ migrations.CreateModel( - name='PretalxData', + name="PretalxData", fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('uuid', models.UUIDField(default=uuid.uuid4)), - ('resource', models.CharField(choices=[('submissions', 'Submissions'), ('speakers', 'Speakers'), ('schedule', 'Schedule')], max_length=255)), - ('content', models.JSONField()), - ('created_at', models.DateTimeField(auto_now_add=True)), - ('modified_at', models.DateTimeField(auto_now=True)), - ('processed_at', models.DateTimeField(blank=True, null=True)), + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("uuid", models.UUIDField(default=uuid.uuid4)), + ( + "resource", + models.CharField( + choices=[ + ("submissions", "Submissions"), + ("speakers", "Speakers"), + ("schedule", "Schedule"), + ], + max_length=255, + ), + ), + ("content", models.JSONField()), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("modified_at", models.DateTimeField(auto_now=True)), + ("processed_at", models.DateTimeField(blank=True, null=True)), ], ), ] diff --git a/intbot/core/models.py b/intbot/core/models.py index 8bb4de4..3192ab0 100644 --- a/intbot/core/models.py +++ b/intbot/core/models.py @@ -98,7 +98,7 @@ class PretalxResources(models.TextChoices): schedule = "schedule", "Schedule" uuid = models.UUIDField(default=uuid.uuid4) - resource= models.CharField( + resource = models.CharField( max_length=255, choices=PretalxResources.choices, ) diff --git a/intbot/tests/test_integrations/test_pretalx.py b/intbot/tests/test_integrations/test_pretalx.py index 0576cf7..382c53d 100644 --- a/intbot/tests/test_integrations/test_pretalx.py +++ b/intbot/tests/test_integrations/test_pretalx.py @@ -33,6 +33,7 @@ def submissions_pages_generator(url): }, ) + def speaker_pages_generator(url): """ Generator to simulate pagination. From 58aa3c50a6d847e1dd08f5f0cd5317c4f4be9a0a Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 14:03:41 +0200 Subject: [PATCH 09/14] add management command --- .../commands/download_pretalx_data.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 intbot/core/management/commands/download_pretalx_data.py diff --git a/intbot/core/management/commands/download_pretalx_data.py b/intbot/core/management/commands/download_pretalx_data.py new file mode 100644 index 0000000..86da3e5 --- /dev/null +++ b/intbot/core/management/commands/download_pretalx_data.py @@ -0,0 +1,28 @@ +from core.integrations.pretalx import ( + PRETALX_EVENTS, + download_latest_speakers, + download_latest_submissions, +) +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + help = "Downloads latest pretalx data" + + def add_arguments(self, parser): + # Add keyword argument event + parser.add_argument( + "--event", + choices=PRETALX_EVENTS, + help="slug of the event (for example `europython-2025`)", + required=True, + ) + + def handle(self, **kwargs): + event = kwargs["event"] + + self.stdout.write(f"Downloading latest speakers from pretalx... {event}") + download_latest_speakers(event) + + self.stdout.write(f"Downloading latest submissions from pretalx... {event}") + download_latest_submissions(event) From 9461e9cf66e888bffacebe8afd44ec34589dd031 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 14:13:46 +0200 Subject: [PATCH 10/14] add basic support for cron jobs --- deploy/playbooks/04_cron.yml | 9 +++++++++ deploy/templates/app/Makefile.app.j2 | 12 ++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 deploy/playbooks/04_cron.yml diff --git a/deploy/playbooks/04_cron.yml b/deploy/playbooks/04_cron.yml new file mode 100644 index 0000000..76284f1 --- /dev/null +++ b/deploy/playbooks/04_cron.yml @@ -0,0 +1,9 @@ +- name: Scheduled tasks using the bot user + hosts: intbot_app + + tasks: + - name: "Download pretalx data every hour" + ansible.builtin.cron: + name: "Download pretalx data every hour" + minute: "5" # run on the 5th minute of every hour + job: "make prod/cron/pretalx" diff --git a/deploy/templates/app/Makefile.app.j2 b/deploy/templates/app/Makefile.app.j2 index a000963..b247ea6 100644 --- a/deploy/templates/app/Makefile.app.j2 +++ b/deploy/templates/app/Makefile.app.j2 @@ -1,18 +1,22 @@ +MAKE_APP="docker compose run app make" echo: "Dummy target, to not run something accidentally" prod/migrate: - docker compose run app make in-container/migrate + $(MAKE_APP) in-container/migrate prod/shell: - docker compose run app make in-container/shell + $(MAKE_APP) in-container/shell prod/db_shell: - docker compose run app make in-container/db_shell + $(MAKE_APP) in-container/db_shell prod/manage: - docker compose run app make in-container/manage ARG=$(ARG) + $(MAKE_APP) in-container/manage ARG=$(ARG) + +prod/cron/pretalx: + $(MAKE_APP) in-container/manage ARG="download_pretalx_data --event=europython-2025" logs: docker compose logs -f From 7fafb6570dba885a64d96eea6f4ab936fe432181 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 14:14:59 +0200 Subject: [PATCH 11/14] fix typo --- intbot/core/integrations/pretalx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index 1b19af6..e7bfb76 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -44,7 +44,7 @@ def fetch_pretalx_data( url = f"{base_url}{endpoint}" # Pretalx paginates the output, so we will need to do multiple requests and - # then merge mutliple pages to one big dictionary + # then merge multiple pages to one big dictionary results = [] data = {"next": url} page = 0 From 01b03d0280b4ff38926a1356f92ea31d8e16fb7b Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 14:17:42 +0200 Subject: [PATCH 12/14] tweak pagination --- intbot/core/integrations/pretalx.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index e7bfb76..d8475e5 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -46,13 +46,12 @@ def fetch_pretalx_data( # Pretalx paginates the output, so we will need to do multiple requests and # then merge multiple pages to one big dictionary results = [] - data = {"next": url} page = 0 - # This takes advantage of the fact that "next" will contain a url to the + # This takes advantage of the fact that url will contain a url to the # next page, until there is more data to fetch. If this is the last page, - # then the data["next"] will be None (falsy), and thus stop the while loop. - while url := data["next"]: + # then the url will be None (falsy), and thus stop the while loop. + while url: page += 1 response = httpx.get(url, headers=headers) @@ -64,6 +63,7 @@ def fetch_pretalx_data( data = response.json() results += data["results"] + url = data["next"] return results From c4136b61e5044c0ee279a8f76407b89b7832ba48 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Fri, 18 Apr 2025 14:18:05 +0200 Subject: [PATCH 13/14] remove extra breakpoint --- intbot/core/integrations/pretalx.py | 1 - 1 file changed, 1 deletion(-) diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index d8475e5..29cef5b 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -56,7 +56,6 @@ def fetch_pretalx_data( response = httpx.get(url, headers=headers) if response.status_code != 200: - breakpoint() raise Exception(f"Error {response.status_code}: {response.text}") logger.info("Fetching data from %s, page %s", url, page) From 86fd6ab0837fedc68838ec37f33b9452139a6524 Mon Sep 17 00:00:00 2001 From: Artur Czepiel Date: Thu, 24 Apr 2025 17:26:54 +0200 Subject: [PATCH 14/14] Update intbot/core/integrations/pretalx.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mia Bajić <38294198+clytaemnestra@users.noreply.github.com> --- intbot/core/integrations/pretalx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intbot/core/integrations/pretalx.py b/intbot/core/integrations/pretalx.py index 29cef5b..17be122 100644 --- a/intbot/core/integrations/pretalx.py +++ b/intbot/core/integrations/pretalx.py @@ -25,7 +25,7 @@ JsonType = dict[str, Any] -def get_event_url(event): +def get_event_url(event: str) -> str: assert event in PRETALX_EVENTS return f"https://pretalx.com/api/events/{event}/"