From 91a1d17260f8b2dcf35750518c1aa78bb47886ee Mon Sep 17 00:00:00 2001 From: Daniel Monzonis Date: Tue, 18 Jun 2024 12:52:47 +0200 Subject: [PATCH] Add switch to turn collectors on/off This commit implements a way to turn off each collector using an environment variable. A collector that is turned off is not registered with celery, and therefore it won't be run automatically, although it can still be run manually from a shell. There is a separate environment variable for each collector in the Bugzilla, Jira, Errata Tool, NVD and OSV collector groups, so every collector can be turned on/off individually. The collectors are turned on by default. Closes OSIDB-2884. --- .secrets.baseline | 4 ++-- collectors/bzimport/constants.py | 9 +++++++++ collectors/bzimport/tasks.py | 9 +++++++++ collectors/errata/constants.py | 5 +++++ collectors/errata/tasks.py | 3 ++- collectors/framework/models.py | 8 ++++++++ collectors/jiraffe/constants.py | 8 ++++++++ collectors/jiraffe/tasks.py | 3 +++ collectors/nvd/constants.py | 4 ++++ collectors/nvd/tasks.py | 2 ++ collectors/osv/constants.py | 4 ++++ collectors/osv/tasks.py | 2 ++ docker-compose.yml | 16 ++++++++++++++++ docs/CHANGELOG.md | 3 +++ docs/developer/DEVELOP.md | 10 ++++++++++ 15 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 collectors/nvd/constants.py create mode 100644 collectors/osv/constants.py diff --git a/.secrets.baseline b/.secrets.baseline index a056022d5..714eb1a45 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -322,7 +322,7 @@ "filename": "docs/developer/DEVELOP.md", "hashed_secret": "7c6a61c68ef8b9b6b061b28c348bc1ed7921cb53", "is_verified": false, - "line_number": 96, + "line_number": 106, "is_secret": false } ], @@ -510,5 +510,5 @@ } ] }, - "generated_at": "2024-06-17T14:01:22Z" + "generated_at": "2024-06-19T08:46:34Z" } diff --git a/collectors/bzimport/constants.py b/collectors/bzimport/constants.py index 5fccbc159..50ba083a8 100644 --- 
a/collectors/bzimport/constants.py +++ b/collectors/bzimport/constants.py @@ -26,3 +26,12 @@ # Maximum age of connection to Bugzilla in seconds, recommended value is 60 BZ_MAX_CONNECTION_AGE = get_env("BZ_MAX_CONNECTION_AGE") + +# Switches to turn each collector on/off +FLAW_COLLECTOR_ENABLED = get_env("FLAW_COLLECTOR_ENABLED", default="True", is_bool=True) +BZ_TRACKER_COLLECTOR_ENABLED = get_env( + "BZ_TRACKER_COLLECTOR_ENABLED", default="True", is_bool=True +) +BZ_METADATA_COLLECTOR_ENABLED = get_env( + "BZ_METADATA_COLLECTOR_ENABLED", default="True", is_bool=True +) diff --git a/collectors/bzimport/tasks.py b/collectors/bzimport/tasks.py index c2d613046..8b62ebe88 100644 --- a/collectors/bzimport/tasks.py +++ b/collectors/bzimport/tasks.py @@ -1,12 +1,18 @@ """ Bugzilla collector celery tasks """ + from celery.schedules import crontab from celery.utils.log import get_task_logger from collectors.framework.models import collector from .collectors import BugzillaTrackerCollector, FlawCollector, MetadataCollector +from .constants import ( + BZ_METADATA_COLLECTOR_ENABLED, + BZ_TRACKER_COLLECTOR_ENABLED, + FLAW_COLLECTOR_ENABLED, +) logger = get_task_logger(__name__) @@ -15,6 +21,7 @@ base=FlawCollector, crontab=crontab(), depends_on=["collectors.product_definitions.tasks.product_definitions_collector"], + enabled=FLAW_COLLECTOR_ENABLED, ) def flaw_collector(collector_obj): """bugzilla flaw collector""" @@ -26,6 +33,7 @@ def flaw_collector(collector_obj): base=BugzillaTrackerCollector, crontab=crontab(), depends_on=["collectors.bzimport.tasks.flaw_collector"], + enabled=BZ_TRACKER_COLLECTOR_ENABLED, ) def bztracker_collector(collector_obj): logger.info(f"Collector {collector_obj.name} is running") @@ -37,6 +45,7 @@ def bztracker_collector(collector_obj): # run once a day at 3:03 crontab=crontab(hour=3, minute=3), depends_on=["collectors.product_definitions.tasks.product_definitions_collector"], + enabled=BZ_METADATA_COLLECTOR_ENABLED, ) def 
metadata_collector(collector_obj): """ diff --git a/collectors/errata/constants.py b/collectors/errata/constants.py index a1194d2ca..df9810156 100644 --- a/collectors/errata/constants.py +++ b/collectors/errata/constants.py @@ -15,3 +15,8 @@ ERRATA_TOOL_SERVER = get_env("ET_URL") ERRATA_TOOL_XMLRPC_BASE_URL = f"{ERRATA_TOOL_SERVER}/errata/errata_service" + +# Switch to turn the collector on/off +ERRATA_COLLECTOR_ENABLED = get_env( + "ERRATA_COLLECTOR_ENABLED", default="True", is_bool=True +) diff --git a/collectors/errata/tasks.py b/collectors/errata/tasks.py index a92d42fec..b50ee8d84 100644 --- a/collectors/errata/tasks.py +++ b/collectors/errata/tasks.py @@ -7,7 +7,7 @@ from collectors.framework.models import collector from osidb.models import Erratum -from .constants import ERRATA_TOOL_SERVER +from .constants import ERRATA_COLLECTOR_ENABLED, ERRATA_TOOL_SERVER from .core import ( get_all_errata, get_batch_end, @@ -27,6 +27,7 @@ "collectors.bzimport.tasks.bztracker_collector", "collectors.jiraffe.tasks.jira_tracker_collector", ], + enabled=ERRATA_COLLECTOR_ENABLED, ) def errata_collector(collector_obj) -> str: """Errata Tool collector""" diff --git a/collectors/framework/models.py b/collectors/framework/models.py index e06fd394a..5c76b86bc 100644 --- a/collectors/framework/models.py +++ b/collectors/framework/models.py @@ -469,6 +469,7 @@ def collector( data_models: Optional[List[Type[models.Model]]] = None, depends_on: Optional[List[str]] = None, dry_run: Optional[bool] = None, + enabled: Optional[bool] = True, ): """ collector definition decorator @@ -481,6 +482,8 @@ def collector( depends on, may be left None dry_run - determines whether the collector saves the data or just logs them, may be left None + enabled - whether the collector is to be executed as a celery + task or not """ def wrapper(func): @@ -488,6 +491,11 @@ def wrapper(func): if crontab is None: raise RuntimeError("Collector crontab must be defined") + if not enabled: + # Return the original 
function so it can still be called from a shell, + # but do not register it in celery + return func + name = Collector.get_name_from_entity(func) # register collector to celery beat diff --git a/collectors/jiraffe/constants.py b/collectors/jiraffe/constants.py index 1ebc09f9a..5de95cb22 100644 --- a/collectors/jiraffe/constants.py +++ b/collectors/jiraffe/constants.py @@ -15,3 +15,11 @@ # Jira label containing Bugzilla ID JIRA_BZ_ID_LABEL_RE = re.compile(r"flaw:bz#(\d+)") + +# Switches to turn each collector on/off +JIRA_TRACKER_COLLECTOR_ENABLED = get_env( + "JIRA_TRACKER_COLLECTOR_ENABLED", default="True", is_bool=True +) +JIRA_METADATA_COLLECTOR_ENABLED = get_env( + "JIRA_METADATA_COLLECTOR_ENABLED", default="True", is_bool=True +) diff --git a/collectors/jiraffe/tasks.py b/collectors/jiraffe/tasks.py index 14d1f7312..dc73cdae8 100644 --- a/collectors/jiraffe/tasks.py +++ b/collectors/jiraffe/tasks.py @@ -8,6 +8,7 @@ from osidb.models import Tracker from .collectors import JiraTrackerCollector, MetadataCollector +from .constants import JIRA_METADATA_COLLECTOR_ENABLED, JIRA_TRACKER_COLLECTOR_ENABLED logger = get_task_logger(__name__) @@ -17,6 +18,7 @@ crontab=crontab(), # run every minute data_models=[Tracker], depends_on=["collectors.bzimport.tasks.flaw_collector"], + enabled=JIRA_TRACKER_COLLECTOR_ENABLED, ) def jira_tracker_collector(collector_obj): logger.info(f"Collector {collector_obj.name} is running") @@ -28,6 +30,7 @@ def jira_tracker_collector(collector_obj): # run once a day at 2:35 crontab=crontab(hour=2, minute=35), depends_on=["collectors.product_definitions.tasks.product_definitions_collector"], + enabled=JIRA_METADATA_COLLECTOR_ENABLED, ) def metadata_collector(collector_obj): """ diff --git a/collectors/nvd/constants.py b/collectors/nvd/constants.py new file mode 100644 index 000000000..a8dbb6660 --- /dev/null +++ b/collectors/nvd/constants.py @@ -0,0 +1,4 @@ +from osidb.helpers import get_env + +# Switch to turn the collector on/off 
+NVD_COLLECTOR_ENABLED = get_env("NVD_COLLECTOR_ENABLED", default="True", is_bool=True) diff --git a/collectors/nvd/tasks.py b/collectors/nvd/tasks.py index c86b4726e..609ce4435 100644 --- a/collectors/nvd/tasks.py +++ b/collectors/nvd/tasks.py @@ -7,6 +7,7 @@ from collectors.framework.models import collector from .collectors import NVDCollector +from .constants import NVD_COLLECTOR_ENABLED logger = get_task_logger(__name__) @@ -18,6 +19,7 @@ # and one day as it proceeds by 100 days starting at 1999 crontab=crontab(minute="*/10"), depends_on=["collectors.bzimport.tasks.flaw_collector"], + enabled=NVD_COLLECTOR_ENABLED, ) def nvd_collector(collector_obj) -> str: """NVD collector""" diff --git a/collectors/osv/constants.py b/collectors/osv/constants.py new file mode 100644 index 000000000..61c739025 --- /dev/null +++ b/collectors/osv/constants.py @@ -0,0 +1,4 @@ +from osidb.helpers import get_env + +# Switch to turn the collector on/off +OSV_COLLECTOR_ENABLED = get_env("OSV_COLLECTOR_ENABLED", default="True", is_bool=True) diff --git a/collectors/osv/tasks.py b/collectors/osv/tasks.py index 8f984f429..e5b508e11 100644 --- a/collectors/osv/tasks.py +++ b/collectors/osv/tasks.py @@ -4,6 +4,7 @@ from collectors.framework.models import collector from .collectors import OSVCollector +from .constants import OSV_COLLECTOR_ENABLED logger = get_task_logger(__name__) @@ -12,6 +13,7 @@ base=OSVCollector, crontab=crontab(minute=0, hour="*/1"), # Run every hour depends_on=["collectors.bzimport.tasks.flaw_collector"], + enabled=OSV_COLLECTOR_ENABLED, ) def osv_collector(collector_obj) -> str: logger.info(f"Collector {collector_obj.name} is running") diff --git a/docker-compose.yml b/docker-compose.yml index 02601c373..f20ca3203 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -151,6 +151,14 @@ services: PRODUCT_DEF_URL: ${PRODUCT_DEF_URL} PRODUCT_DEF_BRANCH: ${PRODUCT_DEF_BRANCH} PS_CONSTANTS_URL: ${PS_CONSTANTS_URL} + FLAW_COLLECTOR_ENABLED: ${FLAW_COLLECTOR_ENABLED} + 
BZ_TRACKER_COLLECTOR_ENABLED: ${BZ_TRACKER_COLLECTOR_ENABLED} + BZ_METADATA_COLLECTOR_ENABLED: ${BZ_METADATA_COLLECTOR_ENABLED} + ERRATA_COLLECTOR_ENABLED: ${ERRATA_COLLECTOR_ENABLED} + JIRA_TRACKER_COLLECTOR_ENABLED: ${JIRA_TRACKER_COLLECTOR_ENABLED} + JIRA_METADATA_COLLECTOR_ENABLED: ${JIRA_METADATA_COLLECTOR_ENABLED} + NVD_COLLECTOR_ENABLED: ${NVD_COLLECTOR_ENABLED} + OSV_COLLECTOR_ENABLED: ${OSV_COLLECTOR_ENABLED} depends_on: ["osidb-data", "osidb-service", "redis"] # See "NOTE about healthchecks": # depends_on: @@ -179,6 +187,14 @@ services: PRODUCT_DEF_URL: ${PRODUCT_DEF_URL} PRODUCT_DEF_BRANCH: ${PRODUCT_DEF_BRANCH} PS_CONSTANTS_URL: ${PS_CONSTANTS_URL} + FLAW_COLLECTOR_ENABLED: ${FLAW_COLLECTOR_ENABLED} + BZ_TRACKER_COLLECTOR_ENABLED: ${BZ_TRACKER_COLLECTOR_ENABLED} + BZ_METADATA_COLLECTOR_ENABLED: ${BZ_METADATA_COLLECTOR_ENABLED} + ERRATA_COLLECTOR_ENABLED: ${ERRATA_COLLECTOR_ENABLED} + JIRA_TRACKER_COLLECTOR_ENABLED: ${JIRA_TRACKER_COLLECTOR_ENABLED} + JIRA_METADATA_COLLECTOR_ENABLED: ${JIRA_METADATA_COLLECTOR_ENABLED} + NVD_COLLECTOR_ENABLED: ${NVD_COLLECTOR_ENABLED} + OSV_COLLECTOR_ENABLED: ${OSV_COLLECTOR_ENABLED} depends_on: ["osidb-data", "osidb-service", "redis"] # See "NOTE about healthchecks": # depends_on: diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 04c024bca..e20c1a20b 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## Unreleased +### Added +- Implement a way to switch off each collector (OSIDB-2884) + ### Changed - Update the SLA policy diff --git a/docs/developer/DEVELOP.md b/docs/developer/DEVELOP.md index 3193b5899..59adcdd39 100644 --- a/docs/developer/DEVELOP.md +++ b/docs/developer/DEVELOP.md @@ -89,6 +89,16 @@ OSIDB_CORS_ALLOW_HEADERS='["bugzilla-api-key", "jira-api-key"]' # To enable snippets creation in collectors (when date is not set, all snippets are created) SNIPPET_CREATION=1 SNIPPET_CREATION_START="2024-01-01" + +# Collector switches: set to 0 to turn each collector off, or 1 to turn it on (default) +FLAW_COLLECTOR_ENABLED=1 +BZ_TRACKER_COLLECTOR_ENABLED=1 +BZ_METADATA_COLLECTOR_ENABLED=1 +ERRATA_COLLECTOR_ENABLED=1 +JIRA_TRACKER_COLLECTOR_ENABLED=1 +JIRA_METADATA_COLLECTOR_ENABLED=1 +NVD_COLLECTOR_ENABLED=1 +OSV_COLLECTOR_ENABLED=1 ``` The `.env` file is loaded automatically by podman-compose. It is also loaded as environment variables in a few Makefile targets (run `grep -rF '.env ' mk/` to see which ones).