Skip to content

Commit

Permalink
fix: Validate CRON expressions (meltano#6592)
Browse files Browse the repository at this point in the history
* Accept starry cron expressions in JSON schema

* Use croniter to validate cron expressions

* Use single-line match

* Don't validate @once interval

* Use valid cron expressions in CLI tests

* Update schema/meltano.schema.json

Co-authored-by: Will Da Silva <will@willdasilva.xyz>

Co-authored-by: Will Da Silva <will@willdasilva.xyz>
  • Loading branch information
edgarrmondragon and WillDaSilva committed Aug 9, 2022
1 parent d14ff93 commit 6ad4081
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 26 deletions.
121 changes: 115 additions & 6 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ backoff = "^2.1.2"
bcrypt = "^3.2.0" # Needs to be installed explicitly so it can be used as a backend for passlib
click = "^8.1"
click-default-group = "^1.2.1"
croniter = "^1.3.5"
email-validator = "^1.1.2"
fasteners = "^0.17.3"
flask = "^2.1"
Expand Down
4 changes: 2 additions & 2 deletions schema/meltano.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -769,8 +769,8 @@
"interval": {
"type": "string",
"description": "A UNIX cron expression to represent the frequency the scheduled job should execute",
"examples": ["@hourly", "@daily", "@weekly"],
"pattern": "^(@(hourly|daily|weekly|monthly|yearly|once))$"
"examples": ["@hourly", "@daily", "@weekly", "0 0 * * *"],
"pattern": "^((@(hourly|daily|weekly|monthly|yearly|once))|((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*) ?){5,6}))$"
},
"transform": {
"type": "string",
Expand Down
16 changes: 8 additions & 8 deletions src/meltano/core/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ class Schedule(NameEq, Canonical): # noqa: WPS230

def __init__(
self,
name: str = None,
extractor: str = None,
loader: str = None,
transform: str = None,
interval: str = None,
start_date: datetime = None,
job: str = None,
env: dict = None,
name: str | None = None,
extractor: str | None = None,
loader: str | None = None,
transform: str | None = None,
interval: str | None = None,
start_date: datetime.datetime | None = None,
job: str | None = None,
env: dict | None = None,
):
"""Initialize a Schedule.
Expand Down
22 changes: 22 additions & 0 deletions src/meltano/core/schedule_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import subprocess
from datetime import date, datetime

from croniter import croniter

from meltano.core.setting_definition import SettingMissingError

from .meltano_invoker import MeltanoInvoker
Expand Down Expand Up @@ -56,6 +58,18 @@ def __init__(self, namespace: str):
self.namespace = namespace


class BadCronError(Exception):
"""Occurs when a cron expression is invalid."""

def __init__(self, cron: str):
"""Initialize the exception.
Args:
cron: The cron expression that is invalid.
"""
self.cron = cron


class ScheduleService:
"""Service for managing schedules."""

Expand Down Expand Up @@ -174,8 +188,16 @@ def add_schedule(self, schedule: Schedule) -> Schedule:
The added schedule.
Raises:
BadCronError: If the cron expression is invalid.
ScheduleAlreadyExistsError: If a schedule with the same name already exists.
"""
if (
schedule.interval is not None
and schedule.interval != "@once"
and not croniter.is_valid(schedule.interval)
):
raise BadCronError(schedule.interval)

with self.project.meltano_update() as meltano:
# guard if it already exists
if schedule in meltano.schedules:
Expand Down
14 changes: 7 additions & 7 deletions tests/meltano/cli/test_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)
"--loader",
"target-mock",
"--interval",
"@eon",
"@yearly",
"--transform",
"run",
],
Expand All @@ -45,7 +45,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)
assert schedule.extractor == "tap-mock"
assert schedule.loader == "target-mock"
assert schedule.transform == "run"
assert schedule.interval == "@eon" # not anytime soon ;)
assert schedule.interval == "@yearly" # not anytime soon ;)
assert schedule.start_date == iso8601_datetime(test_date)

# test adding a scheduled job
Expand All @@ -61,7 +61,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)
"--job",
"mock-job",
"--interval",
"@eon",
"@yearly",
],
)
assert res.exit_code == 0
Expand All @@ -71,7 +71,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)

assert schedule.name == "job-schedule-mock"
assert schedule.job == "mock-job"
assert schedule.interval == "@eon" # not anytime soon ;)
assert schedule.interval == "@yearly" # not anytime soon ;)

# test default schedule case where no argument (set, remove, add, etc) is provided
with mock.patch(
Expand All @@ -85,7 +85,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)
"--job",
"mock-job",
"--interval",
"@eon",
"@yearly",
],
)
assert res.exit_code == 0
Expand All @@ -95,7 +95,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)

assert schedule.name == "job-schedule-mock"
assert schedule.job == "mock-job"
assert schedule.interval == "@eon" # not anytime soon ;)
assert schedule.interval == "@yearly" # not anytime soon ;)

# verify that you can't use job and elt flags together
with mock.patch(
Expand All @@ -114,7 +114,7 @@ def test_schedule_add(self, get, session, project, cli_runner, schedule_service)
"--loader",
"target-mock",
"--interval",
"@eon",
"@yearly",
"--transform",
"run",
],
Expand Down
12 changes: 9 additions & 3 deletions tests/meltano/core/test_schedule_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from meltano.core.plugin.project_plugin import ProjectPlugin
from meltano.core.project_plugins_service import PluginAlreadyAddedException
from meltano.core.schedule_service import (
BadCronError,
Schedule,
ScheduleAlreadyExistsError,
ScheduleDoesNotExistError,
Expand Down Expand Up @@ -88,6 +89,11 @@ def test_add_schedules(self, subject, create_elt_schedule, create_job_schedule):
with pytest.raises(ScheduleAlreadyExistsError):
subject.add_schedule(all_schedules[0])

with pytest.raises(BadCronError) as excinfo:
subject.add_schedule(create_elt_schedule("bad-cron", interval="bad_cron"))

assert "bad_cron" in str(excinfo.value)

def test_remove_schedule(self, subject):
if platform.system() == "Windows":
pytest.xfail(
Expand Down Expand Up @@ -115,14 +121,14 @@ def test_remove_schedule(self, subject):
def test_schedule_update(self, subject):
schedule = subject.schedules()[0]

schedule.interval = "@pytest"
schedule.interval = "@yearly"
subject.update_schedule(schedule)

# there should be only 1 element with the set interval
assert sum(sbj.interval == "@pytest" for sbj in subject.schedules()) == 1
assert sum(sbj.interval == "@yearly" for sbj in subject.schedules()) == 1

# it should be the first element
assert subject.schedules()[0].interval == "@pytest"
assert subject.schedules()[0].interval == "@yearly"

# it should be a copy of the original element
assert schedule is not subject.schedules()[0]
Expand Down

0 comments on commit 6ad4081

Please sign in to comment.