diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..2270d2a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,23 @@ +# CODEOWNERS file (from GitHub template at +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners) +# Each line is a file pattern followed by one or more owners. + +################################################################################ +# These owners will be the default owners for everything in the repo. This is commented +# out in favor of using a team as the default (see below). It is left here as a comment +# to indicate the primary expert for this code. +# * @ghukill + +# Teams can be specified as code owners as well. Teams should be identified in +# the format @org/team-name. Teams must have explicit write access to the +# repository. +* @mitlibraries/dataeng + +# Infra should always be involved with changes to the build workflows +/.github/dev-build.yml @mitlibraries/infraeng +/.github/stage-build.yml @mitlibraries/infraeng +/.github/prod-promote.yml @mitlibraries/infraeng + +# We set the senior engineer in the team as the owner of the CODEOWNERS file as +# a layer of protection against unauthorized changes. +/.github/CODEOWNERS @ghukill \ No newline at end of file diff --git a/.gitignore b/.gitignore index 526ebcd..4f160e6 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,5 @@ cython_debug/ .DS_Store output/ .vscode/ + +AGENTS.md \ No newline at end of file diff --git a/Makefile b/Makefile index 3a1ea7e..150463b 100644 --- a/Makefile +++ b/Makefile @@ -70,15 +70,48 @@ black-apply: # Apply changes with 'black' ruff-apply: # Resolve 'fixable errors' with 'ruff' uv run ruff check --fix . 
+ +#################################### +# CLI +#################################### +cli-test-inline-run: + uv run python -m launcher.cli \ + run \ + --mount=tests/fixtures/inline_deps + +cli-test-reqs-txt-run: + uv run python -m launcher.cli \ + run \ + --mount=tests/fixtures/static_deps_reqs_txt \ + --requirements=requirements.txt + +cli-test-token-authenticated: + uv run python -m launcher.cli \ + run \ + --mount=tests/fixtures/inline_deps \ + --token="iamsecret" + #################################### # Docker #################################### -build: # Build local image for testing +docker-build: # Build local image for testing docker build -t marimo-launcher:latest . -shell: # Shell into local container for testing +docker-shell: # Shell into local container for testing docker run -it --entrypoint='bash' marimo-launcher:latest +docker-test-run: # Test local docker container with test fixture notebook + docker run \ + -p "2718:2718" \ + -v "$(CURDIR)/tests/fixtures:/tmp/fixtures" \ + -e NOTEBOOK_MOUNT="/tmp/fixtures" \ + -e NOTEBOOK_PATH="helloworld.py" \ + marimo-launcher:latest \ + run + +#################################### +# Terraform +#################################### ### Terraform-generated Developer Deploy Commands for Dev environment ### dist-dev: ## Build docker container (intended for developer-based manual build) diff --git a/README.md b/README.md index 5623e2a..a6ada8b 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,82 @@ - To lint the repo: `make lint` - To run the app: `uv run launcher --help` +## Overview + +This CLI is an application to launch _other_ Marimo notebooks. There are two ways in which this CLI can launch a notebook: + +1. The notebook is available on the same machine as the CLI, e.g. mounted into the Docker container +2. A Github repository is passed and cloned by the CLI that contains a notebook + +Because this CLI is meant to launch notebooks, it does not have a dedicated ECS task or service. 
+ +Take a fictional example of a notebook called "Analyze All the Things (AATT)" in the repository `marimo-aatt`. To provide this notebook for use, an ECS task would be created that sets two important environment variables: + + - `NOTEBOOK_REPOSITORY=https://github.com/MITLibraries/marimo-aatt` + - `NOTEBOOK_PATH=aatt.py` (a non-default notebook path) + +The ECS task / service would invoke this `marimo-launcher` CLI, and this CLI would perform the following: + +1. Clone the Github repository into the container +2. Install dependencies +3. Launch the notebook `aatt.py` + +More information about structuring notebooks and dependencies below in "Preparing Notebooks". + +## Preparing Notebooks + +### Notebook Location +This CLI expects two primary things to discover the notebook to launch: + +1. The root directory of the notebook project (either mounted or a cloned Github repository) +2. Path to the actual notebook python file to run + +The root of the notebook directory is set either by CLI arg `--repo` / env var `NOTEBOOK_REPOSITORY` or CLI arg `--mount` / env var `NOTEBOOK_MOUNT` (less common, more for dev work). In either approach, a notebook directory is established and all other filepaths -- e.g. notebook or requirements -- are **relative** to this path. + +The default notebook path is `notebook.py` and is expected in the root of the cloned or mounted notebook repository. The CLI arg `--path` or env var `NOTEBOOK_PATH` can be passed to override this. + +### Notebook Dependencies + +There are two primary ways to handle dependencies for a notebook launched by this CLI: + +1. Inline dependencies +2. External dependencies requirement file + +#### 1- Inline dependencies + +This is the **default** behavior for this CLI. + +Python [PEP 723](https://peps.python.org/pep-0723/) introduced inline dependencies for a python file. Marimo [fully supports this](https://docs.marimo.io/guides/package_management/inlining_dependencies/) for notebooks as well. 
+ +Inline dependencies are a text block at the top of the python notebook that outlines what dependencies should be installed. This section looks and feels much like sections in the `pyproject.toml`. Here is a minimal example from `tests/fixtures/inline_deps/notebook.py`: + +```python +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "tinydb==4.8.2", +# ] +# /// + +# rest of notebook here... +``` + +When the CLI launches this notebook it will include the flag `--sandbox` when running Marimo that instructs Marimo to use the inlined dependencies. + +The `Makefile` command `cli-test-inline-run` will demonstrate this. + +#### 2- External dependencies requirement file + +Another option, which requires the CLI flag `--requirements` or env var `NOTEBOOK_REQUIREMENTS`, is to install dependencies found in a standalone requirements file, e.g. `requirements.txt`. The tests fixture `tests/fixtures/static_deps_reqs_txt/requirements.txt` shows an example of this kind of file. + +The flag `--requirements` or env var `NOTEBOOK_REQUIREMENTS` should point to a relative path from the root of the notebook directory where this file can be found. When passed, Marimo will be launched via `uv run --with-requirements`, which instructs `uv` to create an isolated environment with these dependencies. + +There are many ways to create this file, [`uv export` is worth consideration](https://docs.astral.sh/uv/reference/cli/#uv-export). + +The `Makefile` command `cli-test-reqs-txt-run` will demonstrate this. + + ## Environment Variables ### Required @@ -20,7 +96,66 @@ WORKSPACE=### Set to `dev` for local development, this will be set to `stage` an ### Optional -None yet... +Set these if you want to override defaults or pass values via env instead of flags. Keep them unset if you use CLI options. 
+ +```shell +NOTEBOOK_REPOSITORY= ### repository to clone that contains a notebook and any required assets +NOTEBOOK_REPOSITORY_BRANCH= ### optional branch to checkout on clone +NOTEBOOK_MOUNT= ### either local or Docker context, an accessible root directory that contains notebook(s) +NOTEBOOK_PATH=### Relative path of actual notebook .py file based on cloned repository or mounted directory; defaults to "notebook.py" +NOTEBOOK_REQUIREMENTS= ### filepath to install dependencies from, relative to notebook root; if unset, dependencies are assumed to be inline in the notebook + +NOTEBOOK_MODE= ### how to launch marimo: "run" to execute, "edit" to open the editor; default "run" +NOTEBOOK_HOST= ### host to bind running notebook to +NOTEBOOK_PORT= ### port to serve running notebook on +``` + + +## CLI Commands + +### `launcher` + +Base command + +```text +Usage: launcher [OPTIONS] COMMAND [ARGS]... + +Options: + -v, --verbose Pass to log at debug level instead of info + --help Show this message and exit. + +Commands: + run + validate +``` + +### `launcher run` + +```text +Usage: python -m launcher.cli run [OPTIONS] + + Launch notebook in 'run' or 'edit' mode. 
+ +Options: + --mount PATH path to mounted / existing notebook directory (env: + NOTEBOOK_MOUNT) + --repo TEXT git repository URL containing the notebook (env: + NOTEBOOK_REPOSITORY) + --repo-branch TEXT optional branch to checkout from cloned notebook + repository (env: NOTEBOOK_REPOSITORY_BRANCH) + --path TEXT relative path to the notebook within the directory + (env: NOTEBOOK_PATH) + --requirements PATH path to requirements file for environment (env: + NOTEBOOK_REQUIREMENTS) + --mode [run|edit] launch mode, 'run' or 'edit' (env: NOTEBOOK_MODE) + [default: run] + --host TEXT host interface to bind (env: NOTEBOOK_HOST) [default: + 0.0.0.0] + --port INTEGER port to bind (env: NOTEBOOK_PORT) [default: 2718] + --token TEXT set a required authentication token/password for the + notebook; if not set, no token/password is required + --help Show this message and exit. +``` diff --git a/launcher/cli.py b/launcher/cli.py index 5d6392c..bc636a4 100644 --- a/launcher/cli.py +++ b/launcher/cli.py @@ -1,6 +1,9 @@ import logging -from datetime import timedelta -from time import perf_counter +import subprocess +import sys +import uuid +from pathlib import Path +from typing import Literal import click @@ -9,25 +12,286 @@ logger = logging.getLogger(__name__) -@click.command() +@click.group("launcher") @click.option( - "-v", "--verbose", is_flag=True, help="Pass to log at debug level instead of info" + "-v", + "--verbose", + is_flag=True, + help="Pass to log at debug level instead of info", ) -def main(*, verbose: bool) -> None: - start_time = perf_counter() +@click.pass_context +def cli( + _ctx: click.Context, + *, + verbose: bool, +) -> None: root_logger = logging.getLogger() logger.info(configure_logger(root_logger, verbose=verbose)) logger.info(configure_sentry()) - logger.info("Running process") - # Do things here! 
- elapsed_time = perf_counter() - start_time - logger.info( - "Total time to complete process: %s", str(timedelta(seconds=elapsed_time)) +@cli.command() +@click.option( + "--mount", + envvar="NOTEBOOK_MOUNT", + type=click.Path(path_type=Path), + help="path to mounted / existing notebook directory (env: NOTEBOOK_MOUNT)", +) +@click.option( + "--repo", + envvar="NOTEBOOK_REPOSITORY", + help="git repository URL containing the notebook (env: NOTEBOOK_REPOSITORY)", +) +@click.option( + "--repo-branch", + envvar="NOTEBOOK_REPOSITORY_BRANCH", + help=( + "optional branch to checkout from cloned notebook repository " + "(env: NOTEBOOK_REPOSITORY_BRANCH)" + ), +) +@click.option( + "--path", + "notebook_path", + envvar="NOTEBOOK_PATH", + help="relative path to the notebook within the directory (env: NOTEBOOK_PATH)", + default="notebook.py", +) +@click.option( + "--requirements", + "requirements_file", + envvar="NOTEBOOK_REQUIREMENTS", + type=click.Path(path_type=Path), + help="path to requirements file for environment (env: NOTEBOOK_REQUIREMENTS)", +) +@click.option( + "--mode", + envvar="NOTEBOOK_MODE", + default="run", + show_default=True, + type=click.Choice(["run", "edit"]), + help="launch mode, 'run' or 'edit' (env: NOTEBOOK_MODE)", +) +@click.option( + "--host", + envvar="NOTEBOOK_HOST", + default="0.0.0.0", # noqa: S104 + show_default=True, + help="host interface to bind (env: NOTEBOOK_HOST)", +) +@click.option( + "--port", + envvar="NOTEBOOK_PORT", + default=2718, + show_default=True, + type=int, + help="port to bind (env: NOTEBOOK_PORT)", +) +@click.option( + "--token", + envvar="NOTEBOOK_TOKEN", + default=None, + show_default=True, + help=( + "set a required authentication token/password for the notebook; " + "if not set, no token/password is required (env: NOTEBOOK_TOKEN)" + ), +) +@click.pass_context +def run( + _ctx: click.Context, + *, + mount: Path | None, + repo: str | None, + repo_branch: str | None, + notebook_path: str, + requirements_file: Path | None, + mode: Literal["run", 
"edit"], + host: str, + port: int, + token: str | None, +) -> None: + """Launch notebook in 'run' or 'edit' mode.""" + notebook_dir_path = resolve_notebook_directory( + mount=str(mount) if mount else None, + repo=repo, + repo_branch=repo_branch, + ) + full_notebook_path = resolve_notebook_path(notebook_dir_path, notebook_path) + + cmd = prepare_run_command( + mode=mode, + host=host, + port=port, + token=token, + notebook_path=notebook_path, + requirements_file=requirements_file, + ) + + logger.info(f"launching '{full_notebook_path}' in {mode} mode on {host}:{port}") + + result = subprocess.run(cmd, cwd=str(notebook_dir_path), check=False) # noqa: S603 + + sys.exit(result.returncode) + + +def resolve_notebook_directory( + mount: str | None = None, + repo: str | None = None, + repo_branch: str | None = None, +) -> Path: + """Determine the root directory that will contain the notebook. + + Resolution rules: + 1) If "mount" is provided: + - Validate that the path exists and return it. + 2) Else if "repo" is provided: + - Clone repository to /tmp/notebook-clone-<uuid> and return this location. + 3) Else: + - Raise an error because at least one of the two is required. + + Args: + - mount: Optional path to an existing host directory to use directly. + - repo: Optional git repository URL to clone into a workspace. + - repo_branch: Optional git branch to checkout for notebook repository. 
+ """ + if mount: + notebook_dir_path = Path(mount) + if not notebook_dir_path.exists(): + raise FileNotFoundError(f"NOTEBOOK_MOUNT path does not exist: {mount}") + return notebook_dir_path + + if repo: + workdir = Path("/tmp") # noqa: S108 + workdir.mkdir(parents=True, exist_ok=True) + notebook_dir_path = workdir / f"notebook-clone-{uuid.uuid4()}" + + clone_notebook_repository(notebook_dir_path, repo, repo_branch) + + return notebook_dir_path + + raise ValueError( + "either --mount/NOTEBOOK_MOUNT or --repo/NOTEBOOK_REPOSITORY must be provided" ) +def clone_notebook_repository( + notebook_dir: Path, + repo: str, + repo_branch: str | None = None, +) -> None: + """Clone a notebook repository to a target directory. + + Behavior: + - If the target directory does not already exist, clone the repository. + - If the directory already exists, do nothing; RuntimeError if git clone fails. + + Args: + - notebook_dir: Destination directory for the repository checkout. + - repo: Git repository URL to clone (e.g., https://..., or SSH URL). + - repo_branch: Optional, git branch to checkout during clone + """ + if not notebook_dir.exists(): + cmd = [ + "git", + "clone", + ] + + if repo_branch: + cmd += ["--branch", repo_branch] + + cmd += [repo, str(notebook_dir)] + logger.info(f"Cloning repository with args: {cmd}") + + result = subprocess.run(cmd, check=False) # noqa: S603 + + if result.returncode != 0: + raise RuntimeError(f"git clone failed with code {result.returncode}") + + +def resolve_notebook_path(notebook_dir: Path, notebook_path: str) -> Path: + """Build and validate the absolute path to the notebook file within notebook_dir. + + Args: + - notebook_dir: Base directory that contains the notebook file. + - notebook_path: Relative path (or filename) of the notebook within notebook_dir. 
+ """ + full_path = notebook_dir / notebook_path + if not full_path.exists(): + raise FileNotFoundError(f"notebook path not found: {full_path}") + return full_path + + +def prepare_run_command( + *, + mode: str, + host: str, + port: int, + token: str | None, + notebook_path: str, + requirements_file: Path | None, +) -> list[str]: + """Build the shell command used to launch a marimo notebook via `uv run`. + + The command has the following general shape: + uv run [--with-requirements <file>] marimo <mode> --headless --host <host> + --port <port> [--sandbox] [--token --token-password <token> | --no-token] <path> + + Behavior: + - If a requirements file is provided, `uv run --with-requirements <file>` is used so + the notebook runs with those pinned dependencies. + - If no requirements file is provided, `--sandbox` is added to marimo to avoid + mutating the user's environment. + - With a token, `--token --token-password <token>` is passed; else `--no-token`. + - The final positional argument is the path to the notebook to run. + + Args: + - mode: marimo subcommand to run (e.g., "run", "edit"). + - host: interface to bind the marimo server to (e.g., "127.0.0.1", "0.0.0.0"). + - port: TCP port for the marimo server. + - token: if non-empty, set as token for notebook, else launch with --no-token + - notebook_path: path to the marimo notebook file. + - requirements_file: optional path to a requirements file for `uv` (enables + `--with-requirements`). 
+ """ + # start with `uv run` so marimo executes in a managed Python environment + cmd: list[str] = ["uv", "run"] + + # if a requirements file is provided, ensure uv uses it for dependency resolution + if requirements_file: + cmd += ["--with-requirements", str(requirements_file)] + + cmd += [ + "marimo", + mode, + "--headless", + "--host", + host, + "--port", + str(port), + ] + + # without a dedicated requirements file, prefer an isolated/sandboxed environment + if not requirements_file: + cmd += ["--sandbox"] + + # set token if passed + if token: + cmd += ["--token", "--token-password", token] + else: + cmd += ["--no-token"] + + # path to the notebook is the final positional argument + cmd += [str(notebook_path)] + + return cmd + + +def main() -> None: + """CLI entrypoint wrapper for package scripts.""" + cli() + + if __name__ == "__main__": logger = logging.getLogger("launcher.cli") main() diff --git a/pyproject.toml b/pyproject.toml index a17e1cc..d2680c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "marimo-launcher" -version = "2.0.0" +version = "1.0.0" requires-python = ">=3.13" dependencies = [ @@ -47,20 +47,27 @@ show-fixes = true [tool.ruff.lint] select = ["ALL", "PT"] ignore = [ - "COM812", - "D107", - "N812", - "PTH", "C90", + "COM812", "D100", - "D101", + "D101", "D102", "D103", - "D104", + "D104", + "D107", + "D417", + "EM101", + "EM102", + "FIX002", + "G004", + "N812", "PLR0912", - "PLR0913", + "PLR0913", "PLR0915", + "PTH", "S321", + "TD003", + "TRY003" ] # allow autofix behavior for specified rules @@ -76,8 +83,11 @@ fixture-parentheses = false "tests/**/*" = [ "ANN", "ARG001", + "PLR2004", "S101", ] +"tests/fixtures/**" = ["ALL"] +"tests/fixtures/*" = ["ALL"] [tool.ruff.lint.pycodestyle] max-doc-length = 90 diff --git a/tests/fixtures/inline_deps/notebook.py b/tests/fixtures/inline_deps/notebook.py new file mode 100644 index 0000000..8529323 --- /dev/null +++ b/tests/fixtures/inline_deps/notebook.py @@ -0,0 +1,62 
@@ +# /// script +# requires-python = ">=3.13" +# dependencies = [ +# "marimo", +# "tinydb==4.8.2", +# ] +# /// + +import marimo + +__generated_with = "0.14.17" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell +def _(mo): + mo.md( + r""" + # Hello World! + + This notebook exercises launch via a Docker container. + """ + ) + return + + +@app.cell +def _(mo): + import sys + + mo.md(f"""Python version: `{sys.version}`""") + return + + +@app.cell +def _(mo): + import tempfile + from tinydb import TinyDB, Query + + with tempfile.TemporaryDirectory() as tmpdir: + db = TinyDB(f"{tmpdir}/db.json") + db.insert({"name": "test"}) + results = db.all() + + mo.md( + f""" + TinyDB loaded: `OK`
+ Results: `{results}` + """ + ) + return + + +if __name__ == "__main__": + app.run() diff --git a/tests/fixtures/static_deps_reqs_txt/notebook.py b/tests/fixtures/static_deps_reqs_txt/notebook.py new file mode 100644 index 0000000..34fb33f --- /dev/null +++ b/tests/fixtures/static_deps_reqs_txt/notebook.py @@ -0,0 +1,59 @@ +import marimo + +__generated_with = "0.14.17" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import marimo as mo + + return (mo,) + + +@app.cell +def _(mo): + mo.md( + r""" + # Hello World! + + This notebook exercises launch via a Docker container. + """ + ) + return + + +@app.cell +def _(mo): + import sys + + mo.md(f"""Python version: `{sys.version}`""") + return + + +@app.cell +def _(mo): + import tempfile + from tinydb import TinyDB, Query + + with tempfile.TemporaryDirectory() as tmpdir: + db = TinyDB(f"{tmpdir}/db.json") + db.insert({"name": "test"}) + results = db.all() + + mo.md( + f""" + TinyDB loaded: `OK`
+ Results: `{results}` + """ + ) + return + + +@app.cell +def _(): + return + + +if __name__ == "__main__": + app.run() diff --git a/tests/fixtures/static_deps_reqs_txt/requirements.txt b/tests/fixtures/static_deps_reqs_txt/requirements.txt new file mode 100644 index 0000000..f190c71 --- /dev/null +++ b/tests/fixtures/static_deps_reqs_txt/requirements.txt @@ -0,0 +1,2 @@ +marimo +tinydb diff --git a/tests/test_cli.py b/tests/test_cli.py index 1e7881d..a8703d4 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,17 +1,6 @@ -from launcher.cli import main +from launcher.cli import cli -def test_cli_no_options(caplog, runner): - result = runner.invoke(main) - assert result.exit_code == 0 - assert "Logger 'root' configured with level=INFO" in caplog.text - assert "Running process" in caplog.text - assert "Total time to complete process" in caplog.text - - -def test_cli_all_options(caplog, runner): - result = runner.invoke(main, ["--verbose"]) - assert result.exit_code == 0 - assert "Logger 'root' configured with level=DEBUG" in caplog.text - assert "Running process" in caplog.text - assert "Total time to complete process" in caplog.text +def test_cli_no_commands(caplog, runner): + result = runner.invoke(cli, []) + assert result.exit_code == 2 diff --git a/uv.lock b/uv.lock index 4c63b65..9c23518 100644 --- a/uv.lock +++ b/uv.lock @@ -384,7 +384,7 @@ wheels = [ [[package]] name = "marimo-launcher" -version = "2.0.0" +version = "1.0.0" source = { virtual = "." } dependencies = [ { name = "click" },