Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Build the API container for the REF compute engine
# The frontend and backend are built separately and then combined into a single image

FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS backend
FROM ghcr.io/astral-sh/uv:python3.13-bookworm AS base

FROM base AS backend

# Enable bytecode compilation
ENV UV_COMPILE_BYTECODE=1
Expand All @@ -21,7 +23,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \

ADD backend /app
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen --no-dev
uv sync --frozen --no-editable --no-dev


# Build the frontend
Expand All @@ -40,17 +42,18 @@ RUN npm run build

# Runtime container
# Copy the installed packages from the build stage to decrease the size of the final image
FROM python:3.13-slim-bookworm AS runtime
FROM base AS runtime

LABEL maintainer="Jared Lewis <jared.lewis@climate-resource.com>"
LABEL description="Docker image for the REF API"

ENV PATH="/app/.venv/bin:${PATH}"
ENV STATIC_DIR=/app/static
ENV REF_CONFIGURATION=/app/.ref
ENV REF_CONFIGURATION=/ref
ENV FRONTEND_HOST=http://0.0.0.0:8000
ENV XDG_CACHE_HOME=$REF_CONFIGURATION/cache

RUN groupadd --system app && useradd --system --gid app app
RUN useradd -m -u 1000 app

WORKDIR /app

Expand All @@ -61,7 +64,8 @@ COPY --from=frontend --chown=app:app /frontend/dist /app/static

RUN chown -R app:app /app

USER app
# Switch to non-root user -- use a numeric UID for k8s systems that enforce runAsNonRoot/runAsUser
USER 1000

# Run the REF API server (FastAPI) by default
ENTRYPOINT ["fastapi", "run", "--workers", "4", "/app/src/ref_backend/main.py"]
13 changes: 4 additions & 9 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,21 @@ description = "Backend for the Climate Rapid Evaluation Framework"
requires-python = ">=3.11"
dependencies = [
"fastapi[standard]<1.0.0,>=0.114.2",
"python-multipart<1.0.0,>=0.0.7",
"tenacity<9.0.0,>=8.2.3",
"pydantic>2.0",
"psycopg[binary]<4.0.0,>=3.1.13",
"pydantic-settings<3.0.0,>=2.2.1",
"pydantic-settings<3.0.0,>=2.13.1",
"sentry-sdk[fastapi]>=2.0.0",
"climate-ref[aft-providers,postgres]>=0.12.2",
"climate-ref[aft-providers,postgres]>=0.13.1,<0.14",
"loguru",
"pyyaml>=6.0",
"fastapi-sqlalchemy-monitor>=1.1.3",
]

[dependency-groups]
dev = [
"pytest>=8.0",
"pytest>=9.0",
"mypy>=1.8.0",
"ruff>=0.12.0",
"ruff>=0.15.0",
"pre-commit>=4.0",
"coverage>=7.4.3",
# "climate-ref-example",
Expand All @@ -31,9 +29,6 @@ dev = [
"towncrier>=24.8.0",
]

[tool.uv]
constraint-dependencies = ["environs<15.0.0"]

[tool.uv.sources]
# Temporary pin for testing
# climate-ref = { git = "https://github.com/Climate-REF/climate-ref", subdirectory = "packages/climate-ref", tag="v0.7.0" }
Expand Down
8 changes: 4 additions & 4 deletions backend/src/ref_backend/api/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def _ref_config_dependency(settings: SettingsDep) -> Config:
REFConfigDep = Annotated[Config, Depends(_ref_config_dependency)]


def _get_database_dependency(ref_config: REFConfigDep) -> Database:
return get_database(ref_config)
def _get_database_dependency(settings: SettingsDep, ref_config: REFConfigDep) -> Database:
return get_database(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)


DatabaseDep = Annotated[Database, Depends(_get_database_dependency)]
Expand Down Expand Up @@ -57,11 +57,11 @@ class AppContext:
provider_registry: ProviderRegistry


def _provider_registry_dependency(ref_config: REFConfigDep) -> ProviderRegistry:
def _provider_registry_dependency(settings: SettingsDep, ref_config: REFConfigDep) -> ProviderRegistry:
"""
Get the provider registry
"""
return get_provider_registry(ref_config)
return get_provider_registry(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)


ProviderRegistryDep = Annotated[ProviderRegistry, Depends(_provider_registry_dependency)]
Expand Down
12 changes: 12 additions & 0 deletions backend/src/ref_backend/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,18 @@ def all_cors_origins(self) -> list[str]:
PROJECT_NAME: str = "Climate Rapid Evaluation Framework"
SENTRY_DSN: HttpUrl | None = None
REF_CONFIGURATION: str = "data"
REF_READ_ONLY_DATABASE: bool = False
"""
Open the SQLite database in read-only mode.

When true, the API opens the configured SQLite database via a URI-form
connection string with ``mode=ro&immutable=1`` so that SQLite does not
attempt to create a journal/WAL sidecar. This lets the REF state volume
(e.g. ``/ref``) be mounted read-only in deployments where the API is a
pure consumer of worker-produced state.

Ignored for non-SQLite databases.
"""
STATIC_DIR: str | None = None
USE_TEST_DATA: bool = False
"""
Expand Down
8 changes: 5 additions & 3 deletions backend/src/ref_backend/core/outliers.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,11 @@ def detect_outliers_in_scalar_values(
lower_bound, upper_bound = iqr_bounds
# Apply bounds to individual values (Reference values always non-outlier)
source_id_flags = group_values.apply(
lambda row: (row["value"] < lower_bound or row["value"] > upper_bound)
if row["source_id"] != "Reference"
else False,
lambda row: (
(row["value"] < lower_bound or row["value"] > upper_bound)
if row["source_id"] != "Reference"
else False
),
axis=1,
)
else:
Expand Down
51 changes: 36 additions & 15 deletions backend/src/ref_backend/core/ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from loguru import logger

from climate_ref.config import Config
from climate_ref.database import Database, _get_database_revision
from climate_ref.database import Database, MigrationState
from climate_ref.provider_registry import ProviderRegistry
from ref_backend.core.config import Settings

Expand All @@ -20,26 +20,47 @@ def get_ref_config(settings: Settings) -> Config:
return Config.load(config_fname, allow_missing=True)


def get_database(ref_config: Config) -> Database:
def get_database(ref_config: Config, read_only: bool = False) -> Database:
"""
Get a database connection using the default config
Get a database connection using the default config.

When ``read_only`` is true,
the SQLite database is opened via ``Database.from_config(..., read_only=True)``,
which rewrites the URL to read-only URI form so no journal/WAL sidecar is created.
"""
database = Database.from_config(ref_config, run_migrations=False)
with database._engine.connect() as connection:
if _get_database_revision(connection) is None:
msg = (
"The database migration has not been run. "
"Check the database URL in your config file and run the migration."
)
logger.warning(msg)
if ref_config.db.run_migrations:
raise ValueError(msg)
database = Database.from_config(ref_config, run_migrations=False, read_only=read_only)

status = database.migration_status(ref_config)
state = status["state"]
if state is MigrationState.UP_TO_DATE:
return database

if state is MigrationState.UNMANAGED:
msg = (
"The database has no alembic revision stamp. "
"Check the database URL in your config file and run the migration."
)
logger.warning(msg)
if ref_config.db.run_migrations:
raise ValueError(msg)
elif state is MigrationState.REMOVED:
raise ValueError(
f"Database revision {status['current']!r} has been removed. "
"Please delete your database and start again."
)
else:
logger.warning(
f"Database revision {status['current']!r} does not match this image's "
f"head revision {status['head']!r}. "
"The API will continue to read this database."
)

return database


def get_provider_registry(ref_config: Config) -> ProviderRegistry:
def get_provider_registry(ref_config: Config, read_only: bool = False) -> ProviderRegistry:
"""
Get the provider registry
"""
database = get_database(ref_config)
database = get_database(ref_config, read_only=read_only)
return ProviderRegistry.build_from_config(ref_config, database)
4 changes: 2 additions & 2 deletions backend/src/ref_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

# Initialize singletons at application startup
ref_config = get_ref_config(settings)
database = get_database(ref_config)
provider_registry = get_provider_registry(ref_config)
database = get_database(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)
provider_registry = get_provider_registry(ref_config, read_only=settings.REF_READ_ONLY_DATABASE)

setup_logging(settings.LOG_LEVEL)
app = build_app(settings, ref_config, database)
Expand Down
49 changes: 48 additions & 1 deletion backend/tests/test_core/test_ref.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import copy
import shutil
from pathlib import Path

import pytest
import sqlalchemy

from climate_ref.config import Config
from ref_backend.core.config import Settings
from ref_backend.core.ref import get_ref_config
from ref_backend.core.ref import get_database, get_ref_config
from ref_backend.testing import test_ref_config as _load_test_ref_config


def test_get_ref_config_missing_toml(tmp_path: Path):
Expand All @@ -24,3 +30,44 @@ def test_get_ref_config_with_toml(tmp_path: Path):
config = get_ref_config(settings)

assert isinstance(config, Config)


def _copy_test_db(tmp_path: Path) -> Path:
    """
    Duplicate the checked-in test SQLite database into ``tmp_path``.

    The source file is located by taking the test REF config's database URL
    and stripping the ``sqlite:///`` prefix. The copy is written as
    ``climate_ref.db`` inside ``tmp_path`` so a test can modify it without
    touching the original.

    Returns the path of the copied database file.
    """
    database_url = _load_test_ref_config().db.database_url
    source_path = Path(database_url.removeprefix("sqlite:///"))
    target_path = tmp_path / "climate_ref.db"
    # copy2 also carries over file metadata along with the contents
    shutil.copy2(source_path, target_path)
    return target_path


def test_get_database_read_only_rejects_writes(tmp_path: Path):
    """
    A database obtained with ``read_only=True`` must refuse writes.

    Works on an isolated copy of the test database, checks that the resulting
    URL carries ``mode=ro``, and checks that a DDL statement issued through
    the engine raises ``OperationalError``.
    """
    isolated_db = _copy_test_db(tmp_path)
    config = copy.deepcopy(_load_test_ref_config())
    config.db.database_url = f"sqlite:///{isolated_db}"

    read_only_db = get_database(config, read_only=True)
    assert "mode=ro" in read_only_db.url

    create_probe = sqlalchemy.text("CREATE TABLE probe (x INTEGER)")
    with (
        read_only_db._engine.connect() as conn,
        pytest.raises(sqlalchemy.exc.OperationalError),
    ):
        # Either the execute or the commit is expected to fail on a read-only DB
        conn.execute(create_probe)
        conn.commit()


def test_get_database_tolerates_unknown_revision(tmp_path: Path):
    """
    ``get_database`` must not raise for an alembic revision it does not know.

    Such a stamp means a newer climate-ref CLI ran the migration. The test
    overwrites ``alembic_version`` in an isolated copy of the test database
    with a made-up revision and then expects a database object back.
    """
    isolated_db = _copy_test_db(tmp_path)
    config = copy.deepcopy(_load_test_ref_config())
    config.db.database_url = f"sqlite:///{isolated_db}"

    # Stamp the copy with a revision that the installed migrations cannot contain
    stamp_unknown = sqlalchemy.text("UPDATE alembic_version SET version_num = 'from_future_cli'")
    writer = sqlalchemy.create_engine(config.db.database_url)
    with writer.begin() as conn:
        conn.execute(stamp_unknown)
    writer.dispose()

    assert get_database(config) is not None
Loading
Loading