Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions cloud_pipelines_backend/backend_types_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def generate_unique_id() -> str:

# # Needed to put a union type into DB
# class SqlIOTypeStruct(_BaseModel):
# type: structures.TypeSpecType
# type: structures.TypeSpecType
# No. We'll represent TypeSpecType as name:str + properties:dict
# Supported cases:
# * type: "name"
Expand Down Expand Up @@ -358,7 +358,9 @@ class ExecutionNode(_TableBase):
repr=False,
)

# updated_at: orm.Mapped[datetime.datetime | None] = orm.mapped_column(default=None)
status_updated_at: orm.Mapped[datetime.datetime | None] = orm.mapped_column(
default=None
)

# execution_kind = orm.Mapped[typing.Literal["CONTAINER", "GRAPH"]]

Expand Down Expand Up @@ -425,6 +427,23 @@ class ExecutionNode(_TableBase):
)


@sql.event.listens_for(ExecutionNode.container_execution_status, "set")
def _stamp_execution_status_updated_at(
    target: ExecutionNode,
    value: ContainerExecutionStatus | None,
    _old_value: object,
    _initiator: object,
) -> None:
    """Refresh ``status_updated_at`` whenever a status value is assigned.

    Registered as the "set" attribute listener for
    ``ExecutionNode.container_execution_status``, so it runs on Python-level
    assignments to that attribute from any caller (orchestrator, API server,
    etc.). Attribute "set" events are not emitted when SQLAlchemy populates an
    instance from a SELECT, so loading a row never overwrites the stored
    timestamp.

    Args:
        target: The node whose status attribute is being assigned.
        value: The status being assigned.
        _old_value: Previous attribute value (unused).
        _initiator: SQLAlchemy event token (unused).
    """
    # Assigning None leaves the existing timestamp untouched.
    if value is None:
        return
    target.status_updated_at = datetime.datetime.now(tz=datetime.timezone.utc)


EXECUTION_NODE_EXTRA_DATA_SYSTEM_ERROR_EXCEPTION_MESSAGE_KEY = (
"system_error_exception_message"
)
Expand Down
38 changes: 37 additions & 1 deletion cloud_pipelines_backend/database_ops.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging

import sqlalchemy
from sqlalchemy import orm

Expand Down Expand Up @@ -66,12 +65,49 @@ def create_db_engine(
return db_engine


def _add_columns_if_missing(*, db_engine: sqlalchemy.Engine) -> None:
    """Add new nullable columns to existing tables when they are not yet present.

    SQLAlchemy's create_all() only creates missing tables, not missing columns,
    so new columns require an explicit migration step. All ALTER statements are
    issued on one connection and committed together at the end.
    NOTE(review): whether that makes the schema change atomic depends on the
    backend's support for transactional DDL — confirm for the target database.

    An OperationalError raised by ALTER TABLE is tolerated only when a fresh
    reflection shows that the column now exists (for example because another
    process added it concurrently). Any other failure is re-raised instead of
    being reported as "already exists".

    Args:
        db_engine: Engine connected to the database to migrate.

    Raises:
        sqlalchemy.exc.OperationalError: If adding a column fails and the
            column is still missing afterwards.
    """
    columns_to_add = [
        bts.ExecutionNode.__table__.c.status_updated_at,
    ]
    inspector = sqlalchemy.inspect(db_engine)
    with db_engine.connect() as conn:
        for col in columns_to_add:
            table_name = col.table.name
            existing = {c["name"] for c in inspector.get_columns(table_name)}
            if col.name in existing:
                _logger.info(
                    f"Column {table_name}.{col.name} already exists — skipping"
                )
                continue

            _logger.info(
                f"Migrating: ALTER TABLE {table_name} ADD COLUMN {col.name} ({col.type})"
            )
            # Render the column type in the target dialect's DDL syntax.
            col_type_str = col.type.compile(dialect=db_engine.dialect)
            try:
                conn.execute(
                    sqlalchemy.text(
                        f"ALTER TABLE {table_name}"
                        f" ADD COLUMN {col.name} {col_type_str}"
                    )
                )
            except sqlalchemy.exc.OperationalError:
                # OperationalError also covers locks, dropped connections, etc.
                # Re-check with a new Inspector (the one above caches its
                # results) and only swallow the error if the column is there.
                reflected_now = {
                    c["name"]
                    for c in sqlalchemy.inspect(db_engine).get_columns(table_name)
                }
                if col.name not in reflected_now:
                    raise
                _logger.info(
                    f"Column {table_name}.{col.name} already exists (concurrent migration) — skipping"
                )
        conn.commit()


def migrate_db(
*,
db_engine: sqlalchemy.Engine,
do_skip_backfill: bool,
) -> None:
_logger.info("Enter migrate DB")
_add_columns_if_missing(db_engine=db_engine)

# # Example:
# sqlalchemy.Index(
Expand Down
33 changes: 32 additions & 1 deletion cloud_pipelines_backend/instrumentation/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,18 @@
- Instrument: orchestrator_execution_system_errors
"""

import enum

from opentelemetry import metrics as otel_metrics


class MetricUnit(str, enum.Enum):
    """UCUM-style unit strings accepted by the OTel SDK.

    Members are ``str`` instances, so they compare equal to their raw unit
    string and can be passed wherever a plain string unit is expected.
    """

    # Seconds; used for duration measurements.
    SECONDS = "s"
    # Annotation-style unit used for error counts.
    ERRORS = "{error}"

    def __str__(self) -> str:
        """Return the raw unit string instead of ``MetricUnit.<NAME>``.

        Without this override ``str(member)`` (and f-string formatting on
        recent Python versions) yields the qualified member name, which would
        produce a wrong unit wherever a consumer stringifies the value.
        """
        return self.value


# ---------------------------------------------------------------------------
# tangle.orchestrator
# ---------------------------------------------------------------------------
Expand All @@ -32,5 +42,26 @@
# Counter of execution nodes that finished in SYSTEM_ERROR status.
# `unit` is passed exactly once: repeating a keyword argument in a call is a
# SyntaxError.
execution_system_errors = orchestrator_meter.create_counter(
    name="execution.system_errors",
    description="Number of execution nodes that ended in SYSTEM_ERROR status",
    unit=MetricUnit.ERRORS,
)

# Histogram of the time, in seconds, an execution spent in one status before
# moving to the next.
execution_status_transition_duration = orchestrator_meter.create_histogram(
    name="execution.status_transition.duration",
    description="Duration an execution spent in a status before transitioning to the next status",
    unit=MetricUnit.SECONDS,
)


def record_status_transition(
    from_status: str,
    to_status: str,
    duration_seconds: float,
) -> None:
    """Report how long an execution stayed in one status before moving on.

    Adds one observation to the ``execution_status_transition_duration``
    histogram, tagged with the status that was left and the status entered.

    Args:
        from_status: Status the execution was in.
        to_status: Status the execution transitioned to.
        duration_seconds: Time spent in ``from_status``, in seconds.
    """
    transition_attributes = {
        "execution.status.from": from_status,
        "execution.status.to": to_status,
    }
    execution_status_transition_duration.record(
        duration_seconds,
        attributes=transition_attributes,
    )
Loading
Loading