Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
name: CI

on:
pull_request:
push:
branches: [ main ]

jobs:
ci:
name: CI
runs-on: ubuntu-latest
env:
SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }}
SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }}
SNOWFLAKE_PRIVATE_KEY_PATH: /tmp/snowflake_pk.p8
CLICKHOUSE_HOST: localhost
CLICKHOUSE_PORT: "8123"
steps:
- uses: actions/checkout@v5
with:
submodules: recursive
- uses: astral-sh/setup-uv@v7
with:
enable-cache: true
- run: make lint
- run: make typecheck
- name: Write Snowflake key file
  # Hand the secret to the shell through the environment instead of
  # interpolating it into the script text: an interpolated value is pasted
  # into the script before the shell parses it, so any quote, `$` or
  # backtick in the secret would change the command being run.
  env:
    SNOWFLAKE_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_PRIVATE_KEY }}
  run: |
    # Tighten the umask first so the key file is created owner-only.
    umask 077
    # SNOWFLAKE_PRIVATE_KEY_PATH comes from the job-level `env` block, so the
    # path written here is the same one the test harness is pointed at.
    printf '%s' "$SNOWFLAKE_PRIVATE_KEY" > "$SNOWFLAKE_PRIVATE_KEY_PATH"
- name: Start ClickHouse
run: docker compose up --detach --wait clickhouse
- run: make test
- name: Check Coverage
uses: 5monkeys/cobertura-action@v14
with:
minimum_coverage: 100
fail_below_threshold: true
show_missing: true
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ wheels/
.pytest_cache/
.mypy_cache/
.ruff_cache/

# Coverage
.coverage
coverage.xml
htmlcov/

# Local secrets
.env
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "engine-test-data"]
path = engine-test-data
url = https://github.com/Flagsmith/engine-test-data.git
branch = v3.7.0
30 changes: 30 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.6
hooks:
- id: ruff-check
args: [--fix]
- id: ruff-format
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.10.10
hooks:
- id: uv-lock
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-yaml
- id: check-json
- id: check-toml
- repo: https://github.com/Flagsmith/flagsmith-common
rev: v3.8.2
hooks:
- id: flagsmith-lint-tests
- repo: local
hooks:
- id: python-typecheck
name: python-typecheck
language: system
entry: make typecheck
require_serial: true
pass_filenames: false
types: [python]
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10
1 change: 1 addition & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @flagsmith/flagsmith-back-end
29 changes: 29 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
.PHONY: install-packages
install-packages: ## Install all required packages
uv sync

.PHONY: install-pre-commit
install-pre-commit: ## Install pre-commit hooks
uv run prek install

.PHONY: install
install: install-packages install-pre-commit ## Ensure the environment is set up

.PHONY: lint
lint: ## Run linters (pre-commit hooks across the tree)
uv run prek run --all-files

.PHONY: test
test: ## Run unit tests. Override scope with opts, e.g. `make test opts='-m engine_parity'`
uv run pytest $(opts)

.PHONY: typecheck
typecheck: ## Run mypy
uv run mypy

.PHONY: help
help:
@echo "Usage: make [target]"
@echo ""
@echo "Available targets:"
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
164 changes: 163 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,165 @@
# flagsmith-sql-flag-engine

Placeholder. The initial package scaffold lands via the first pull request.
SQL translator for Flagsmith segment predicates.

Where the Python and Rust `flag_engine` implementations evaluate
`is_context_in_segment` against an in-memory `EvaluationContext`, this
package takes a `SegmentContext` and emits a SQL `WHERE` expression that
evaluates the segment against an entire `IDENTITIES` table — one row per
identity, with the identity's full trait map held in a single column
that the translator path-extracts at query time. `PERCENTAGE_SPLIT` and
`:semver`-marked comparators compile to inline, pure-SQL expressions.

## Quickstart

```python
from flag_engine.context.types import EvaluationContext, SegmentContext

from flagsmith_sql_flag_engine import TranslateContext, translate_segment
from flagsmith_sql_flag_engine.dialects import ClickHouseDialect

eval_context: EvaluationContext = {
"environment": {"key": "n9fbf9...3ngWhb", "name": "Production"},
}
ctx = TranslateContext(evaluation_context=eval_context, dialect=ClickHouseDialect())

segment: SegmentContext = {
"key": "growth-cohort",
"name": "Growth cohort",
"rules": [
{
"type": "ALL",
"conditions": [
{"operator": "EQUAL", "property": "plan", "value": "growth"},
],
},
],
}
where_expr = translate_segment(segment, ctx)
# where_expr is a SQL string. Drop into:
# SELECT COUNT(*) FROM IDENTITIES i
# WHERE i.environment_id = 'n9fbf9...3ngWhb' AND ({where_expr})
```

`environment_id` in the `IDENTITIES` table is a string column holding
`EnvironmentContext.key` directly — the same identifier the engine uses,
with no separate integer primary key.

`translate_segment` returns `None` if the segment uses an operator the
translator can't handle — typically a REGEX pattern the active dialect's
regex flavour can't compile. Callers should fall back to
`flag_engine.is_context_in_segment` for those segments.

## Schema

Each dialect publishes the table layout it expects via a `schema_ddl`
constant. For Snowflake:

```sql
CREATE TABLE IF NOT EXISTS IDENTITIES (
environment_id STRING NOT NULL,
id NUMBER NOT NULL,
identifier STRING NOT NULL,
identity_key STRING NOT NULL,
traits VARIANT,
PRIMARY KEY (environment_id, id)
)
CLUSTER BY (environment_id, id);
```

For ClickHouse:

```sql
CREATE TABLE IF NOT EXISTS IDENTITIES (
environment_id String,
id UInt64,
identifier String,
identity_key String,
traits JSON
)
ENGINE = MergeTree()
ORDER BY (environment_id, id);
```

Both engines store traits in a single columnar-JSON column —
Snowflake's `VARIANT` and ClickHouse's `JSON` (24+, GA in 25.x). Each
key is stored as a typed subcolumn, so trait reads are direct columnar
scans rather than per-row JSON parses. Trait keys are *data* — new keys
appear without schema changes — and the translator only sees the
abstract path extraction.

ClickHouse Cloud requires `SET allow_experimental_json_type = 1` when
creating a `JSON`-column table (the type is GA on OSS 25.x); the test
harness applies this setting automatically.

Programmatic access:

```python
from flagsmith_sql_flag_engine.dialects.snowflake import SCHEMA_DDL as SNOWFLAKE_DDL
from flagsmith_sql_flag_engine.dialects.clickhouse import SCHEMA_DDL as CLICKHOUSE_DDL
```

## Engine parity

Validated against [Flagsmith/engine-test-data](https://github.com/Flagsmith/engine-test-data),
the test suite every engine implementation is checked against. The
engine-parity suite loads each test case's identity into a per-dialect
scratch table, translates the case's segments, runs the generated SQL,
and compares to `flag_engine.is_context_in_segment`.

To run the engine-parity suite locally:

```bash
git submodule update --init # pull engine-test-data

# Snowflake
export SNOWFLAKE_ACCOUNT=...
export SNOWFLAKE_USER=...
export SNOWFLAKE_PRIVATE_KEY_PATH=...

# ClickHouse — bring up the local container the CI workflow also uses
docker compose up --detach --wait clickhouse

uv run pytest tests/test_engine.py
```

Each harness's environment variables are only read at session-create
time; to run a single dialect's parity, pass e.g. `-k snowflake` or
`-k clickhouse` and only export that dialect's credentials.

Adding a new dialect's parity coverage is one harness module — see
`tests/harnesses/` for the shape.

## Dialects

The translator is dialect-aware: a `Dialect` protocol abstracts the
SQL fragments that differ across SQL engines — MD5 hex, hex-to-int
parsing, prefix-anchored regex, padded-version comparison, type-aware
trait predicates, regex flavour. Today `SnowflakeDialect` and
`ClickHouseDialect` are implemented; adding another engine such as
DuckDB or Postgres means writing one class.

## Operator coverage

| Operator | Translatable | Notes |
| -------------------------------------------- | :----------: | -------------------------------------------------------------- |
| `EQUAL`, `NOT_EQUAL`, `IN` | yes | |
| `IS_SET`, `IS_NOT_SET` | yes | trait subcolumn `IS NOT NULL` / `IS NULL` |
| `CONTAINS`, `NOT_CONTAINS` | yes | |
| `GREATER_THAN`, `LESS_THAN` plus `_INCLUSIVE`| yes | |
| `MODULO` | yes | |
| `PERCENTAGE_SPLIT` | yes | inlined MD5-mod-9999; ~0.005% diverge on hash==9998 |
| `REGEX` | partial | dialect-flavour gated; unsupported patterns → caller fallback |
| `:semver`-marked comparators | yes | major.minor.patch only; ignores prerelease |

## Development

```bash
make install # uv sync + pre-commit install
make lint # run pre-commit hooks across the tree
make typecheck # mypy
make test # unit tests
```

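Ruff (lint + format) runs as a pre-commit hook on every commit. Mypy
runs through the local `python-typecheck` hook, which invokes
`make typecheck` on the whole project (not just the staged files)
whenever a commit touches Python files.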
19 changes: 19 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
services:
  clickhouse:
    image: clickhouse/clickhouse-server:25.5.6
    environment:
      # Skip the random-password bootstrap, leaving the default `default`
      # user with no password. That is acceptable only because the HTTP
      # port below is published on the host loopback interface, so the
      # server is reachable solely from the compose network and this host.
      CLICKHOUSE_SKIP_USER_SETUP: "1"
    ports:
      # Bind explicitly to 127.0.0.1: a bare "8123:8123" mapping publishes
      # the port on every host interface, which would expose the
      # passwordless server to the local network.
      - "127.0.0.1:8123:8123"
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
    healthcheck:
      # `docker compose up --wait` blocks until this probe succeeds.
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8123/ping"]
      interval: 2s
      timeout: 2s
      retries: 15
1 change: 1 addition & 0 deletions engine-test-data
Submodule engine-test-data added at 4b29dc
76 changes: 76 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
[project]
name = "flagsmith-sql-flag-engine"
version = "0.1.0a2"
description = "SQL translator for Flagsmith segment predicates."
readme = "README.md"
authors = [{ name = "Flagsmith", email = "engineering@flagsmith.com" }]
requires-python = ">=3.10"
license = "BSD-3-Clause"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: SQL",
"Topic :: Database",
]
dependencies = ["flagsmith-flag-engine>=10", "jsonpath-rfc9535>=0.2"]

[project.urls]
Homepage = "https://github.com/Flagsmith/flagsmith-sql-flag-engine"

[dependency-groups]
dev = [
"pytest>=8",
"pytest-xdist>=3",
"mypy>=1.10",
"prek>=0.3",
"snowflake-snowpark-python>=1.20",
"clickhouse-connect>=0.7",
"json5>=0.14.0",
"pytest-cov>=7.1.0",
]

[build-system]
requires = ["uv_build>=0.8.14,<0.9.0"]
build-backend = "uv_build"

[tool.pytest.ini_options]
addopts = [
"-ra",
"--cov",
"src",
"--cov-report",
"term-missing",
"--cov-report",
"xml",
]
testpaths = ["tests"]

[tool.coverage.run]
branch = true
source = ["src"]

[tool.coverage.report]
# `match` statements exhaustive over a Literal type record a phantom
# fall-through branch from the last case to function exit; coverage.py
# can't see the type-system exhaustiveness mypy enforces. Treat any
# `case` line as a possibly-partial branch so the gate stays at 100%
# without us littering the source with `# pragma: no branch`.
partial_branches = [
"pragma: no branch",
"case .+:",
]

[tool.ruff]
target-version = "py310"
line-length = 100

[tool.ruff.lint]
select = ["E", "F", "I", "B", "UP"]

[tool.mypy]
strict = true
python_version = "3.10"
files = ["src/flagsmith_sql_flag_engine", "tests"]

[[tool.mypy.overrides]]
module = "clickhouse_connect.*"
ignore_missing_imports = true
Loading
Loading