Flagsmith · khvn26 · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
@@ -0,0 +1,43 @@
+# Continuous integration: lint, type-check and test (with a 100% coverage
+# gate) on every pull request and on pushes to main.
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches: [ main ]
+
+jobs:
+  ci:
+    name: CI
+    runs-on: ubuntu-latest
+    env:
+      # Snowflake connection settings are read from repository secrets.
+      SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
+      SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
+      SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
+      SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
+      SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }}
+      SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }}
+      # Location of the key file created by the "Write Snowflake key file" step.
+      SNOWFLAKE_PRIVATE_KEY_PATH: /tmp/snowflake_pk.p8
+      # ClickHouse is started by the "Start ClickHouse" step below.
+      CLICKHOUSE_HOST: localhost
+      CLICKHOUSE_PORT: "8123"
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          # Check out git submodules together with the repository.
+          submodules: recursive
+      - uses: astral-sh/setup-uv@v7
+        with:
+          enable-cache: true
+      - run: make lint
+      - run: make typecheck
+      - name: Write Snowflake key file
+        env:
+          # Hand the secret to the shell through the environment instead of
+          # interpolating it into the script text: `${{ }}` expressions are
+          # substituted before the shell parses the script, so a value
+          # containing quotes, `$` or backticks would alter the command.
+          SNOWFLAKE_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_PRIVATE_KEY }}
+        run: |
+          # Create the key file readable by the owner only.
+          umask 077
+          printf '%s' "$SNOWFLAKE_PRIVATE_KEY" > "$SNOWFLAKE_PRIVATE_KEY_PATH"
+      - name: Start ClickHouse
+        # --wait blocks until the service's healthcheck reports healthy.
+        run: docker compose up --detach --wait clickhouse
+      - run: make test
+      - name: Check Coverage
+        # Fails the job when coverage is below 100%.
+        # NOTE(review): presumably reads the coverage.xml produced by
+        # `make test` from the action's default path — confirm.
+        uses: 5monkeys/cobertura-action@v14
+        with:
+          minimum_coverage: 100
+          fail_below_threshold: true
+          show_missing: true
@@ -13,3 +13,11 @@ wheels/
 .pytest_cache/
 .mypy_cache/
 .ruff_cache/
+
+# Coverage
+.coverage
+coverage.xml
+htmlcov/
+
+# Local secrets
+.env
@@ -0,0 +1,4 @@
+# Shared engine test fixtures, vendored as a git submodule.
+# NOTE(review): `branch` holds a tag-like value (v3.7.0); git documents this
+# field as a remote branch name used by `git submodule update --remote`.
+# Presumably this is a version marker maintained by tooling — confirm.
+[submodule "engine-test-data"]
+	path = engine-test-data
+	url = https://github.com/Flagsmith/engine-test-data.git
+	branch = v3.7.0
@@ -0,0 +1,30 @@
+# Hook configuration in pre-commit format; the Makefile installs and runs
+# these hooks through `prek`.
+repos:
+  # Ruff: lint (applying automatic fixes) and format.
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.15.6
+    hooks:
+      - id: ruff-check
+        args: [--fix]
+      - id: ruff-format
+  # uv lockfile hook.
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    rev: 0.10.10
+    hooks:
+      - id: uv-lock
+  # Syntax checks for YAML, JSON and TOML files.
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: check-yaml
+      - id: check-json
+      - id: check-toml
+  # Flagsmith's shared test-lint hook; see flagsmith-common for its rules.
+  - repo: https://github.com/Flagsmith/flagsmith-common
+    rev: v3.8.2
+    hooks:
+      - id: flagsmith-lint-tests
+  # Local type-check hook: triggered when Python files are part of the run,
+  # but invokes `make typecheck` without file arguments (pass_filenames is
+  # false), so the scope is whatever the make target itself checks.
+  - repo: local
+    hooks:
+      - id: python-typecheck
+        name: python-typecheck
+        language: system
+        entry: make typecheck
+        require_serial: true
+        pass_filenames: false
+        types: [python]
@@ -0,0 +1 @@
+3.10
@@ -0,0 +1 @@
+* @flagsmith/flagsmith-back-end
@@ -0,0 +1,29 @@
+# Developer entry points. Every command runs through `uv run`.
+# A trailing `## text` on a target line is that target's description and is
+# what `make help` prints.
+
+.PHONY: install-packages
+install-packages: ## Install all required packages
+	uv sync
+
+.PHONY: install-pre-commit
+install-pre-commit: ## Install pre-commit hooks
+	uv run prek install
+
+.PHONY: install
+install: install-packages install-pre-commit ## Ensure the environment is set up
+
+.PHONY: lint
+lint: ## Run linters (pre-commit hooks across the tree)
+	uv run prek run --all-files
+
+.PHONY: test
+test: ## Run unit tests. Override scope with opts, e.g. `make test opts='-m engine_parity'`
+	uv run pytest $(opts)
+
+.PHONY: typecheck
+typecheck: ## Run mypy
+	uv run mypy
+
+# Prints a usage banner followed by one line per documented target. The awk
+# program splits each `target: ... ## description` line on the `:...## `
+# separator and prints the target name (cyan, padded to 30 columns) next to
+# its description. Targets without a `## ` trailer, including `help` itself,
+# are not listed.
+.PHONY: help
+help:
+	@echo "Usage: make [target]"
+	@echo ""
+	@echo "Available targets:"
+	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "  \033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
@@ -1,3 +1,165 @@
 # flagsmith-sql-flag-engine
 
-Placeholder. The initial package scaffold lands via the first pull request.
+SQL translator for Flagsmith segment predicates.
+
+Where the Python and Rust `flag_engine` implementations evaluate
+`is_context_in_segment` against an in-memory `EvaluationContext`, this
+package takes a `SegmentContext` and emits a SQL `WHERE` expression that
+evaluates the segment against an entire `IDENTITIES` table — one row per
+identity, with the identity's full trait map held in a single column
+the translator path-extracts at query time. `PERCENTAGE_SPLIT` and
+`:semver`-marked comparators compile to inline pure-SQL.
+
+## Quickstart
+
+```python
+from flag_engine.context.types import EvaluationContext, SegmentContext
+
+from flagsmith_sql_flag_engine import TranslateContext, translate_segment
+from flagsmith_sql_flag_engine.dialects import ClickHouseDialect
+
+eval_context: EvaluationContext = {
+    "environment": {"key": "n9fbf9...3ngWhb", "name": "Production"},
+}
+ctx = TranslateContext(evaluation_context=eval_context, dialect=ClickHouseDialect())
+
+segment: SegmentContext = {
+    "key": "growth-cohort",
+    "name": "Growth cohort",
+    "rules": [
+        {
+            "type": "ALL",
+            "conditions": [
+                {"operator": "EQUAL", "property": "plan", "value": "growth"},
+            ],
+        },
+    ],
+}
+where_expr = translate_segment(segment, ctx)
+# where_expr is a SQL string. Drop into:
+#   SELECT COUNT(*) FROM IDENTITIES i
+#   WHERE i.environment_id = 'n9fbf9...3ngWhb' AND ({where_expr})
+```
+
+`environment_id` in the `IDENTITIES` table is a string column holding
+`EnvironmentContext.key` directly — the same identifier the engine uses,
+no separate integer PK.
+
+`translate_segment` returns `None` if the segment uses an operator the
+translator can't handle — typically a REGEX pattern the active dialect's
+regex flavour can't compile. Callers should fall back to
+`flag_engine.is_context_in_segment` for those segments.
+
+## Schema
+
+Each dialect publishes the table layout it expects via a `schema_ddl`
+constant. For Snowflake:
+
+```sql
+CREATE TABLE IF NOT EXISTS IDENTITIES (
+    environment_id STRING NOT NULL,
+    id NUMBER NOT NULL,
+    identifier STRING NOT NULL,
+    identity_key STRING NOT NULL,
+    traits VARIANT,
+    PRIMARY KEY (environment_id, id)
+)
+CLUSTER BY (environment_id, id);
+```
+
+For ClickHouse:
+
+```sql
+CREATE TABLE IF NOT EXISTS IDENTITIES (
+    environment_id String,
+    id UInt64,
+    identifier String,
+    identity_key String,
+    traits JSON
+)
+ENGINE = MergeTree()
+ORDER BY (environment_id, id);
+```
+
+Both engines store traits in a single columnar-JSON column —
+Snowflake's `VARIANT` and ClickHouse's `JSON` (24+, GA in 25.x). Each
+key is stored as a typed subcolumn, so trait reads are direct columnar
+scans rather than per-row JSON parses. Trait keys are *data* — new keys
+appear without schema changes — and the translator only sees the
+abstract path extraction.
+
+ClickHouse Cloud requires `SET allow_experimental_json_type = 1` when
+creating a `JSON`-column table (the type is GA on OSS 25.x); the test
+harness applies this setting automatically.
+
+Programmatic access:
+
+```python
+from flagsmith_sql_flag_engine.dialects.snowflake import SCHEMA_DDL as SNOWFLAKE_DDL
+from flagsmith_sql_flag_engine.dialects.clickhouse import SCHEMA_DDL as CLICKHOUSE_DDL
+```
+
+## Engine parity
+
+Validated against [Flagsmith/engine-test-data](https://github.com/Flagsmith/engine-test-data),
+the test suite every engine implementation is checked against. The
+engine-parity suite loads each test case's identity into a per-dialect
+scratch table, translates the case's segments, runs the generated SQL,
+and compares to `flag_engine.is_context_in_segment`.
+
+To run the engine-parity suite locally:
+
+```bash
+git submodule update --init                 # pull engine-test-data
+
+# Snowflake
+export SNOWFLAKE_ACCOUNT=...
+export SNOWFLAKE_USER=...
+export SNOWFLAKE_PRIVATE_KEY_PATH=...
+
+# ClickHouse — bring up the local container the CI workflow also uses
+docker compose up --detach --wait clickhouse
+
+uv run pytest tests/test_engine.py
+```
+
+Each harness's environment variables are only read at session-create
+time; to run a single dialect's parity, pass e.g. `-k snowflake` or
+`-k clickhouse` and only export that dialect's credentials.
+
+Adding a new dialect's parity coverage is one harness module — see
+`tests/harnesses/` for the shape.
+
+## Dialects
+
+The translator is dialect-aware: a `Dialect` protocol abstracts the
+SQL fragments that differ across SQL engines — MD5 hex, hex-to-int
+parsing, prefix-anchored regex, padded-version comparison, type-aware
+trait predicates, regex flavour. Today `SnowflakeDialect` and
+`ClickHouseDialect` are implemented; adding another engine such as
+DuckDB or Postgres means writing one class.
+
+## Operator coverage
+
+| Operator                                     | Translatable | Notes                                                          |
+| -------------------------------------------- | :----------: | -------------------------------------------------------------- |
+| `EQUAL`, `NOT_EQUAL`, `IN`                   |     yes      |                                                                |
+| `IS_SET`, `IS_NOT_SET`                       |     yes      | trait subcolumn `IS NOT NULL` / `IS NULL`                      |
+| `CONTAINS`, `NOT_CONTAINS`                   |     yes      |                                                                |
+| `GREATER_THAN`, `LESS_THAN` plus `_INCLUSIVE`|     yes      |                                                                |
+| `MODULO`                                     |     yes      |                                                                |
+| `PERCENTAGE_SPLIT`                           |     yes      | inlined MD5-mod-9999; ~0.005% diverge on hash==9998            |
+| `REGEX`                                      |   partial    | dialect-flavour gated; unsupported patterns → caller fallback  |
+| `:semver`-marked comparators                 |     yes      | major.minor.patch only; ignores prerelease                     |
+
+## Development
+
+```bash
+make install                  # uv sync + prek install (pre-commit hooks)
+make lint                     # run pre-commit hooks across the tree
+make typecheck                # mypy
+make test                     # unit tests
+```
+
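+Ruff (lint + format) runs as a pre-commit hook on every commit. Mypy
+runs through a local hook that invokes `make typecheck` whenever a
+commit touches Python files; it checks the whole configured file set,
+not only the staged files.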
@@ -0,0 +1,19 @@
+# ClickHouse instance used by the test harness, locally and in CI.
+services:
+  clickhouse:
+    image: clickhouse/clickhouse-server:25.5.6
+    environment:
+      # Skip the random-password bootstrap. The container is only ever
+      # reachable from the harness on the same compose network / host
+      # loopback, so the default `default` user with no password is fine.
+      CLICKHOUSE_SKIP_USER_SETUP: "1"
+    ports:
+      # Publish the HTTP interface on host loopback only. A bare "8123:8123"
+      # would bind every host interface and expose the passwordless
+      # `default` user to other machines on the network.
+      - "127.0.0.1:8123:8123"
+    ulimits:
+      nofile:
+        soft: 262144
+        hard: 262144
+    healthcheck:
+      # Probe the HTTP /ping endpoint every 2s, giving up after 15 failed
+      # attempts; `docker compose up --wait` blocks on this check.
+      test: ["CMD", "wget", "--spider", "-q", "http://localhost:8123/ping"]
+      interval: 2s
+      timeout: 2s
+      retries: 15
@@ -0,0 +1,76 @@
+# Distribution metadata for the flagsmith-sql-flag-engine package.
+[project]
+name = "flagsmith-sql-flag-engine"
+version = "0.1.0a2"
+description = "SQL translator for Flagsmith segment predicates."
+readme = "README.md"
+authors = [{ name = "Flagsmith", email = "engineering@flagsmith.com" }]
+requires-python = ">=3.10"
+license = "BSD-3-Clause"
+classifiers = [
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: SQL",
+    "Topic :: Database",
+]
+# Runtime dependencies only; test and tooling packages live in the `dev`
+# dependency group.
+dependencies = ["flagsmith-flag-engine>=10", "jsonpath-rfc9535>=0.2"]
+
+[project.urls]
+Homepage = "https://github.com/Flagsmith/flagsmith-sql-flag-engine"
+
+# Development-only dependencies: test runner and plugins, type checker,
+# hook runner, and the database client libraries.
+# NOTE(review): snowflake-snowpark-python and clickhouse-connect are
+# presumably needed only by the test harnesses — confirm.
+[dependency-groups]
+dev = [
+    "pytest>=8",
+    "pytest-xdist>=3",
+    "mypy>=1.10",
+    "prek>=0.3",
+    "snowflake-snowpark-python>=1.20",
+    "clickhouse-connect>=0.7",
+    "json5>=0.14.0",
+    "pytest-cov>=7.1.0",
+]
+
+# Build with uv's own backend, constrained to the 0.8.x series (>=0.8.14).
+[build-system]
+requires = ["uv_build>=0.8.14,<0.9.0"]
+build-backend = "uv_build"
+
+# Default pytest options: print a summary of non-passing tests (-ra), measure
+# coverage of `src`, list uncovered lines in the terminal, and also write an
+# XML coverage report.
+[tool.pytest.ini_options]
+addopts = [
+    "-ra",
+    "--cov",
+    "src",
+    "--cov-report",
+    "term-missing",
+    "--cov-report",
+    "xml",
+]
+testpaths = ["tests"]
+
+# Measure branch coverage as well as line coverage, for code under `src`.
+[tool.coverage.run]
+branch = true
+source = ["src"]
+
+[tool.coverage.report]
+# `match` statements exhaustive over a Literal type record a phantom
+# fall-through branch from the last case to function exit; coverage.py
+# can't see the type-system exhaustiveness mypy enforces. Treat any
+# `case` line as a possibly-partial branch so the gate stays at 100%
+# without us littering the source with `# pragma: no branch`.
+partial_branches = [
+    # Setting this option replaces coverage.py's default list, so the
+    # standard pragma is listed explicitly to keep it working.
+    "pragma: no branch",
+    "case .+:",
+]
+
+# Ruff targets the lowest supported Python version (3.10).
+[tool.ruff]
+target-version = "py310"
+line-length = 100
+
+# Enabled rule families: pycodestyle errors (E), Pyflakes (F), isort (I),
+# flake8-bugbear (B) and pyupgrade (UP).
+[tool.ruff.lint]
+select = ["E", "F", "I", "B", "UP"]
+
+# Strict mypy over both the package and the test suite.
+[tool.mypy]
+strict = true
+python_version = "3.10"
+files = ["src/flagsmith_sql_flag_engine", "tests"]
+
+# Do not report missing type information for clickhouse_connect imports.
+[[tool.mypy.overrides]]
+module = "clickhouse_connect.*"
+ignore_missing_imports = true