Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b048476
ClickHouse: add PQS, CERT, and CODDTest oracles
fm4v May 15, 2026
752a4fc
ClickHouse CERT/CODDTest: align with the papers
fm4v May 15, 2026
976290f
Merge remote-tracking branch 'upstream/main' into nik/clickhouse-add-…
fm4v May 16, 2026
60e6ba2
ClickHouse PQS: close coverage gaps vs the paper
fm4v May 16, 2026
9712507
ClickHouse CERT: make MergeTree granule pruning actually observable
fm4v May 16, 2026
f5c7d10
Bump to JDK 26 and drop the Eclipse compiler
fm4v May 16, 2026
6c200b9
Merge remote-tracking branch 'origin/main' into nik/clickhouse-add-pq…
fm4v May 16, 2026
89c7ef6
Target JDK 25 instead of JDK 26
fm4v May 16, 2026
ec77cc8
ClickHouse CODDTest: implement all three phi flavors from the paper
fm4v May 16, 2026
0fee730
ClickHouse: lift type system to recursive ADT with capability layer
fm4v May 17, 2026
c7d98b6
CI: drop non-ClickHouse DBMS test jobs
fm4v May 17, 2026
a36ca48
ClickHouse: SEMR oracle + per-session settings randomization
fm4v May 18, 2026
a2105f5
ClickHouse: type-aware leaf picking in expression generator
fm4v May 18, 2026
389216a
ClickHouse: CI bump to clickhouse-server:head + ILLEGAL_AGGREGATION c…
fm4v May 18, 2026
7f08aa6
ClickHouse: EET (Equivalent Expression Transformation) oracle
fm4v May 18, 2026
ca0cb49
ClickHouse: cap server-side query execution at 120s
fm4v May 18, 2026
85adefb
ClickHouse: plan for query primitives (combinators, set ops, ARRAY JOIN)
fm4v May 18, 2026
d5cbddd
ClickHouse: disable JDBC LZ4 response compression to dodge driver bug
fm4v May 18, 2026
ea567ff
ClickHouse: bump clickhouse-jdbc 0.9.6 -> 0.9.8
fm4v May 18, 2026
81c4538
ClickHouse: query primitives — set-op AST, combinators, ARRAY JOIN pl…
fm4v May 18, 2026
271af84
ClickHouse: harden new oracles via live-server validation
fm4v May 18, 2026
d401e13
ClickHouse: dodge mid-stream chunked-transport tear-down via response…
fm4v May 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
640 changes: 5 additions & 635 deletions .github/workflows/main.yml

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '11'
java-version: '25'
server-id: ossrh
server-username: MAVEN_USERNAME
server-password: MAVEN_PASSWORD
Expand All @@ -33,7 +33,7 @@ jobs:
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '11'
java-version: '25'
- name: Build SQLancer
run: mvn -B package -DskipTests=true
- name: Push to Docker Hub
Expand Down
234 changes: 234 additions & 0 deletions docs/brainstorms/clickhouse-type-system-foundation-requirements.md

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

441 changes: 441 additions & 0 deletions docs/plans/2026-05-18-001-feat-clickhouse-eet-oracle-plan.md

Large diffs are not rendered by default.

683 changes: 683 additions & 0 deletions docs/plans/2026-05-18-002-feat-clickhouse-query-primitives-plan.md

Large diffs are not rendered by default.

33 changes: 5 additions & 28 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -108,34 +108,11 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.10.1</version>
<version>3.13.0</version>
<configuration>
<source>11</source>
<target>11</target>
<compilerArguments>
<properties>${project.basedir}/.settings/org.eclipse.jdt.core.prefs</properties>
</compilerArguments>
<compilerId>eclipse</compilerId>
<release>25</release>
<showWarnings>true</showWarnings>
<failOnWarning>true</failOnWarning>
</configuration>
<dependencies>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-compiler-eclipse</artifactId>
<version>2.13.0</version>
</dependency>
<dependency>
<groupId>org.eclipse.jdt</groupId>
<artifactId>ecj</artifactId>
<version>3.28.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-compiler-api</artifactId>
<version>2.13.0</version>
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>net.revelc.code.formatter</groupId>
Expand Down Expand Up @@ -335,7 +312,7 @@
<dependency>
<groupId>com.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.9.6</version>
<version>0.9.8</version>
<classifier>all</classifier>
</dependency>
<dependency>
Expand Down Expand Up @@ -494,9 +471,9 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.4.1</version>
<version>3.11.2</version>
<configuration>
<source>8</source>
<source>25</source>
</configuration>
<executions>
<execution>
Expand Down
16 changes: 16 additions & 0 deletions src/sqlancer/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import com.beust.jcommander.JCommander.Builder;

import sqlancer.citus.CitusProvider;
import sqlancer.clickhouse.ClickHouseOptions;
import sqlancer.clickhouse.ClickHouseOracleFactory;
import sqlancer.clickhouse.ClickHouseProvider;
import sqlancer.cockroachdb.CockroachDBProvider;
import sqlancer.common.log.Loggable;
Expand Down Expand Up @@ -596,6 +598,20 @@ public static int executeMain(String... args) throws AssertionError {
return options.getErrorExitCode();
}

// ClickHouse pre-flight: --random-session-settings and --oracle SEMR are mutually exclusive
// in a single run. SEMR varies one setting per check() via a per-query SETTINGS suffix; the
// randomization layer applies a chosen profile once at connect time and inherits across all
// oracles. Mixing them in one run would silently mask SEMR's failure attribution. Catch the
// combination once here, before the thread pool spawns, so the operator sees one clear
// message instead of N per-thread stack traces and N reproducer artifacts under logs/.
Object parsedCommand = nameToProvider.get(jc.getParsedCommand()).getCommand();
if (parsedCommand instanceof ClickHouseOptions chOptions && chOptions.randomSessionSettings
&& chOptions.oracle.contains(ClickHouseOracleFactory.SEMR)) {
System.err.println("--random-session-settings true is incompatible with --oracle SEMR. "
+ "Drop --random-session-settings true for SEMR runs, or remove --oracle SEMR.");
return options.getErrorExitCode();
}

Randomly.initialize(options);
if (options.printProgressInformation()) {
startProgressMonitor();
Expand Down
94 changes: 93 additions & 1 deletion src/sqlancer/clickhouse/ClickHouseErrors.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ public static List<String> getExpectedExpressionErrors() {
// ()
"Function 'like' doesn't support search with non-constant needles in constant haystack", "Illegal type",
"Illegal value (aggregate function) for positional argument in GROUP BY",
// ClickHouse 26+ new-analyzer variant of the same generator-induced issue:
// a positional GROUP BY pointer (e.g., GROUP BY 1) resolving to an aggregate
// SELECT-list column. Old analyzer error above, new analyzer error here.
"is found in GROUP BY in query", "(ILLEGAL_AGGREGATION)",
"Invalid escape sequence at the end of LIKE pattern", "Invalid type for filter in", "Memory limit",
"OptimizedRegularExpression: cannot compile re2", "Partition key cannot contain constants",
"Positional argument out of bounds", "Sampling expression must be present in the primary key",
Expand Down Expand Up @@ -59,11 +63,99 @@ public static List<String> getExpectedExpressionErrors() {
"No equality condition found in JOIN ON expression", "Cannot parse number with multiple sign",

// JDBC driver may fail to decompress error responses under certain conditions
"Magic is not correct");
"Magic is not correct",

// v1 type-system foundation: Nullable / LowCardinality activation patterns. These
// are added defensively from common ClickHouse error families; the full triage is
// recorded as a follow-up issue after the regression run.
"ILLEGAL_TYPE_OF_ARGUMENT", // Nullable arithmetic, mixed wrapper operations
"Conversion from LowCardinality", "Conversion to LowCardinality", "Nested type", // composite-inside-wrapper
// rejections leaking
// through DEFAULT
// clauses
"type cannot be inside Nullable type", "type cannot be inside LowCardinality",
"Cannot read floating point value", // float-inside-LowCardinality DEFAULT round-trip
"NULL value is not allowed",
// Fired when the JDBC URL setting hasn't propagated (e.g. test fixtures opening their
// own connection). The runtime CREATE TABLE setting in ClickHouseProvider normally
// makes this unreachable.
"SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY",
// Fired when an ORDER BY / PARTITION BY / SAMPLE BY expression references a Nullable
// column without `allow_nullable_key=1`. ClickHouseTableGenerator now sets this in
// the MergeTree SETTINGS clause, but the catalog entry stays as a defense net.
"Partition key contains nullable columns", "Sorting key contains nullable columns",
"allow_nullable_key",
// INSERTs into a column with a MATERIALIZED clause whose dependency column wasn't
// provided -- ClickHouse plugs NULL and the cast to a non-Nullable target fails.
// Becomes more frequent once the v1 type flags emit mixed Nullable/non-Nullable
// columns with INSERT-projection MATERIALIZED clauses.
"Cannot convert NULL value to non-Nullable type", "CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN",
// max_execution_time=120 is set on the JDBC URL in ClickHouseProvider to cap server-side
// query execution; long-running random queries (heavy JOINs, large aggregations) hit this
// cap and ClickHouse returns TIMEOUT_EXCEEDED. The multi-word "Timeout exceeded: elapsed"
// substring is specific enough to avoid masking unrelated "timeout" errors.
"Timeout exceeded: elapsed", "(TIMEOUT_EXCEEDED)");
}

// Registers every substring returned by getExpectedExpressionErrors() on the given
// ExpectedErrors instance.
public static void addExpectedExpressionErrors(ExpectedErrors errors) {
    final List<String> patterns = getExpectedExpressionErrors();
    errors.addAll(patterns);
}

// Expected-error substrings for setting-validation failures. These can be raised either by
// SEMR's per-query SETTINGS suffix or by the SET-on-connect step of random-session-settings.
// Each entry is deliberately multi-word (or a complete error-code label): a bare "Setting"
// token would also match read-only-setting rejections, suggestion lines and echoed SETTINGS
// clauses in error context, and would silently absorb real findings.
public static List<String> getSessionSettingsErrors() {
    final List<String> patterns = List.of(
            // catalog drift: the setting name is not present in this server version
            "Unknown setting",
            // alternate wording of the same condition
            "is neither a builtin setting nor a custom setting",
            // the candidate value was rejected as malformed
            "Cannot parse setting value",
            // multi-word form on purpose; not the bare "out of range"
            "Setting value out of range",
            // ClickHouse error code label
            "UNKNOWN_SETTING");
    return patterns;
}

// Registers every substring returned by getSessionSettingsErrors() on the given
// ExpectedErrors instance.
public static void addSessionSettingsErrors(ExpectedErrors errors) {
    final List<String> patterns = getSessionSettingsErrors();
    errors.addAll(patterns);
}

// Expected-error substrings for set-operation queries (UNION ALL / UNION DISTINCT / INTERSECT /
// EXCEPT). Entries stay multi-word by convention, because a bare "columns" or "type" would mask
// far too many unrelated errors. The list was refined empirically. UNKNOWN_SETTING is
// intentionally NOT included: the startup-probe path catches it separately, so setting-name
// drift remains visible to future audits.
public static List<String> getSetOpErrors() {
    final List<String> patterns = List.of(
            "Number of columns doesn't match",
            "Cannot find common type for tuple elements",
            "INCOMPATIBLE_COLUMNS",
            "Type mismatch in IN or VALUES section",
            "Column number mismatch in subqueries of intersect/except");
    return patterns;
}

// Registers every substring returned by getSetOpErrors() on the given ExpectedErrors instance.
public static void addSetOpErrors(ExpectedErrors errors) {
    final List<String> patterns = getSetOpErrors();
    errors.addAll(patterns);
}

// Expected-error substrings for aggregate-combinator emission. ClickHouse rejects ill-typed
// combinator chains with messages from this family. Following the empirical-discovery
// convention, entries are kept multi-word so they do not absorb unrelated "function" or
// "aggregate" errors.
public static List<String> getCombinatorErrors() {
    final List<String> patterns = List.of(
            "Unknown aggregate function",
            "NUMBER_OF_ARGUMENTS_DOESNT_MATCH",
            "Combinator is only applicable for aggregate function",
            "is only applicable for aggregate functions",
            "Aggregate function is not implemented for",
            "Cannot apply combinator",
            "AGGREGATE_FUNCTION_THROW",
            "Nested type for combinator",
            "Illegal type for argument",
            "Illegal types of arguments");
    return patterns;
}

// Registers every substring returned by getCombinatorErrors() on the given ExpectedErrors
// instance.
public static void addCombinatorErrors(ExpectedErrors errors) {
    final List<String> patterns = getCombinatorErrors();
    errors.addAll(patterns);
}

// Expected-error substrings for ARRAY JOIN. The structural plumbing in this change does not yet
// emit ARRAY JOIN; the substrings are staged for the future activation when Array column
// generation lands, so the catalog grows additively rather than in a later surprise change.
public static List<String> getArrayJoinErrors() {
    final List<String> patterns = List.of(
            "Cannot ARRAY JOIN",
            "ARRAY JOIN requires array argument",
            "ILLEGAL_TYPE_OF_ARGUMENT_FOR_ARRAY_JOIN");
    return patterns;
}

// Registers every substring returned by getArrayJoinErrors() on the given ExpectedErrors
// instance.
public static void addArrayJoinErrors(ExpectedErrors errors) {
    final List<String> patterns = getArrayJoinErrors();
    errors.addAll(patterns);
}

}
24 changes: 24 additions & 0 deletions src/sqlancer/clickhouse/ClickHouseOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,30 @@ public class ClickHouseOptions implements DBMSSpecificOptions<ClickHouseOracleFa
@Parameter(names = { "--analyzer" }, description = "Enable analyzer in ClickHouse", arity = 1)
public boolean enableAnalyzer = true;

// Type-wrapper toggles. Both default to true; arity = 1 means the boolean value must be passed
// explicitly on the command line (e.g. "--test-nullable-types false").
@Parameter(names = "--test-nullable-types", description = "Wrap a small fraction of generated column types in Nullable", arity = 1)
public boolean enableNullable = true;

@Parameter(names = "--test-lowcardinality-types", description = "Wrap a small fraction of generated column types in LowCardinality", arity = 1)
public boolean enableLowCardinality = true;

// Session-settings randomization, off by default. Main.executeMain rejects the combination of
// this flag set to true with --oracle SEMR before any worker thread starts.
@Parameter(names = "--random-session-settings", description = "Apply a random subset of curated ClickHouse settings via SET on the per-database connection", arity = 1)
public boolean randomSessionSettings = false;

// Upper bound on how many settings are randomized per database; defaults to 5. Per the
// description, 0 disables the cap.
// NOTE(review): negative values are not rejected here -- confirm the consumer handles them.
@Parameter(names = "--random-session-settings-budget", description = "Cap on the number of randomized session settings per database (0 = unbounded)")
public int randomSessionSettingsBudget = 5;

// Query-primitive feature flags, all off by default.
// NOTE(review): SetOpTLP and CombinatorTLP are also selectable through --oracle
// (see ClickHouseOracleFactory); confirm how these flags interact with that selection.
@Parameter(names = "--test-set-op-tlp", description = "Enable the set-operation TLP oracle (UNION ALL / UNION DISTINCT / INTERSECT / EXCEPT invariants)", arity = 1)
public boolean enableSetOpTLP = false;

@Parameter(names = "--test-aggregate-combinators", description = "Allow the expression generator to emit aggregate-combinator chains (sumIf, countIfArray, etc.)", arity = 1)
public boolean enableCombinators = false;

@Parameter(names = "--test-combinator-tlp", description = "Enable the combinator-identity oracle (sumIf/countIf/avgOrNull/... algebraic identities)", arity = 1)
public boolean enableCombinatorTLP = false;

// Per the description, currently a no-op until Array column generation is available.
@Parameter(names = "--test-array-join", description = "Enable ARRAY JOIN structural emission (no-op until Array column generation lands in type-system v2)", arity = 1)
public boolean enableArrayJoin = false;

@Override
public List<ClickHouseOracleFactory> getTestOracleFactory() {
return oracle;
Expand Down
49 changes: 49 additions & 0 deletions src/sqlancer/clickhouse/ClickHouseOracleFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@
import sqlancer.OracleFactory;
import sqlancer.clickhouse.ClickHouseProvider.ClickHouseGlobalState;
import sqlancer.clickhouse.gen.ClickHouseExpressionGenerator;
import sqlancer.clickhouse.oracle.cert.ClickHouseCERTOracle;
import sqlancer.clickhouse.oracle.coddtest.ClickHouseCODDTestOracle;
import sqlancer.clickhouse.oracle.eet.ClickHouseEETOracle;
import sqlancer.clickhouse.oracle.pqs.ClickHousePivotedQuerySynthesisOracle;
import sqlancer.clickhouse.oracle.semr.ClickHouseSEMROracle;
import sqlancer.clickhouse.oracle.tlp.ClickHouseTLPAggregateOracle;
import sqlancer.clickhouse.oracle.tlp.ClickHouseTLPCombinatorOracle;
import sqlancer.clickhouse.oracle.tlp.ClickHouseTLPDistinctOracle;
import sqlancer.clickhouse.oracle.tlp.ClickHouseTLPGroupByOracle;
import sqlancer.clickhouse.oracle.tlp.ClickHouseTLPHavingOracle;
import sqlancer.clickhouse.oracle.tlp.ClickHouseTLPSetOpOracle;
import sqlancer.common.oracle.NoRECOracle;
import sqlancer.common.oracle.TLPWhereOracle;
import sqlancer.common.oracle.TestOracle;
Expand Down Expand Up @@ -58,5 +65,47 @@ public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalStat

return new NoRECOracle<>(globalState, gen, errors);
}
},
PQS {
    // Builds the pivoted-query-synthesis oracle for the given global state.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHousePivotedQuerySynthesisOracle(globalState);
        return oracle;
    }
},
CERT {
    // Builds the CERT oracle for the given global state.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHouseCERTOracle(globalState);
        return oracle;
    }
},
CODDTest {
    // Builds the CODDTest oracle for the given global state.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHouseCODDTestOracle(globalState);
        return oracle;
    }
},
SEMR {
    // Builds the SEMR oracle for the given global state. Main.executeMain refuses to start a
    // run that selects this oracle together with --random-session-settings true.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHouseSEMROracle(globalState);
        return oracle;
    }
},
EET {
    // Builds the EET (Equivalent Expression Transformation) oracle for the given global state.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHouseEETOracle(globalState);
        return oracle;
    }
},
SetOpTLP {
    // Builds the set-operation TLP oracle for the given global state.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHouseTLPSetOpOracle(globalState);
        return oracle;
    }
},
CombinatorTLP {
    // Builds the aggregate-combinator TLP oracle for the given global state.
    @Override
    public TestOracle<ClickHouseGlobalState> create(ClickHouseGlobalState globalState) throws SQLException {
        final TestOracle<ClickHouseGlobalState> oracle = new ClickHouseTLPCombinatorOracle(globalState);
        return oracle;
    }
}
}
Loading
Loading