Merged
29 commits
8b9fe1c
bitwuzla: Change options for parallel track. (#201)
mpreiner Jul 15, 2025
02a8c88
submission: add additional configurations for lower CPU core counts. …
zmylinxi99 Jul 15, 2025
68d603d
SMTS submission 2025 update (bug fix + 2 more versions) (#203)
Tomaqa Jul 20, 2025
f4fa309
present derived solvers' increment over base solver
wintered Jul 30, 2025
9d64aee
cosmetics
wintered Jul 30, 2025
8f1c054
interim
wintered Jul 30, 2025
b338b90
minor
wintered Jul 30, 2025
8b1ec77
fixing CI issues
wintered Jul 30, 2025
68c0bd6
Merge remote-tracking branch 'origin/2025_final_execution' into prese…
wintered Aug 2, 2025
68cc10a
establishing consistency among submissions with multiple cores
wintered Aug 2, 2025
7468375
2024 -> 2025
wintered Aug 4, 2025
f4efacf
2024 -> 2025
wintered Aug 4, 2025
7ef410f
target for cleaning up web results
wintered Aug 4, 2025
8bc62ae
Merge remote-tracking branch 'origin/2025_final_execution' into prese…
wintered Aug 4, 2025
71e5365
feat: show legends for base and non-competing only when at least one
wintered Aug 4, 2025
ac178bd
feat: non-competitive divisions will be marked
wintered Aug 4, 2025
7fe460d
skip non-competing solvers on podiums
wintered Aug 5, 2025
932b8d9
fix: -- -> -
wintered Aug 5, 2025
4078f05
fix: missing case for determining competitive divisions
wintered Aug 5, 2025
087fe77
fix: competitive solvers
wintered Aug 5, 2025
02e980b
fix competitive divisions
wintered Aug 5, 2025
ffec35a
consistency submission vs. base solver
wintered Aug 5, 2025
70af334
refactor
wintered Aug 5, 2025
ab124f5
fix: ordering of solvers
wintered Aug 5, 2025
4a2aef9
Merge branch '2025_final_execution' into 2025_webpage
wintered Aug 5, 2025
061c204
CI
wintered Aug 5, 2025
c414881
Merge branch '2025_webpage' of github.com:SMT-COMP/smt-comp.github.io…
wintered Aug 5, 2025
2d93bab
Revert "Merge branch '2025_webpage' of github.com:SMT-COMP/smt-comp.g…
wintered Aug 6, 2025
ed2e608
disable dead solver links
wintered Aug 6, 2025
6 changes: 6 additions & 0 deletions Makefile
@@ -31,6 +31,12 @@ build: clean-build ## Build wheel file using poetry
clean-build: ## clean build artifacts
@rm -rf dist

.PHONY: clean-web-results
clean-web-results: ## clean web results
@rm -rf web/content/results
@rm -rf web/public/results


.PHONY: help
help:
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
Binary file modified data/results-mv-2025.json.gz
7 changes: 4 additions & 3 deletions smtcomp/benchexec.py
@@ -28,6 +28,8 @@ def get_suffix(track: defs.Track) -> str:
return "_parallel"
case defs.Track.Cloud:
return "_cloud"
case _:
raise ValueError(f"Unhandled track: {track}")


def get_xml_name(s: defs.Submission, track: defs.Track, division: defs.Division) -> str:
@@ -46,9 +48,8 @@ class CmdTask(BaseModel):
taskdirs: List[str]


def generate_benchmark_yml(
ymlfile: Path, benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]
) -> None:
def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]) -> None:
ymlfile = benchmark.with_suffix(".yml")
with ymlfile.open("w") as f:
f.write("format_version: '2.0'\n\n")

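With this change, generate_benchmark_yml derives the .yml sidecar from the benchmark path instead of taking it as a parameter. A hedged call sketch (paths and values are illustrative):

from pathlib import Path
from smtcomp.benchexec import generate_benchmark_yml

# Writes bench/QF_BV/example.yml next to the benchmark file.
generate_benchmark_yml(Path("bench/QF_BV/example.smt2"), expected_result=True, orig_file=None)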
30 changes: 28 additions & 2 deletions smtcomp/defs.py
@@ -135,7 +135,7 @@ def name_is_default_field(cls, data: Any) -> Any:

class SolverType(EnumAutoInt):
wrapped = "wrapped"
derived = "derived"
derived = "derived" # TODO: put a datatype information on base solver
standalone = "Standalone"
portfolio = "Portfolio"

@@ -1317,6 +1317,7 @@ class Submission(BaseModel, extra="forbid"):
website: HttpUrl
system_description: HttpUrl
solver_type: SolverType
# TODO add field base_solver?
participations: Participations
seed: int | None = None
competitive: bool = True
@@ -1325,6 +1326,7 @@
description="Must be set for the final version of the submission. An archive on zenodo is needed in this case.",
)

# TODO: model validator to check the sanity of the new base_solver field
@model_validator(mode="after")
def check_archive(self) -> Submission:
if self.archive is None and not all(p.archive for p in self.participations.root):
@@ -1493,6 +1495,11 @@ class Config:
"""
Commit of the model validator dolmen (branch smtcomp-2023)
"""
dolmen_force_logic_ALL = False
"""
Some benchmarks are not accepted by dolmen in their current logic.
During model validation we can rerun with the logic forced to ALL so that more models are accepted.
"""

removed_benchmarks = [
{
@@ -1510,11 +1517,26 @@
Benchmarks to remove before selection
"""

removed_results = []
removed_results: list[Any] = []

"""
Benchmarks to remove after running the solvers. Can be used when the selection has already been done.
"""

"""
Solver -> Base solver map for 2025
TODO: refactor this into Submission
"""
baseSolverMap2025 = {
"Bitwuzla-MachBV": "Bitwuzla-MachBV-base",
"Z3-Inc-Z3++": "Z3-Inc-Z3++-base",
"Z3-Noodler-Mocha": "Z3-Noodler-Mocha-base",
"Z3-Owl": "Z3-Owl-base",
"Z3-Noodler": "Z3-Noodler",
"z3siri": "z3siri-base",
"Z3-alpha": "Z3-alpha-base",
}

def __init__(self, data: Path | None) -> None:
self.id = self.__class__.__next_id__
self.__class__.__next_id__ += 1
@@ -1578,6 +1600,10 @@ def submissions(self) -> list[Submission]:
Submission.model_validate_json(Path(file).read_text()) for file in self.data.glob("../submissions/*.json")
]

@functools.cached_property
def competitive_solvers(self) -> list[str]:
return [s.name for s in self.submissions if s.competitive]

@functools.cached_property
def web_results(self) -> Path:
return self.data / ".." / "web" / "content" / "results"
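A minimal sketch of how the two new Config members above are meant to be consumed (illustrative only; the data directory and solver name are assumptions, and the actual call sites are in smtcomp/generate_website_page.py below):

from pathlib import Path
from smtcomp import defs

config = defs.Config(Path("data"))  # hypothetical data directory
competitive = set(config.competitive_solvers)  # names of submissions flagged competitive
base = defs.Config.baseSolverMap2025.get("Z3-Owl", "")  # "" when the solver has no registered base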
116 changes: 84 additions & 32 deletions smtcomp/generate_website_page.py
@@ -94,6 +94,8 @@ def page_track_suffix(track: defs.Track) -> str:

class PodiumStep(BaseModel):
name: str
baseSolver: str
deltaBaseSolver: int
competing: str # yes or no
errorScore: int
correctScore: int
@@ -115,6 +117,7 @@ class PodiumDivision(BaseModel):
participants: str # participants_2023
disagreements: str # disagreements_2023
division: str # Arith
is_competitive: bool # true = at least 2 substantially different solvers were submitted
track: track_name
n_benchmarks: int
time_limit: int
@@ -237,14 +240,27 @@ class Podium(RootModel):
root: PodiumDivision | PodiumCrossDivision | PodiumSummaryResults = Field(..., discriminator="layout")


def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
def podium_steps(config: defs.Config, podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
if podium is None:
return []
else:
return [
PodiumStep(
podiums = []
non_competitive = []
for s in podium:
cscore = s["correctly_solved_score"]
delta = 0
base_solver = defs.Config.baseSolverMap2025.get(s["solver"], "")
if base_solver != "":
for sprime in podium:
if sprime["solver"] == base_solver:
delta = cscore - sprime["correctly_solved_score"]
break

ps = PodiumStep(
name=s["solver"],
competing="yes", # TODO
baseSolver=base_solver,
deltaBaseSolver=delta,
competing="yes" if s["solver"] in config.competitive_solvers else "no",
errorScore=s["error_score"],
correctScore=s["correctly_solved_score"],
CPUScore=s["cpu_time_score"],
@@ -257,36 +273,70 @@ def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
timeout=s["timeout"],
memout=s["memout"],
)
for s in podium
]

if s["solver"] not in config.competitive_solvers:
non_competitive.append(ps)
else:
podiums.append(ps)

return podiums + non_competitive


def make_podium(config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track) -> PodiumDivision:
def make_podium(
config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track, results: pl.LazyFrame
) -> PodiumDivision:
def get_winner(l: List[dict[str, str]] | None) -> str:
# TODO select only participating
if l is None or not l:
return "-"

l = [e for e in l if e["solver"] in config.competitive_solvers]

if l is None or not l or l[0]["correctly_solved_score"] == 0:
return "-"
else:
return l[0]["solver"]

def is_competitive_division(results: pl.LazyFrame, division: int, for_division: bool) -> bool:
"""
A division in a track is competitive if at least two substantially different
solvers (i.e., solvers from two different teams) were submitted.
"""

solvers = (
results.filter(pl.col("division" if for_division else "logic") == division)
.select("solver")
.unique()
.collect()
.get_column("solver")
.to_list()
)

# Avoid counting solvers of the same solver family twice, under the assumption
# that names follow the format <solver-family>-<suffix> (holds for SMT-COMP 2025)
# TODO: improve this criterion in the future
return len(set([sol.split("-")[0].lower() for sol in solvers])) >= 2

if for_division:
competitive_division = is_competitive_division(results, d["division"], for_division)
division = defs.Division.name_of_int(d["division"])
logics = dict((defs.Logic.name_of_int(d2["logic"]), d2["n"]) for d2 in d["logics"])
else:
division = defs.Logic.name_of_int(d["logic"])
competitive_division = is_competitive_division(results, d["logic"], for_division)
logics = dict()

if (track == defs.Track.Cloud) or (track == defs.Track.Parallel):
winner_seq = "-"
steps_seq = []
else:
winner_seq = get_winner(d[smtcomp.scoring.Kind.seq.name])
steps_seq = podium_steps(d[smtcomp.scoring.Kind.seq.name])
steps_seq = podium_steps(config, d[smtcomp.scoring.Kind.seq.name])

return PodiumDivision(
resultdate="2024-07-08",
resultdate="2025-08-11",
year=config.current_year,
divisions=f"divisions_{config.current_year}",
is_competitive=competitive_division,
participants=f"participants_{config.current_year}",
disagreements=f"disagreements_{config.current_year}",
division=division,
@@ -301,15 +351,15 @@ def get_winner(l: List[dict[str, str]] | None) -> str:
winner_unsat=get_winner(d[smtcomp.scoring.Kind.unsat.name]),
winner_24s=get_winner(d[smtcomp.scoring.Kind.twentyfour.name]),
sequential=steps_seq,
parallel=podium_steps(d[smtcomp.scoring.Kind.par.name]),
sat=podium_steps(d[smtcomp.scoring.Kind.sat.name]),
unsat=podium_steps(d[smtcomp.scoring.Kind.unsat.name]),
twentyfour=podium_steps(d[smtcomp.scoring.Kind.twentyfour.name]),
parallel=podium_steps(config, d[smtcomp.scoring.Kind.par.name]),
sat=podium_steps(config, d[smtcomp.scoring.Kind.sat.name]),
unsat=podium_steps(config, d[smtcomp.scoring.Kind.unsat.name]),
twentyfour=podium_steps(config, d[smtcomp.scoring.Kind.twentyfour.name]),
)


def sq_generate_datas(
config: defs.Config, results: pl.LazyFrame, for_division: bool, track: defs.Track
config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame, for_division: bool, track: defs.Track
) -> dict[str, PodiumDivision]:
"""
Generate data for divisions or for logics
@@ -322,12 +372,13 @@ def sq_generate_datas(
group_by = "logic"
name_of_int = defs.Logic.name_of_int

selection = selection.filter(selected=True)

# TODO it should be done after filter_for
len_by_division = results.group_by(group_by).agg(total=pl.col("file").n_unique())
len_by_division = selection.group_by(group_by).agg(total=pl.len())

def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, results: pl.LazyFrame) -> pl.LazyFrame:
results = smtcomp.scoring.filter_for(kind, config, results)

return (
sort(
intersect(results, len_by_division, on=[group_by])
@@ -368,8 +419,8 @@ def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, result

if for_division:
lf_logics = [
results.group_by("division", "logic")
.agg(n=pl.col("file").n_unique())
selection.group_by("division", "logic")
.agg(n=pl.len())
.group_by("division")
.agg(logics=pl.struct("logic", "n"))
]
@@ -382,7 +433,7 @@ def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, result

df = r.collect()

return dict((name_of_int(d[group_by]), make_podium(config, d, for_division, track)) for d in df.to_dicts())
return dict((name_of_int(d[group_by]), make_podium(config, d, for_division, track, results)) for d in df.to_dicts())


def get_kind(a: PodiumDivision, k: smtcomp.scoring.Kind) -> list[PodiumStep]:
@@ -471,7 +522,7 @@ def get_winner(l: List[PodiumStepBiggestLead] | None) -> str:
winner_seq = get_winner(sequential)

return PodiumBiggestLead(
resultdate="2024-07-08",
resultdate="2025-08-11",
year=config.current_year,
track=track,
results=f"results_{config.current_year}",
@@ -589,7 +640,7 @@ def get_winner(
winner_seq = get_winner(sequential, scores, data, track)

return PodiumBestOverall(
resultdate="2024-07-08",
resultdate="2025-08-11",
year=config.current_year,
track=track,
results=f"results_{config.current_year}",
@@ -684,7 +735,7 @@ def timeScore(vws_step: PodiumStep) -> float:
steps_seq = ld[smtcomp.scoring.Kind.seq]

return PodiumLargestContribution(
resultdate="2024-07-08",
resultdate="2025-08-11",
year=config.current_year,
track=track,
results=f"results_{config.current_year}",
@@ -702,7 +753,9 @@
)


def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.Track) -> PodiumLargestContribution:
def largest_contribution(
config: defs.Config, selection: pl.LazyFrame, scores: pl.LazyFrame, track: defs.Track
) -> PodiumLargestContribution:
for_division = True
# For each solver compute its corresponding best solver
# TODO: check what is competitive solver (unsound?)
@@ -727,11 +780,10 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.
pl.min("cpu_time_score"),
sound_status=pl.col("sound_status").first(),
answer=pl.col("answer").first(),
logic=-1,
)
.with_columns(solver=pl.lit("virtual"), error_score=0)
)
virtual_datas = sq_generate_datas(config, virtual_scores, for_division, track)
virtual_datas = sq_generate_datas(config, selection, virtual_scores, for_division, track)

# For each solver Compute virtual solver without the solver
solvers = scores.select("division", "solver").unique()
@@ -749,10 +801,11 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.
sound_status=pl.col("sound_status").first(),
error_score=0,
answer=pl.col("answer").first(),
logic=-1,
)
)
virtual_without_solver_datas = sq_generate_datas(config, virtual_without_solver_scores, for_division, track)
virtual_without_solver_datas = sq_generate_datas(
config, selection, virtual_without_solver_scores, for_division, track
)

large = largest_contribution_ranking(config, virtual_datas, virtual_without_solver_datas, ratio_by_division, track)

@@ -764,8 +817,7 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.
return large


def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track) -> None:

def export_results(config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame, track: defs.Track) -> None:
page_suffix = page_track_suffix(track)

dst = config.web_results
@@ -780,7 +832,7 @@ def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track
all_divisions: list[PodiumDivision] = []

for for_division in [True, False]:
datas = sq_generate_datas(config, scores, for_division, track)
datas = sq_generate_datas(config, selection, scores, for_division, track)

for name, data in datas.items():
(dst / f"{name.lower()}-{page_suffix}.md").write_text(data.model_dump_json(indent=1))
@@ -795,7 +847,7 @@ def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track
bigdata = biggest_lead_ranking(config, datas, track)
(dst / f"biggest-lead-{page_suffix}.md").write_text(bigdata.model_dump_json(indent=1))

largedata = largest_contribution(config, scores, track)
largedata = largest_contribution(config, selection, scores, track)
(dst / f"largest-contribution-{page_suffix}.md").write_text(largedata.model_dump_json(indent=1))

all_divisions.sort(key=lambda x: x.division)
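For readers skimming the diff, a minimal standalone sketch of the derived-solver increment that podium_steps now reports (hypothetical helper; the dict keys mirror the scoring rows used above):

def delta_over_base(entry: dict, podium: list[dict], base_map: dict[str, str]) -> int:
    # Increment of a derived solver's correctly-solved score over its base solver.
    # Returns 0 when the solver is not derived or its base is absent from the podium.
    base_name = base_map.get(entry["solver"], "")
    for other in podium:
        if base_name and other["solver"] == base_name:
            return entry["correctly_solved_score"] - other["correctly_solved_score"]
    return 0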