diff --git a/Makefile b/Makefile
index 22cbdfc1..3616c0ae 100644
--- a/Makefile
+++ b/Makefile
@@ -31,6 +31,12 @@ build: clean-build ## Build wheel file using poetry
 clean-build: ## clean build artifacts
 	@rm -rf dist
 
+.PHONY: clean-web-results
+clean-web-results: ## clean web results
+	@rm -rf web/content/results
+	@rm -rf web/public/results
+
+
 .PHONY: help
 help:
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
diff --git a/data/results-mv-2025.json.gz b/data/results-mv-2025.json.gz
index a0abfd08..657679f4 100644
Binary files a/data/results-mv-2025.json.gz and b/data/results-mv-2025.json.gz differ
diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py
index 03e8db08..62229828 100644
--- a/smtcomp/benchexec.py
+++ b/smtcomp/benchexec.py
@@ -28,6 +28,8 @@ def get_suffix(track: defs.Track) -> str:
             return "_parallel"
         case defs.Track.Cloud:
             return "_cloud"
+        case _:
+            raise ValueError(f"Unhandled track: {track}")
 
 
 def get_xml_name(s: defs.Submission, track: defs.Track, division: defs.Division) -> str:
@@ -46,9 +48,8 @@ class CmdTask(BaseModel):
     taskdirs: List[str]
 
 
-def generate_benchmark_yml(
-    ymlfile: Path, benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]
-) -> None:
+def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]) -> None:
+    ymlfile = benchmark.with_suffix(".yml")
     with ymlfile.open("w") as f:
         f.write("format_version: '2.0'\n\n")
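The yml file is now derived from the benchmark path instead of being passed in; `Path.with_suffix` swaps only the final extension, so the description lands next to the scrambled file. A minimal sketch, not part of the patch (file name invented):

    from pathlib import Path

    # .with_suffix replaces only the last suffix of the path:
    benchmark = Path("QF_BV/scrambled42.smt2")
    assert benchmark.with_suffix(".yml") == Path("QF_BV/scrambled42.yml")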
""" + """ + Solver -> Base solver map for 2025 + TODO: refactor this into Submission + """ + baseSolverMap2025 = { + "Bitwuzla-MachBV": "Bitwuzla-MachBV-base", + "Z3-Inc-Z3++": "Z3-Inc-Z3++-base", + "Z3-Noodler-Mocha": "Z3-Noodler-Mocha-base", + "Z3-Owl": "Z3-Owl-base", + "Z3-Noodler": "Z3-Noodler", + "z3siri": "z3siri-base", + "Z3-alpha": "Z3-alpha-base", + } + def __init__(self, data: Path | None) -> None: self.id = self.__class__.__next_id__ self.__class__.__next_id__ += 1 @@ -1578,6 +1600,10 @@ def submissions(self) -> list[Submission]: Submission.model_validate_json(Path(file).read_text()) for file in self.data.glob("../submissions/*.json") ] + @functools.cached_property + def competitive_solvers(self) -> list[str]: + return [s.name for s in self.submissions if s.competitive] + @functools.cached_property def web_results(self) -> Path: return self.data / ".." / "web" / "content" / "results" diff --git a/smtcomp/generate_website_page.py b/smtcomp/generate_website_page.py index a8fb1ba9..84499e57 100644 --- a/smtcomp/generate_website_page.py +++ b/smtcomp/generate_website_page.py @@ -94,6 +94,8 @@ def page_track_suffix(track: defs.Track) -> str: class PodiumStep(BaseModel): name: str + baseSolver: str + deltaBaseSolver: int competing: str # yes or no errorScore: int correctScore: int @@ -115,6 +117,7 @@ class PodiumDivision(BaseModel): participants: str # participants_2023 disagreements: str # disagreements_2023 division: str # Arith + is_competitive: bool # true = least 2 subst. different solvers were submitted track: track_name n_benchmarks: int time_limit: int @@ -237,14 +240,27 @@ class Podium(RootModel): root: PodiumDivision | PodiumCrossDivision | PodiumSummaryResults = Field(..., discriminator="layout") -def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]: +def podium_steps(config: defs.Config, podium: List[dict[str, Any]] | None) -> List[PodiumStep]: if podium is None: return [] else: - return [ - PodiumStep( + podiums = [] + non_competitive = [] + for s in podium: + cscore = s["correctly_solved_score"] + delta = 0 + derived_solver = defs.Config.baseSolverMap2025.get(s["solver"], "") + if derived_solver != "": + for sprime in podium: + if sprime["solver"] == defs.Config.baseSolverMap2025.get(s["solver"], ""): + delta = cscore - sprime["correctly_solved_score"] + break + + ps = PodiumStep( name=s["solver"], - competing="yes", # TODO + baseSolver=derived_solver, + deltaBaseSolver=delta, + competing="yes" if s["solver"] in config.competitive_solvers else "no", errorScore=s["error_score"], correctScore=s["correctly_solved_score"], CPUScore=s["cpu_time_score"], @@ -257,23 +273,56 @@ def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]: timeout=s["timeout"], memout=s["memout"], ) - for s in podium - ] + + if not s["solver"] in config.competitive_solvers: + non_competitive.append(ps) + else: + podiums.append(ps) + + return podiums + non_competitive -def make_podium(config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track) -> PodiumDivision: +def make_podium( + config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track, results: pl.LazyFrame +) -> PodiumDivision: def get_winner(l: List[dict[str, str]] | None) -> str: - # TODO select only participating + if l is None or not l: + return "-" + + l = [e for e in l if e["solver"] in config.competitive_solvers] + if l is None or not l or l[0]["correctly_solved_score"] == 0: return "-" else: return l[0]["solver"] + def is_competitive_division(results: 
 
 
-def make_podium(config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track) -> PodiumDivision:
+def make_podium(
+    config: defs.Config, d: dict[str, Any], for_division: bool, track: defs.Track, results: pl.LazyFrame
+) -> PodiumDivision:
     def get_winner(l: List[dict[str, str]] | None) -> str:
-        # TODO select only participating
+        if not l:
+            return "-"
+
+        l = [e for e in l if e["solver"] in config.competitive_solvers]
+
+        if not l or l[0]["correctly_solved_score"] == 0:
             return "-"
         else:
             return l[0]["solver"]
 
+    def is_competitive_division(results: pl.LazyFrame, division: int, for_division: bool) -> bool:
+        """
+        A division in a track is competitive if at least two substantially different
+        solvers (i.e., solvers from two different teams) were submitted.
+        """
+
+        solvers = (
+            results.filter(pl.col("division" if for_division else "logic") == division)
+            .select("solver")
+            .unique()
+            .collect()
+            .get_column("solver")
+            .to_list()
+        )
+
+        # Avoid counting solvers of the same solver family, under the assumption
+        # that names follow the format <family>-<variant> (holds for SMT-COMP 2025).
+        # TODO: improve this criterion in the future
+        return len({sol.split("-")[0].lower() for sol in solvers}) >= 2
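A minimal sketch, not part of the patch, of the family criterion (solver names taken from this PR's submissions):

    def families(solvers: list[str]) -> set[str]:
        # First dash-separated component, lower-cased, as in the code above.
        return {sol.split("-")[0].lower() for sol in solvers}

    assert families(["Bitwuzla-16core", "Bitwuzla-32core"]) == {"bitwuzla"}  # one family: not competitive
    assert len(families(["Bitwuzla-16core", "Z3-Parti-Z3pp-16core"])) >= 2   # two families: competitive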
"logic") + .agg(n=pl.len()) .group_by("division") .agg(logics=pl.struct("logic", "n")) ] @@ -382,7 +433,7 @@ def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, result df = r.collect() - return dict((name_of_int(d[group_by]), make_podium(config, d, for_division, track)) for d in df.to_dicts()) + return dict((name_of_int(d[group_by]), make_podium(config, d, for_division, track, results)) for d in df.to_dicts()) def get_kind(a: PodiumDivision, k: smtcomp.scoring.Kind) -> list[PodiumStep]: @@ -471,7 +522,7 @@ def get_winner(l: List[PodiumStepBiggestLead] | None) -> str: winner_seq = get_winner(sequential) return PodiumBiggestLead( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, track=track, results=f"results_{config.current_year}", @@ -589,7 +640,7 @@ def get_winner( winner_seq = get_winner(sequential, scores, data, track) return PodiumBestOverall( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, track=track, results=f"results_{config.current_year}", @@ -684,7 +735,7 @@ def timeScore(vws_step: PodiumStep) -> float: steps_seq = ld[smtcomp.scoring.Kind.seq] return PodiumLargestContribution( - resultdate="2024-07-08", + resultdate="2025-08-11", year=config.current_year, track=track, results=f"results_{config.current_year}", @@ -702,7 +753,9 @@ def timeScore(vws_step: PodiumStep) -> float: ) -def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs.Track) -> PodiumLargestContribution: +def largest_contribution( + config: defs.Config, selection: pl.LazyFrame, scores: pl.LazyFrame, track: defs.Track +) -> PodiumLargestContribution: for_division = True # For each solver compute its corresponding best solver # TODO: check what is competitive solver (unsound?) @@ -727,11 +780,10 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs. pl.min("cpu_time_score"), sound_status=pl.col("sound_status").first(), answer=pl.col("answer").first(), - logic=-1, ) .with_columns(solver=pl.lit("virtual"), error_score=0) ) - virtual_datas = sq_generate_datas(config, virtual_scores, for_division, track) + virtual_datas = sq_generate_datas(config, selection, virtual_scores, for_division, track) # For each solver Compute virtual solver without the solver solvers = scores.select("division", "solver").unique() @@ -749,10 +801,11 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs. sound_status=pl.col("sound_status").first(), error_score=0, answer=pl.col("answer").first(), - logic=-1, ) ) - virtual_without_solver_datas = sq_generate_datas(config, virtual_without_solver_scores, for_division, track) + virtual_without_solver_datas = sq_generate_datas( + config, selection, virtual_without_solver_scores, for_division, track + ) large = largest_contribution_ranking(config, virtual_datas, virtual_without_solver_datas, ratio_by_division, track) @@ -764,8 +817,7 @@ def largest_contribution(config: defs.Config, scores: pl.LazyFrame, track: defs. 
return large -def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track) -> None: - +def export_results(config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame, track: defs.Track) -> None: page_suffix = page_track_suffix(track) dst = config.web_results @@ -780,7 +832,7 @@ def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track all_divisions: list[PodiumDivision] = [] for for_division in [True, False]: - datas = sq_generate_datas(config, scores, for_division, track) + datas = sq_generate_datas(config, selection, scores, for_division, track) for name, data in datas.items(): (dst / f"{name.lower()}-{page_suffix}.md").write_text(data.model_dump_json(indent=1)) @@ -795,7 +847,7 @@ def export_results(config: defs.Config, results: pl.LazyFrame, track: defs.Track bigdata = biggest_lead_ranking(config, datas, track) (dst / f"biggest-lead-{page_suffix}.md").write_text(bigdata.model_dump_json(indent=1)) - largedata = largest_contribution(config, scores, track) + largedata = largest_contribution(config, selection, scores, track) (dst / f"largest-contribution-{page_suffix}.md").write_text(largedata.model_dump_json(indent=1)) all_divisions.sort(key=lambda x: x.division) diff --git a/smtcomp/main.py b/smtcomp/main.py index e7496c23..ccfd9a69 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -283,7 +283,7 @@ def stats_of_benchexec_results( """ config = defs.Config(data) - selected = smtcomp.results.helper_get_results(config, results, track) + selected, _ = smtcomp.results.helper_get_results(config, results, track) sum_answer = (pl.col("answer") == -1).sum() waiting = (pl.col("answer") == -1).all() @@ -365,7 +365,7 @@ def find_disagreement_results( """ config = defs.Config(data) config.use_previous_results_for_status = use_previous_year_results - selected = smtcomp.results.helper_get_results(config, results, track) + selected, _ = smtcomp.results.helper_get_results(config, results, track) df = ( selected.filter(pl.col("answer").is_in([int(defs.Answer.Sat), int(defs.Answer.Unsat)])) @@ -422,7 +422,7 @@ def scoring_removed_benchmarks( ) -> None: config = defs.Config(data) config.use_previous_results_for_status = use_previous_year_results - results = smtcomp.results.helper_get_results(config, src, track) + results, _ = smtcomp.results.helper_get_results(config, src, track) results = smtcomp.scoring.add_disagreements_info(results, track) @@ -454,7 +454,7 @@ def show_scores( If src is empty use results in data """ config = defs.Config(data) - results = smtcomp.results.helper_get_results(config, src, track) + results, _ = smtcomp.results.helper_get_results(config, src, track) smtcomp.scoring.sanity_check(config, results) @@ -1071,8 +1071,8 @@ def export_results_pages(data: Path, track: defs.Track, results: list[Path] = ty Generate page for results pages in web directory """ config = defs.Config(data) - lf = smtcomp.results.helper_get_results(config, results, track) - smtcomp.generate_website_page.export_results(config, lf, track) + lf, selection = smtcomp.results.helper_get_results(config, results, track) + smtcomp.generate_website_page.export_results(config, selection, lf, track) @app.command() diff --git a/smtcomp/model_validation.py b/smtcomp/model_validation.py index a315f9a6..62eb35a9 100644 --- a/smtcomp/model_validation.py +++ b/smtcomp/model_validation.py @@ -29,7 +29,7 @@ def raise_stack_limit() -> None: resource.setrlimit(resource.RLIMIT_STACK, (soft, hard)) -def check_locally(config: defs.Config, smt2_file: Path, model: str, 
@@ -94,28 +94,22 @@ def check_result_locally(
     d = resultdir / "model_validation_results"
     file_cache = d / f"{str(r.file)}.json.gz"
     scramble_id = scramble_mapping[r.file]
-    force_logic_all = False
     if file_cache.is_file():
-        result = defs.ValidationResult.model_validate_json(read_cin(file_cache)).root
-        match result:
-            case defs.ValidationError(stderr="E:forbidden-array-sort\n"):
-                force_logic_all = True
+        return defs.ValidationResult.model_validate_json(read_cin(file_cache)).root
+    else:
+        match r.answer:
+            case defs.Answer.Sat:
+                filedir = smtcomp.scramble_benchmarks.benchmark_files_dir(cachedir, rid.track)
+                basename = smtcomp.scramble_benchmarks.scramble_basename(scramble_id)
+                smt2_file = filedir / str(r.logic) / basename
+                val = check_locally(config, smt2_file, model)
             case _:
-                return result
-
-    match r.answer:
-        case defs.Answer.Sat:
-            filedir = smtcomp.scramble_benchmarks.benchmark_files_dir(cachedir, rid.track)
-            basename = smtcomp.scramble_benchmarks.scramble_basename(scramble_id)
-            smt2_file = filedir / str(r.logic) / basename
-            val = check_locally(config, smt2_file, model, force_logic_all)
-        case _:
-            val = defs.noValidation
-    d.mkdir(parents=True, exist_ok=True)
-    s = defs.ValidationResult(val).model_dump_json(indent=1)
-    write_cin(file_cache, s)
-    return val
+                val = defs.noValidation
+        d.mkdir(parents=True, exist_ok=True)
+        s = defs.ValidationResult(val).model_dump_json(indent=1)
+        write_cin(file_cache, s)
+        return val
 
 
 def prepare_model_validation_tasks(
diff --git a/smtcomp/results.py b/smtcomp/results.py
index 8020c651..d3f2dad0 100644
--- a/smtcomp/results.py
+++ b/smtcomp/results.py
@@ -160,7 +160,7 @@ def convert_run(r: ET.Element) -> Run | None:
 def parse_xml(file: Path) -> Results:
     result = ET.fromstring(read_cin(file))
     runs = list(filter(lambda r: r is not None, map(convert_run, result.iterfind("run"))))
-    return Results(runid=RunId.unmangle(result.attrib["name"]), options=result.attrib["options"], runs=runs)
+    return Results(runid=RunId.unmangle(result.attrib["name"]), options=result.attrib["options"], runs=runs)  # type: ignore
 
 
 def parse_results(resultdir: Path) -> Iterator[Results]:
@@ -306,7 +306,7 @@ def to_pl(resultdir: Path, logfiles: LogFile, r: Results) -> pl.LazyFrame:
     def convert(a: Run) -> Dict[str, Any]:
         d = dict(a)
         if r.runid.track == defs.Track.ModelValidation and a.answer == defs.Answer.Sat:
-            d["answer"] = mv_get_cached_answer(resultdir, a.file)
+            a.answer = mv_get_cached_answer(resultdir, a.file)
 
         if r.runid.track == defs.Track.Incremental:
             answer, nb_answers, last_time = inc_get_nb_answers(logfiles, r.runid, a.benchmark_yml)
@@ -447,7 +449,9 @@ def parse_dir(dir: Path, no_cache: bool) -> pl.LazyFrame:
     return results
 
 
-def helper_get_results(config: defs.Config, results: List[Path], track: defs.Track) -> pl.LazyFrame:
+def helper_get_results(
+    config: defs.Config, results: List[Path], track: defs.Track
+) -> Tuple[pl.LazyFrame, pl.LazyFrame]:
     """
     If results is empty use the one in data
 
@@ -479,7 +481,7 @@ def helper_get_results(config: defs.Config, results: List[Path], track: defs.Tra
     lf = lf.drop("logic", "participation")  # Hack for participation 0 bug: move "participation" to on= for 2025
"participation") # Hack for participation 0 bug move "participation" to on= for 2025, lf = lf.drop("benchmark_yml", "unsat_core") - selection = smtcomp.selection.helper(config, track).with_columns(track=int(track)) + selection = smtcomp.selection.helper(config, track).filter(selected=True).with_columns(track=int(track)) selection = ( selection.unique() @@ -491,38 +493,13 @@ def helper_get_results(config: defs.Config, results: List[Path], track: defs.Tra .lazy() ) - defaults = { - "division": -1, - "family": -1, - "logic": -1, - "name": "", - "participation": -1, - "selected": True, - } - - if track == defs.Track.Incremental: - defaults["check_sats"] = -1 - else: - defaults["status"] = -1 - defaults["asserts"] = -1 - - if track == defs.Track.Parallel: - defaults["hard"] = True - defaults["unsolved"] = False - else: - defaults["current_result"] = -1 - defaults["new"] = False - defaults["result"] = -1 - defaults["run"] = True - defaults["trivial"] = False - defaults["file_right"] = "" - selected = intersect(selection, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"]) + selected = add_columns( - lf, selected, + lf, on=["file", "solver", "track"], - defaults=defaults, + defaults={"answer": -1, "cputime_s": 0, "memory_B": 0, "walltime_s": 0, "nb_answers": -1}, ) - return selected + return selected, selection diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py index 88138dcd..fc95e7af 100644 --- a/smtcomp/scramble_benchmarks.py +++ b/smtcomp/scramble_benchmarks.py @@ -30,9 +30,9 @@ def unscramble_yml_basename(basename: str) -> int: assert basename[0:9] == "scrambled" assert basename[-5:] == ".smt2" s = basename[9:-5] - file_id, core = list(map(int, s.split("_"))) - file_id += core * 10_000_000 # Hack for unsat_core_verification - return file_id + file_id, core = list(map(int, s.split("_"))) # type: ignore + file_id += core * 10_000_000 # type: ignore # Hack for unsat_core_verification + return file_id # type: ignore def benchmark_files_dir(cachedir: Path, track: defs.Track) -> Path: @@ -57,40 +57,34 @@ def scramble_file(fdict: dict, incremental: bool, srcdir: Path, dstdir: Path, ar i = "incremental" else: i = "non-incremental" - orig_path = ( + fsrc = ( srcdir.joinpath(i) .joinpath(str(defs.Logic.of_int(fdict["logic"]))) .joinpath(Path(fdict["family"])) .joinpath(fdict["name"]) ) dstdir = dstdir.joinpath(str(defs.Logic.of_int(fdict["logic"]))) - scrambled_path = dstdir.joinpath(scramble_basename(fdict["scramble_id"])) + fdst = dstdir.joinpath(scramble_basename(fdict["scramble_id"])) dstdir.mkdir(parents=True, exist_ok=True) if incremental: subprocess.run( ["grep", "-o", "(set-info :status \\(sat\\|unsat\\|unknown\\))"], - stdin=orig_path.open("r"), - stdout=scrambled_path.open("w"), + stdin=fsrc.open("r"), + stdout=fdst.open("w"), ) - subprocess.run(["sed", "-i", "s/(set-info :status \\(.*\\))/\\1/", str(scrambled_path)]) - with scrambled_path.open("a") as dstfile: + subprocess.run(["sed", "-i", "s/(set-info :status \\(.*\\))/\\1/", str(fdst)]) + with fdst.open("a") as dstfile: dstfile.write("--- BENCHMARK BEGINS HERE ---\n") - subprocess.run(args, stdin=orig_path.open("r"), stdout=scrambled_path.open("a")) + subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("a")) else: try: - subprocess.run(args, stdin=orig_path.open("r"), stdout=scrambled_path.open("w"), check=True) + subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("w"), check=True) except subprocess.CalledProcessError as e: - print(f"[red]Warning[/red] scrambler crashed on 
{orig_path}") + print(f"[red]Warning[/red] scrambler crashed on {fsrc}") - expected = get_expected_result(orig_path) if not incremental else None - - mangled_name = "_".join( - [str(fdict["file"]), str(defs.Logic.of_int(fdict["logic"])), fdict["family"].replace("/", "__"), fdict["name"]] - ) - yaml_dst = dstdir.joinpath(mangled_name).with_suffix(".yml") - - generate_benchmark_yml(yaml_dst, scrambled_path, expected, orig_path.relative_to(srcdir)) + expected = get_expected_result(fsrc) if not incremental else None + generate_benchmark_yml(fdst, expected, fsrc.relative_to(srcdir)) def create_scramble_id(benchmarks: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame: diff --git a/smtcomp/utils.py b/smtcomp/utils.py index 87df9295..6ac6bf94 100644 --- a/smtcomp/utils.py +++ b/smtcomp/utils.py @@ -28,8 +28,8 @@ def g(x: Tuple[W1, W2, U]) -> Tuple[W1, W2, V] | None: def add_columns(dst: pl.LazyFrame, from_: pl.LazyFrame, on: list[str], defaults: Dict[str, Any]) -> pl.LazyFrame: - dst_cols = set(dst.collect_schema().names()) - from_cols = set(from_.collect_schema().names()) + dst_cols = set(dst.columns) + from_cols = set(from_.columns) on_cols = set(on) assert on_cols.issubset(dst_cols) assert on_cols.issubset(from_cols) @@ -43,8 +43,8 @@ def intersect(dst: pl.LazyFrame, from_: pl.LazyFrame, on: list[str]) -> pl.LazyF """ All the possible matches in the two given tables """ - dst_cols = set(dst.collect_schema().names()) - from_cols = set(from_.collect_schema().names()) + dst_cols = set(dst.columns) + from_cols = set(from_.columns) on_cols = set(on) assert on_cols.issubset(dst_cols) assert on_cols.issubset(from_cols) diff --git a/submissions/STP-Parti-Bitwuzla-p16.json b/submissions/STP-Parti-Bitwuzla-p16.json index eb5a13b4..79c8df9e 100644 --- a/submissions/STP-Parti-Bitwuzla-p16.json +++ b/submissions/STP-Parti-Bitwuzla-p16.json @@ -1,5 +1,5 @@ { - "name": "STP-Parti-Bitwuzla-32core", + "name": "STP-Parti-Bitwuzla-16core", "contributors": [ "Mengyu Zhao", "Zhenghang Xu", diff --git a/submissions/STP-Parti-Bitwuzla-p8.json b/submissions/STP-Parti-Bitwuzla-p8.json index a123e462..8e8f3436 100644 --- a/submissions/STP-Parti-Bitwuzla-p8.json +++ b/submissions/STP-Parti-Bitwuzla-p8.json @@ -1,5 +1,5 @@ { - "name": "STP-Parti-Bitwuzla-16core", + "name": "STP-Parti-Bitwuzla-8core", "contributors": [ "Mengyu Zhao", "Zhenghang Xu", diff --git a/submissions/Z3-Parti-Z3pp-p16.json b/submissions/Z3-Parti-Z3pp-p16.json index 03ea07d9..ed933335 100644 --- a/submissions/Z3-Parti-Z3pp-p16.json +++ b/submissions/Z3-Parti-Z3pp-p16.json @@ -1,5 +1,5 @@ { - "name": "Z3-Parti-Z3pp-32core", + "name": "Z3-Parti-Z3pp-16core", "contributors": [ "Mengyu Zhao", "Shaowei Cai" diff --git a/submissions/Z3-Parti-Z3pp-p32.json b/submissions/Z3-Parti-Z3pp-p32.json index 8cc45168..edc79836 100644 --- a/submissions/Z3-Parti-Z3pp-p32.json +++ b/submissions/Z3-Parti-Z3pp-p32.json @@ -1,5 +1,5 @@ { - "name": "Z3-Parti-Z3pp-64core", + "name": "Z3-Parti-Z3pp-32core", "contributors": [ "Mengyu Zhao", "Shaowei Cai" diff --git a/submissions/Z3-Parti-Z3pp-p8.json b/submissions/Z3-Parti-Z3pp-p8.json index 2e24e12c..451aedd8 100644 --- a/submissions/Z3-Parti-Z3pp-p8.json +++ b/submissions/Z3-Parti-Z3pp-p8.json @@ -1,5 +1,5 @@ { - "name": "Z3-Parti-Z3pp-16core", + "name": "Z3-Parti-Z3pp-8core", "contributors": [ "Mengyu Zhao", "Shaowei Cai" diff --git a/submissions/bitwuzla_parallel_16.json b/submissions/bitwuzla_parallel_16.json index 9e8bfd03..f2b6c74e 100644 --- a/submissions/bitwuzla_parallel_16.json +++ 
b/submissions/bitwuzla_parallel_16.json
@@ -1,5 +1,5 @@
 {
-  "name": "Bitwuzla-32core",
+  "name": "Bitwuzla-16core",
   "contributors": ["Aina Niemetz", "Mathias Preiner"],
   "contacts": ["Mathias Preiner "],
   "final" : true,
diff --git a/submissions/bitwuzla_parallel_32.json b/submissions/bitwuzla_parallel_32.json
index 7e50a7f5..20bb9a0a 100644
--- a/submissions/bitwuzla_parallel_32.json
+++ b/submissions/bitwuzla_parallel_32.json
@@ -1,5 +1,5 @@
 {
-  "name": "Bitwuzla-64core",
+  "name": "Bitwuzla-32core",
   "contributors": ["Aina Niemetz", "Mathias Preiner"],
   "contacts": ["Mathias Preiner "],
   "final" : true,
diff --git a/submissions/z3alpha-base.json b/submissions/z3alpha-base.json
index d7a879a2..42f99e04 100644
--- a/submissions/z3alpha-base.json
+++ b/submissions/z3alpha-base.json
@@ -16,7 +16,6 @@
   "tracks": ["SingleQuery"],
   "divisions": [
     "Arith",
-    "Bitvec",
     "QF_Bitvec",
     "QF_LinearIntArith",
     "QF_LinearRealArith",
diff --git a/web/content/solver_submission/template.json b/web/content/solver_submission/template.json
deleted file mode 120000
index 4f276398..00000000
--- a/web/content/solver_submission/template.json
+++ /dev/null
@@ -1 +0,0 @@
-../../../submissions/template/template.json
\ No newline at end of file
diff --git a/web/content/solver_submission/template.json b/web/content/solver_submission/template.json
new file mode 100644
index 00000000..14045240
--- /dev/null
+++ b/web/content/solver_submission/template.json
@@ -0,0 +1,31 @@
+{
+  "name": "",
+  "contributors": [
+    "First Smith",
+    { "name": "Second Baker", "website": "http://baker.com/" }
+  ],
+  "contacts": ["contact name "],
+  "archive": {
+    "url": "http://example.com/solver.tar.gz",
+    "h": { "sha256": "012345" }
+  },
+  "website": "http://example.com/",
+  "system_description": "http://example.com/system.pdf",
+  "command": ["relative_cmd", "default_command_line"],
+  "solver_type": "Standalone",
+  "seed": "42",
+  "participations": [
+    { "tracks": ["SingleQuery"], "divisions": ["Equality"] },
+    {
+      "tracks": ["SingleQuery"],
+      "logics": "QF_.*LRA.*",
+      "command": ["relative_cmd", "other_option"]
+    },
+    {
+      "tracks": ["SingleQuery"],
+      "logics": ["LIA"],
+      "archive": { "url": "http://example.com/solver_lia.tar.gz" },
+      "command": ["relative_cmd", "--super-lia"]
+    }
+  ]
+}
diff --git a/web/themes/smtcomp/assets/css/main.css b/web/themes/smtcomp/assets/css/main.css
index 6d9216c3..8203b2d2 100644
--- a/web/themes/smtcomp/assets/css/main.css
+++ b/web/themes/smtcomp/assets/css/main.css
@@ -593,7 +593,7 @@ td.left {
 }
 
 td.non-competing-grey {
-  color: #707070;
+  color: gray;
   a {
     color: #808080;
   }
diff --git a/web/themes/smtcomp/layouts/_default/result.html b/web/themes/smtcomp/layouts/_default/result.html
index 8e1e5a4f..4ae2d6e0 100644
--- a/web/themes/smtcomp/layouts/_default/result.html
+++ b/web/themes/smtcomp/layouts/_default/result.html
@@ -6,20 +6,32 @@
   {{ $trueDivision := and (isset .Params "logics") (.Params.logics | len | ne 0) }}
   {{ $prettyTrack := index .Site.Data.pretty_names.track .Params.track }}
 
+  {{ $.Scratch.Set "hasBaseSolvers" false }}
+  {{ $.Scratch.Set "hasNonCompetingSolvers" false }}
+
+
+{{ if .Params.is_competitive }}

{{ .Params.division }} ({{ $prettyTrack }})

-

- Competition results for the {{ .Params.division }} - {{ if $trueDivision }} division {{ else }} logic {{ end }} - in the {{ $prettyTrack }}. -

+

+ Competition results for the {{ .Params.division }} + {{ if $trueDivision }} division {{ else }} logic {{ end }} + in the {{ $prettyTrack }}. +

Results were generated on {{ .Params.resultdate }}

+

+

+ Benchmarks: {{ .Params.n_benchmarks }}
+ Time Limit: {{ .Params.time_limit }} seconds
+ Memory Limit: {{ .Params.mem_limit }} GB +

+{{ else }} +

{{ .Params.division }} ({{ $prettyTrack }})
non-competitive

+

+ Benchmarks: {{ .Params.n_benchmarks }}
+

+{{ end }} + -

Results were generated on {{ .Params.resultdate }}

-

- Benchmarks: {{ .Params.n_benchmarks }}
- Time Limit: {{ .Params.time_limit }} seconds
- Memory Limit: {{ .Params.mem_limit }} GB -

{{ if $trueDivision }} Logics:
    @@ -40,6 +52,7 @@

    {{ .Params.division }} ({{ $prettyTrack }})

    {{ $winner_category_names := .Site.Data.pretty_names.winner_performance }} {{ $category_names := .Site.Data.pretty_names.performance }} +{{ if .Params.is_competitive }}

    Winners

    @@ -65,6 +78,7 @@

    Winners

    {{ end }}
    +{{ end }} {{ range $cat := $categories }} {{ with index $.Params $cat }} @@ -88,30 +102,61 @@

    {{ index $category_names $cat }} Performance

    {{ range $solver := . }} - - + +{{ if eq $solver.competing "no" }} + {{ $.Scratch.Set "hasNonCompetingSolvers" true }} +{{ end }} + + + + {{ $solver.name }} {{ if eq $solver.competing "no" }}n{{ end }} - {{ $solver.errorScore }} + {{ $solver.errorScore }} {{ if $solver.errorFootnote }} * {{ end }} - {{ $solver.correctScore }} - {{ $solver.CPUScore }} - {{ $solver.WallScore }} - - {{ $solver.solved }} - {{ $solver.solved_sat }} - {{ $solver.solved_unsat }} - {{ $solver.unsolved }} - {{ $solver.abstained }} - - {{ $solver.timeout }} - {{ $solver.memout }} + + {{ $solver.correctScore }} + {{ if not (eq $solver.baseSolver "") }} + {{ $.Scratch.Set "hasBaseSolvers" true }} + {{ if not (lt $solver.deltaBaseSolver 0) }} +
    (base +{{ $solver.deltaBaseSolver }}) + {{ else }} +
(base {{ $solver.deltaBaseSolver }})
+          {{ end }}
+        {{ end }}
+
+      {{ $solver.CPUScore }}
+      {{ $solver.WallScore }}
+
+      {{ $solver.solved }}
+      {{ $solver.solved_sat }}
+      {{ $solver.solved_unsat }}
+      {{ $solver.unsolved }}
+      {{ $solver.abstained }}
+
+      {{ $solver.timeout }}
+      {{ $solver.memout }}
 
     {{ end }}
+
+{{ if ($.Scratch.Get "hasBaseSolvers") }}
+
+  (base +/- n): for derived solvers, the increment over the base solver
    +
    +{{ end }} + +{{ if ($.Scratch.Get "hasNonCompetingSolvers") }} + + n: non-competing solver + +{{ end }} + + + {{ end }} {{ end }} diff --git a/web/themes/smtcomp/layouts/_default/result_comp.html b/web/themes/smtcomp/layouts/_default/result_comp.html index dce9e91f..5fd7a800 100644 --- a/web/themes/smtcomp/layouts/_default/result_comp.html +++ b/web/themes/smtcomp/layouts/_default/result_comp.html @@ -107,7 +107,9 @@

    {{ index $category_names $cat }}

    {{ $solver.division }} - {{ $solver.name }}{{ if eq $solver.competing "no" }}n{{ end }} + + {{ $solver.name }}{{ if eq $solver.competing "no" }}n{{ end }} + {{ $solver.contribution}} diff --git a/web/themes/smtcomp/layouts/_default/results.html b/web/themes/smtcomp/layouts/_default/results.html index cfbd8812..33ac64b4 100644 --- a/web/themes/smtcomp/layouts/_default/results.html +++ b/web/themes/smtcomp/layouts/_default/results.html @@ -15,7 +15,7 @@ "UnsatCore" "unsat-core" }} -

    SMT-COMP 2024 Results

    +

    SMT-COMP 2025 Results

    {{ if isset $data "processed" }} The processed data are available in the GitHub repository. diff --git a/web/themes/smtcomp/layouts/_default/results_summary.html b/web/themes/smtcomp/layouts/_default/results_summary.html index 0718a9ff..7766f147 100644 --- a/web/themes/smtcomp/layouts/_default/results_summary.html +++ b/web/themes/smtcomp/layouts/_default/results_summary.html @@ -25,7 +25,7 @@ {{ $categories_pretty := .Site.Data.pretty_names.performance }} -

    SMT-COMP 2024 Results - {{ $prettyTrack }}

    +

    SMT-COMP 2025 Results - {{ $prettyTrack }}

    Summary of all competition results for the {{ $prettyTrack }}. @@ -38,6 +38,8 @@

    SMT-COMP 2024 Results - {{ $prettyTrack }}

    {{ range $division := (sort .Params.divisions "division" "asc") }} {{ $divisionUrl := printf "../%s-%s" ($division.division | lower) $trackUrl }} + +{{ if $division.is_competitive }}

    {{ $division.division }}

    @@ -59,6 +61,7 @@

    {{ $division.division }}

    {{ end }}
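
For reference, the `.Params` fields read by these layouts come from the podium JSON that export_results writes into web/content/results. A minimal pydantic sketch, not part of the patch, with an illustrative subset of PodiumDivision's fields (values invented):

    from pydantic import BaseModel

    class MiniPodiumDivision(BaseModel):  # illustrative subset of PodiumDivision
        division: str
        is_competitive: bool
        n_benchmarks: int

    page = MiniPodiumDivision(division="Arith", is_competitive=True, n_benchmarks=1000)
    print(page.model_dump_json(indent=1))  # becomes the front matter of the division's result page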
    +{{ end }} {{ end }}