diff --git a/.github/workflows/nightly_performance_score_director.yml b/.github/workflows/nightly_performance_score_director.yml
new file mode 100644
index 00000000..351e4a06
--- /dev/null
+++ b/.github/workflows/nightly_performance_score_director.yml
@@ -0,0 +1,31 @@
+name: Schedule a nightly run of the Score Director performance benchmark
+
+on:
+  schedule:
+    - cron: '59 23 * * 1-5'  # Every workday at the end of the day (GitHub evaluates cron in UTC).
+
+jobs:
+  trigger:
+    runs-on: ubuntu-latest
+    permissions:
+      actions: write  # Required to dispatch performance_score_director.yml with the job token.
+      contents: read
+    steps:
+      - name: Checkout timefold-solver
+        uses: actions/checkout@v4
+        with:
+          repository: TimefoldAI/timefold-solver
+      - name: Schedule the other workflow
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}  # The gh CLI needs an explicit token when run inside a workflow.
+        run: |
+          # The checkout is shallow (tip commit only); that is enough to tell whether anything landed recently.
+          if git log --since="24 hours ago" --oneline | grep -q .; then
+            # The working directory is a clone of timefold-solver, so name the repository that owns the workflow.
+            echo '{}' | gh workflow run performance_score_director.yml --repo "$GITHUB_REPOSITORY" --json
+            echo "Launched nightly perf tests." >> "$GITHUB_STEP_SUMMARY"
+          else
+            # Don't waste money.
+            echo "No commits in the past 24 hours." >> "$GITHUB_STEP_SUMMARY"
+          fi
diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml
index 681699b8..ee2eed81 100644
--- a/.github/workflows/performance_score_director.yml
+++ b/.github/workflows/performance_score_director.yml
@@ -1,51 +1,211 @@
-name: Performance - Score Director
+# - Runs entirely on a single machine.
+# - The baseline is established first, then the branch under test is measured.
+# - Each benchmark gives a 99.9 % confidence interval.
+# - The confidence intervals are compared to determine if the branch under test is a regression or an improvement.
+# - The error threshold is expected to be below +/- 2.5 %.
+#   We have yet to see an error of over +/- 4 %.
+#   With the error so high, the impact is that small regressions are not considered statistically significant.
+name: Performance Regression Test - Score Director on: workflow_dispatch: inputs: jdk: - description: 'JDK version (17, 21, 23, ...)' - default: '23' + description: 'JDK version' + default: '21' required: true baseline: description: 'Timefold Solver release' default: '1.14.0' required: true branch: - description: 'Development branch to test against' + description: 'Branch to benchmark (needs to use 999-SNAPSHOT)' default: 'main' required: true branch_owner: description: 'User owning the branch' default: 'TimefoldAI' required: true + async_profiler_version: + description: 'async-profiler version' + default: '3.0' + required: true jobs: - test: - concurrency: - group: perf-score-director-${{ matrix.example }} - cancel-in-progress: true - runs-on: ubuntu-latest + benchmark: + runs-on: perf-linux-x64-2cores strategy: + fail-fast: false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue. matrix: - example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting] + example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] + env: + MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' + MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' steps: - - uses: sdkman/sdkman-action@v1 + - name: Phase 0 - Checkout timefold-solver-benchmarks + uses: actions/checkout@v4 with: - candidate: java - version: ${{ github.event.inputs.jdk }}-tem - - uses: actions/setup-java@v4 + repository: TimefoldAI/timefold-solver-benchmarks + path: ./timefold-solver-benchmarks + + - name: Phase 0 - Setup JDK and Maven + uses: actions/setup-java@v4 with: - distribution: 'jdkfile' java-version: ${{ github.event.inputs.jdk }} 
-          jdkFile: ${{ steps.sdkman.outputs.file }}
-      - name: Checkout timefold-solver-benchmarks
+          distribution: 'temurin'
+          cache: 'maven'
+          server-id: 'timefold-solver-enterprise'
+          server-username: 'MVN_USERNAME'
+          server-password: 'MVN_PASSWORD'
+
+      - name: Phase 0 - Setup Async Profiler
+        working-directory: ./timefold-solver-benchmarks
+        run: |
+          export FILENAME=async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz
+          wget https://github.com/async-profiler/async-profiler/releases/download/v${{ github.event.inputs.async_profiler_version }}/$FILENAME
+          tar -xzf $FILENAME
+          ls -l
+
+      # Fine-tuned for stability on GHA.
+      - name: Phase 0 - Configure the benchmark
+        working-directory: ./timefold-solver-benchmarks
+        shell: bash
+        run: |
+          echo "forks=20" > scoredirector-benchmark.properties
+          echo "warmup_iterations=10" >> scoredirector-benchmark.properties
+          echo "measurement_iterations=5" >> scoredirector-benchmark.properties
+          echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties
+          echo "score_director_type=cs" >> scoredirector-benchmark.properties
+          echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties
+          cat scoredirector-benchmark.properties
+          chmod +x run-scoredirector.sh
+
+      - name: Phase 1 - Compile the benchmark
+        working-directory: ./timefold-solver-benchmarks
+        shell: bash
+        run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}"
+
+      - name: Phase 1 - Run the baseline configuration
+        working-directory: ./timefold-solver-benchmarks
+        id: benchmark_baseline
+        env:
+          RUN_ID: ${{ github.event.inputs.baseline }}
+        shell: bash
+        run: |
+          ./run-scoredirector.sh
+          echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT"
+          echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT"
+          echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT"
+
+      - name: Phase 2 - Checkout timefold-solver
         uses: actions/checkout@v4
         with:
-          repository: TimefoldAI/timefold-solver-benchmarks
-          path: ./timefold-solver-benchmarks
-      - name: Compile the benchmarks
+          repository: ${{ github.event.inputs.branch_owner }}/timefold-solver
+          ref: ${{ github.event.inputs.branch }}
+          path: ./timefold-solver
+
+      - name: Phase 2 - Quickly build timefold-solver
+        working-directory: ./timefold-solver
+        shell: bash
+        run: mvn -B -Dquickly clean install
+
+      # Clone timefold-solver-enterprise
+      - name: Phase 2 - Checkout timefold-solver-enterprise (Specified)
+        id: checkout-solver-enterprise
+        uses: actions/checkout@v4
+        continue-on-error: true
+        with:
+          repository: TimefoldAI/timefold-solver-enterprise
+          ref: ${{ github.event.inputs.branch }}
+          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
+          path: ./timefold-solver-enterprise
+      - name: Phase 2 - Checkout timefold-solver-enterprise (Fallback)
+        if: steps.checkout-solver-enterprise.outcome != 'success'
+        uses: actions/checkout@v4
+        with:
+          repository: TimefoldAI/timefold-solver-enterprise
+          ref: main
+          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
+          path: ./timefold-solver-enterprise
+
+      - name: Phase 2 - Quickly build timefold-solver-enterprise
+        working-directory: ./timefold-solver-enterprise
+        shell: bash
+        run: mvn -B -Dquickly clean install
+
+      - name: Phase 2 - Compile the benchmarks
+        working-directory: ./timefold-solver-benchmarks
+        shell: bash
+        run: mvn clean install -B -Dquickly -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}"
+
+      - name: Phase 2 - Run the benchmark on the new code
+        id: benchmark_new
+        working-directory: ./timefold-solver-benchmarks
+        env:
+          RUN_ID: ${{ github.event.inputs.branch }}
+        shell: bash
+        run: |
+          ./run-scoredirector.sh
+          echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT"
+          echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT"
+          echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT"
+
+      - name: Phase 3 - Archive benchmark data
+        uses: actions/upload-artifact@v4
+        with:
+          name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }}
+          path: |
+            ./timefold-solver-benchmarks/results/scoredirector
+
+      - name: Phase 3 - Report results
         working-directory: ./timefold-solver-benchmarks
+        env:
+          OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }}
+          OLD_RANGE_MID: ${{ steps.benchmark_baseline.outputs.RANGE_MID }}
+          OLD_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }}
+          NEW_RANGE_START: ${{ steps.benchmark_new.outputs.RANGE_START }}
+          NEW_RANGE_MID: ${{ steps.benchmark_new.outputs.RANGE_MID }}
+          NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }}
         shell: bash
-        run: mvn clean install -Dai.timefold.solver.version=${{ github.event.inputs.baseline }}
+        run: |
+          export DIFF_START=$(echo "scale=2; ($OLD_RANGE_START * 100) / $NEW_RANGE_START" | bc)  # Multiply first: bc truncates each division to 'scale' digits.
+          export DIFF_MID=$(echo "scale=2; ($OLD_RANGE_MID * 100) / $NEW_RANGE_MID" | bc)  # Old-to-new ratio in percent; feeds the 2 % gate below.
+          export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END * 100) / $NEW_RANGE_END" | bc)
+          export FAIL=false
+
+          if (( $(echo "$DIFF_MID >= 98.00" | bc -l) && $(echo "$DIFF_MID <= 102.00"|bc -l) )); then
+            # Ignore differences of up to 2 %.
+            echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY
+            echo "(Decided to ignore a very small difference of under 2 %.)" >> $GITHUB_STEP_SUMMARY
+          else
+            if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then  # Confidence intervals overlap.
+              if [ "$NEW_RANGE_START" -ge "$OLD_RANGE_MID" ]; then  # New interval sits entirely at or above the old mean.
+                echo "### 🍀 Possible improvement 🍀" >> $GITHUB_STEP_SUMMARY
+              elif [ "$NEW_RANGE_END" -le "$OLD_RANGE_MID" ]; then  # Mirror case: new interval sits entirely at or below the old mean.
+                echo "### ⚠️ Possible regression ⚠️" >> $GITHUB_STEP_SUMMARY
+              else
+                echo "### Performance unchanged " >> $GITHUB_STEP_SUMMARY
+              fi
+            elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then  # No overlap, new interval is higher (higher is better).
+              echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> $GITHUB_STEP_SUMMARY
+            else
+              echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> $GITHUB_STEP_SUMMARY
+              export FAIL=true
+            fi
+          fi
+
+          echo "| | **Ref** | **Min** | **Mean** | **Max** |" >> $GITHUB_STEP_SUMMARY
+          echo "|:------:|:-----------:|:-----------------:|:-----------------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY
+          echo "| _Old_ | [v${{ github.event.inputs.baseline }}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${{ github.event.inputs.baseline }}) | ${OLD_RANGE_START} | ${OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY
+          echo "| _New_ | [${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }}](https://github.com/${{ github.event.inputs.branch_owner }}/timefold-solver/tree/${{ github.event.inputs.branch }}) | ${NEW_RANGE_START} | ${NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY
+          echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_MID} % | ${DIFF_END} % |" >> $GITHUB_STEP_SUMMARY
+
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "Min and max define a 99.9 % confidence interval." >> $GITHUB_STEP_SUMMARY
+          echo "Min and max are in operations per second. Higher is better." >> $GITHUB_STEP_SUMMARY
+          echo "Diff under 100 % represents an improvement, over 100 % a regression." >> $GITHUB_STEP_SUMMARY
+
+          if [ "$FAIL" = true ]; then
+            exit 1
+          fi
\ No newline at end of file
diff --git a/.github/workflows/turtle.yml b/.github/workflows/turtle.yml index b772709a..dd82cbc1 100644 --- a/.github/workflows/turtle.yml +++ b/.github/workflows/turtle.yml @@ -2,7 +2,7 @@ name: Turtle Tests on: schedule: - - cron: '0 2 * * *' # Every day at 2am UTC + - cron: '0 3 * * *' # Every day at 3am UTC jobs: test: diff --git a/pom.xml b/pom.xml index 2d784166..b74cecbf 100644 --- a/pom.xml +++ b/pom.xml @@ -34,8 +34,8 @@ - ai.timefold.solver - timefold-solver-build-parent + ai.timefold.solver.enterprise + timefold-solver-enterprise-build-parent ${version.ai.timefold.solver} pom import @@ -58,7 +58,6 @@ ai.timefold.solver.enterprise timefold-solver-enterprise-core - ${version.ai.timefold.solver} ai.timefold.solver @@ -124,7 +123,6 @@ ai.timefold.solver timefold-solver-core - ${version.ai.timefold.solver} test-jar test @@ -167,6 +165,13 @@ 3.13.0 ${java.release} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + @@ -207,7 +212,7 @@ ai.timefold.solver timefold-solver-ide-config - ${project.version} + ${version.ai.timefold.solver} @@ -242,6 +247,18 @@ + + quickly + + + quickly + + + + true + true + + jmh diff --git a/run-coldstart.sh b/run-coldstart.sh index 1ab4f2c2..ebeb6fc5 100755 --- a/run-coldstart.sh +++ b/run-coldstart.sh @@ -1,4 +1,4 @@ #!/bin/bash sudo -i sysctl kernel.perf_event_paranoid=1 sudo -i sysctl kernel.kptr_restrict=0 -nohup taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main > target/nohup.out 2>&1 & \ No newline at end of file +java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main \ No newline at end of file diff --git a/run-scoredirector.sh b/run-scoredirector.sh index 0bf05605..49c71f27 100755 --- a/run-scoredirector.sh +++ b/run-scoredirector.sh @@ -1,4 +1,4 @@ #!/bin/bash sudo -i sysctl kernel.perf_event_paranoid=1 sudo -i sysctl 
kernel.kptr_restrict=0 -nohup taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main > target/nohup.out 2>&1 & \ No newline at end of file +java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main \ No newline at end of file diff --git a/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java b/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java index 07b8423b..ac394a52 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java +++ b/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java @@ -147,9 +147,7 @@ private void readDepartmentListAndDepartmentSpecialismList() throws IOException List departmentList = new ArrayList<>(departmentListSize); idToDepartmentMap = new HashMap<>(departmentListSize); - List departmentSpecialismList = - new ArrayList<>( - departmentListSize * 5); + List departmentSpecialismList = new ArrayList<>(departmentListSize * 5); long departmentSpecialismId = 0L; for (int i = 0; i < departmentListSize; i++) { String line = bufferedReader.readLine(); @@ -229,8 +227,7 @@ private void readRoomListAndRoomSpecialismListAndRoomEquipmentList() throws IOEx String line = bufferedReader.readLine(); String[] lineTokens = splitByPipelineAndTrim(line, 6); String[] roomTokens = splitBySpace(lineTokens[0], 2); - Department department = idToDepartmentMap.get( - Long.parseLong(lineTokens[2])); + Department department = idToDepartmentMap.get(Long.parseLong(lineTokens[2])); Room room = new Room(Long.parseLong(roomTokens[0]), roomTokens[1], department, Integer.parseInt(lineTokens[1]), diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java index bfc99821..63db61c5 100644 --- 
a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java @@ -38,7 +38,6 @@ import ai.timefold.solver.benchmarks.micro.coldstart.jmh.TimeToSolverFactoryBenchmark; import ai.timefold.solver.benchmarks.micro.common.AbstractMain; -import org.openjdk.jmh.results.Result; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.ChainedOptionsBuilder; @@ -76,7 +75,7 @@ public static void main(String[] args) throws RunnerException, IOException { var relativeScoreErrorThreshold = configuration.getRelativeScoreErrorThreshold(); var thresholdForPrint = ((int) Math.round(relativeScoreErrorThreshold * 10_000)) / 100.0D; runResults.forEach(result -> { - Result primaryResult = result.getPrimaryResult(); + var primaryResult = result.getPrimaryResult(); var score = primaryResult.getScore(); var scoreError = primaryResult.getScoreError(); var relativeScoreError = scoreError / score; diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index d73a9172..3a04c0dd 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -38,6 +38,7 @@ import java.time.Instant; import java.time.ZoneId; import java.util.Arrays; +import java.util.Objects; import java.util.Optional; import java.util.Properties; @@ -58,7 +59,9 @@ public abstract class AbstractMain { protected AbstractMain(String subpackage) { this.subpackage = subpackage; - this.resultsDirectory = Path.of("results", subpackage, getTimestamp()); + var runId = Objects.requireNonNullElse(System.getenv("RUN_ID"), getTimestamp()) + .strip(); + this.resultsDirectory = Path.of("results", subpackage, runId); resultsDirectory.toFile().mkdirs(); } @@ -175,9 +178,9 @@ public 
ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .forks(configuration.getForkCount()) .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) - .jvmArgs("-XX:+UseParallelGC", "-Xms1g", "-Xmx1g") // Minimize GC overhead. - .result(resultsDirectory.resolve("benchmarkResults.csv").toAbsolutePath().toString()) - .resultFormat(ResultFormatType.CSV) + .jvmArgs("-XX:+UseParallelGC", "-Xmx2g") // Stable, predictable GC pause times. + .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) + .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); } diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java index 1bf05aed..55b39dbc 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java @@ -36,7 +36,6 @@ import ai.timefold.solver.benchmarks.micro.common.AbstractMain; -import org.openjdk.jmh.results.Result; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.ChainedOptionsBuilder; @@ -77,7 +76,7 @@ public static void main(String[] args) throws RunnerException, IOException { var relativeScoreErrorThreshold = configuration.getRelativeScoreErrorThreshold(); var thresholdForPrint = ((int) Math.round(relativeScoreErrorThreshold * 10_000)) / 100.0D; runResults.forEach(result -> { - Result primaryResult = result.getPrimaryResult(); + var primaryResult = result.getPrimaryResult(); var score = primaryResult.getScore(); var scoreError = primaryResult.getScoreError(); var relativeScoreError = scoreError / score; diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index fb3c09cc..dac603bb 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -8,7 +8,7 @@ - +