From 1a0662b6a94abf99a0f2ac17a7496a6436b9ee4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:00:51 +0200 Subject: [PATCH 01/62] Set up SDKMAN manually --- .../workflows/performance_score_director.yml | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 681699b8..01343af1 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -4,8 +4,8 @@ on: workflow_dispatch: inputs: jdk: - description: 'JDK version (17, 21, 23, ...)' - default: '23' + description: 'JDK version' + default: '23-tem' required: true baseline: description: 'Timefold Solver release' @@ -31,15 +31,14 @@ jobs: matrix: example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting] steps: - - uses: sdkman/sdkman-action@v1 - with: - candidate: java - version: ${{ github.event.inputs.jdk }}-tem - - uses: actions/setup-java@v4 - with: - distribution: 'jdkfile' - java-version: ${{ github.event.inputs.jdk }} - jdkFile: ${{ steps.sdkman.outputs.file }} + - name: Setup SDKMAN + run: | + curl -s "https://get.sdkman.io" | bash + source "$HOME/.sdkman/bin/sdkman-init.sh" + sdkman install java ${{ github.event.inputs.jdk }} + java -version + sdkman install maven + mvn -version - name: Checkout timefold-solver-benchmarks uses: actions/checkout@v4 with: From 03d2c40e576a9b2d71154f149c882165a90e2592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:01:51 +0200 Subject: [PATCH 02/62] Fix --- .github/workflows/performance_score_director.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 01343af1..8eecc402 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -35,9 +35,9 @@ jobs: run: | curl -s "https://get.sdkman.io" | bash source "$HOME/.sdkman/bin/sdkman-init.sh" - sdkman install java ${{ github.event.inputs.jdk }} + sdk install java ${{ github.event.inputs.jdk }} java -version - sdkman install maven + sdk install maven mvn -version - name: Checkout timefold-solver-benchmarks uses: actions/checkout@v4 From a5cc15aba40fac20044d75e1db2120538d972fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:06:56 +0200 Subject: [PATCH 03/62] Grab async-profiler --- .github/workflows/performance_score_director.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 8eecc402..8413b72d 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -19,6 +19,10 @@ on: description: 'User owning the branch' default: 'TimefoldAI' required: true + async_profiler_version: + description: 'async-profiler version' + default: '3.0' + required: true jobs: @@ -39,12 +43,20 @@ jobs: java -version sdk install maven mvn -version + - name: Checkout timefold-solver-benchmarks uses: actions/checkout@v4 with: repository: TimefoldAI/timefold-solver-benchmarks path: ./timefold-solver-benchmarks + + - name: Setup Async Profiler + path: ./timefold-solver-benchmarks + run: | + wget https://github.com/async-profiler/async-profiler/releases/download/v${{ github.event.inputs.async_profiler_version }}/async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz + tar -xzf async-profiler-3.0-linux-x64.tar.gz + - name: Compile the benchmarks working-directory: ./timefold-solver-benchmarks shell: bash - run: mvn clean install -Dai.timefold.solver.version=${{ github.event.inputs.baseline }} + run: mvn clean install -Dai.timefold.solver.version=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" From a0886b4a7337c26b4e9ab01b104042c215fe09d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:08:07 +0200 Subject: [PATCH 04/62] Working directory --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 8413b72d..966b27b2 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -51,7 +51,7 @@ jobs: path: ./timefold-solver-benchmarks - name: Setup Async Profiler - path: ./timefold-solver-benchmarks + working-directory: ./timefold-solver-benchmarks run: | wget https://github.com/async-profiler/async-profiler/releases/download/v${{ github.event.inputs.async_profiler_version }}/async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz tar -xzf async-profiler-3.0-linux-x64.tar.gz From b3f3e30e53ad1831dd1f6c1d953436fe2daebcf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:11:21 +0200 Subject: [PATCH 05/62] Fix --- .github/workflows/performance_score_director.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 966b27b2..a4a592cc 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -27,9 +27,6 @@ on: jobs: test: - concurrency: - group: perf-score-director-${{ matrix.example }} - cancel-in-progress: true runs-on: ubuntu-latest strategy: matrix: @@ -53,8 +50,10 @@ jobs: - name: Setup Async Profiler working-directory: ./timefold-solver-benchmarks run: | - wget https://github.com/async-profiler/async-profiler/releases/download/v${{ github.event.inputs.async_profiler_version }}/async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz - tar -xzf async-profiler-3.0-linux-x64.tar.gz + export FILENAME=async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz + wget https://github.com/async-profiler/async-profiler/releases/download/v${{ github.event.inputs.async_profiler_version }}/$FILENAME + tar -xzf $FILENAME + ls -l - name: Compile the benchmarks working-directory: ./timefold-solver-benchmarks From 1bd9fe1297a57fa43a763bb09ee3bcfcec593978 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:18:35 +0200 Subject: [PATCH 06/62] Setup --- .../workflows/performance_score_director.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a4a592cc..5a65ab49 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -5,7 +5,7 @@ on: inputs: jdk: description: 'JDK version' - default: '23-tem' + default: '23' required: true baseline: description: 'Timefold Solver release' @@ -32,14 +32,15 @@ jobs: matrix: example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting] steps: - - name: Setup SDKMAN - run: | - curl -s "https://get.sdkman.io" | bash - source "$HOME/.sdkman/bin/sdkman-init.sh" - sdk install java ${{ github.event.inputs.jdk }} - java -version - sdk install maven - mvn -version + - name: Setup JDK and Maven + uses: actions/setup-java@v4 + with: + java-version: ${{ github.event.inputs.jdk }} + distribution: 'temurin' + cache: 'maven' + server-id: 'timefold-solver-enterprise' + server-username: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }} + server-password: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }} - name: Checkout timefold-solver-benchmarks uses: actions/checkout@v4 From 555d97cc33952cef7f3893ca4ec9ff5ae97064af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:19:55 +0200 Subject: [PATCH 07/62] Inverse --- .github/workflows/performance_score_director.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 5a65ab49..2ba75c5c 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -32,6 +32,12 @@ jobs: matrix: example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting] steps: + - name: Checkout timefold-solver-benchmarks + uses: actions/checkout@v4 + with: + repository: TimefoldAI/timefold-solver-benchmarks + path: ./timefold-solver-benchmarks + - name: Setup JDK and Maven uses: actions/setup-java@v4 with: @@ -42,12 +48,6 @@ jobs: server-username: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }} server-password: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }} - - name: Checkout timefold-solver-benchmarks - uses: actions/checkout@v4 - with: - repository: TimefoldAI/timefold-solver-benchmarks - path: ./timefold-solver-benchmarks - - name: Setup Async Profiler working-directory: ./timefold-solver-benchmarks run: | From 6671ba91c1cbde8cf0154962da7023b3eb678d53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:21:21 +0200 Subject: [PATCH 08/62] Solver version --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 2ba75c5c..21c20ef9 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -59,4 +59,4 @@ jobs: - name: Compile the benchmarks working-directory: ./timefold-solver-benchmarks shell: bash - run: mvn clean install -Dai.timefold.solver.version=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" + run: mvn clean install -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" From fa821c568a5e2bb066b19dec378d205923e54d0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:30:12 +0200 Subject: [PATCH 09/62] Fix --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 21c20ef9..3ba3834f 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -59,4 +59,4 @@ jobs: - name: Compile the benchmarks working-directory: ./timefold-solver-benchmarks shell: bash - run: mvn clean install -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" + run: mvn clean install -B -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" From 38fca109b6d9ced4388812259e451f3b88dc3c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:38:27 +0200 Subject: [PATCH 10/62] Fix parent --- pom.xml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 2d784166..3064dcb7 100644 --- a/pom.xml +++ b/pom.xml @@ -34,8 +34,8 @@ - ai.timefold.solver - timefold-solver-build-parent + ai.timefold.solver.enterprise + timefold-solver-enterprise-build-parent ${version.ai.timefold.solver} pom import @@ -58,7 +58,6 @@ ai.timefold.solver.enterprise timefold-solver-enterprise-core - ${version.ai.timefold.solver} ai.timefold.solver @@ -124,7 +123,6 @@ ai.timefold.solver timefold-solver-core - ${version.ai.timefold.solver} test-jar test From 9f8b4026cf97a9843673c4d043b8d35f26bb0268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:43:56 +0200 Subject: [PATCH 11/62] Fix auth --- .github/workflows/performance_score_director.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 3ba3834f..82df6350 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -40,13 +40,16 @@ jobs: - name: Setup JDK and Maven uses: actions/setup-java@v4 + env: + MVN_USERNAME: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }} + MVN_PASSWORD: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }} with: java-version: ${{ github.event.inputs.jdk }} distribution: 'temurin' cache: 'maven' server-id: 'timefold-solver-enterprise' - server-username: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }} - server-password: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }} + server-username: 'MVN_USERNAME' + server-password: 'MVN_PASSWORD' - name: Setup Async Profiler working-directory: ./timefold-solver-benchmarks From 18951a3093deb31e79718797a06b4bc32c00f0ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:48:28 +0200 Subject: [PATCH 12/62] Fix auth? --- .github/workflows/performance_score_director.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 82df6350..4599a41c 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -31,6 +31,9 @@ jobs: strategy: matrix: example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting] + env: + MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' + MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' steps: - name: Checkout timefold-solver-benchmarks uses: actions/checkout@v4 @@ -40,9 +43,6 @@ jobs: - name: Setup JDK and Maven uses: actions/setup-java@v4 - env: - MVN_USERNAME: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }} - MVN_PASSWORD: ${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }} with: java-version: ${{ github.event.inputs.jdk }} distribution: 'temurin' From b1dae8763853fd56f6289018081e5df102fc0acf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:52:06 +0200 Subject: [PATCH 13/62] Quickly --- .github/workflows/performance_score_director.yml | 2 +- pom.xml | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 4599a41c..bb252036 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -62,4 +62,4 @@ jobs: - name: Compile the benchmarks working-directory: ./timefold-solver-benchmarks shell: bash - run: mvn clean install -B -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" + run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" diff --git a/pom.xml b/pom.xml index 3064dcb7..eb98a99d 100644 --- a/pom.xml +++ b/pom.xml @@ -240,6 +240,18 @@ + + quickly + + + quickly + + + + true + true + + jmh From 32e8bda5d8213d41ac7517535f3e946894c1cd4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 09:55:44 +0200 Subject: [PATCH 14/62] Quickly? --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index eb98a99d..bfd31bce 100644 --- a/pom.xml +++ b/pom.xml @@ -205,7 +205,7 @@ ai.timefold.solver timefold-solver-ide-config - ${project.version} + ${version.ai.timefold.solver} From 1b15e16e9e9ceae6d46901f0fcdbc915038c7372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 10:02:42 +0200 Subject: [PATCH 15/62] Run the baseline --- .github/workflows/performance_score_director.yml | 11 +++++++++++ pom.xml | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index bb252036..68ec75cf 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -63,3 +63,14 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" + + - name: Run the baseline configuration + working-directory: ./timefold-solver-benchmarks + shell: bash + run: | + echo "score_director_type=cs" > scoredirector-benchmark.properties + echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties + cat scoredirector-benchmark.properties + chmod +x run-scoredirector.sh + ./run-scoredirector.sh + diff --git a/pom.xml b/pom.xml index bfd31bce..39e60fee 100644 --- a/pom.xml +++ b/pom.xml @@ -249,7 +249,7 @@ true - true + true From 0c6f53cc7b11cdb30a3b5f5e870f17bb6f58e272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 10:05:19 +0200 Subject: [PATCH 16/62] Wait for the thing to exit --- run-coldstart.sh | 2 +- run-scoredirector.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/run-coldstart.sh b/run-coldstart.sh index 1ab4f2c2..24339d18 100755 --- a/run-coldstart.sh +++ b/run-coldstart.sh @@ -1,4 +1,4 @@ #!/bin/bash sudo -i sysctl kernel.perf_event_paranoid=1 sudo -i sysctl kernel.kptr_restrict=0 -nohup taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main > target/nohup.out 2>&1 & \ No newline at end of file +taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main \ No newline at end of file diff --git a/run-scoredirector.sh b/run-scoredirector.sh index 0bf05605..8aa39bbd 100755 --- a/run-scoredirector.sh +++ b/run-scoredirector.sh @@ -1,4 +1,4 @@ #!/bin/bash sudo -i sysctl kernel.perf_event_paranoid=1 sudo -i sysctl kernel.kptr_restrict=0 -nohup taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main > target/nohup.out 2>&1 & \ No newline at end of file +taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main \ No newline at end of file From 29448a844eabdd533f5de040af73f70132f6571f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 10:07:54 +0200 Subject: [PATCH 17/62] Fix names --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 68ec75cf..14db6d3b 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - example: [cloudbalancing, conferencescheduling, curriculumcourse, examination, machinereassignment, meetingscheduling, nurserostering, pas, taskassigning, travelingtournament, tsp, vehiclerouting] + example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, pas, task_assigning, traveling_tournament, tsp, vehicle_routing] env: MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' From 33054599bb26a933ec9fec29c41a69f14e824ae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 10:22:38 +0200 Subject: [PATCH 18/62] Include annotation processor --- pom.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pom.xml b/pom.xml index 39e60fee..b74cecbf 100644 --- a/pom.xml +++ b/pom.xml @@ -165,6 +165,13 @@ 3.13.0 ${java.release} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + From f23e9b6930907539057b605827e4993ca798608e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 10:27:30 +0200 Subject: [PATCH 19/62] More fixes --- .github/workflows/performance_score_director.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 14db6d3b..600c6f67 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -30,7 +30,10 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, pas, task_assigning, traveling_tournament, tsp, vehicle_routing] + example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] + concurrency: + group: turtle-${{ matrix.example }} + cancel-in-progress: true env: MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' From 6fe642244b52b65f94598df3c31541d046b0cd0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 10:52:36 +0200 Subject: [PATCH 20/62] Second line --- .../workflows/performance_score_director.yml | 69 ++++++++++++++++--- .../benchmarks/micro/coldstart/Main.java | 5 +- 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 600c6f67..f79d5e84 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -5,7 +5,7 @@ on: inputs: jdk: description: 'JDK version' - default: '23' + default: '21' required: true baseline: description: 'Timefold Solver release' @@ -38,13 +38,13 @@ jobs: MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' steps: - - name: Checkout timefold-solver-benchmarks + - name: Phase 0 - Checkout timefold-solver-benchmarks uses: actions/checkout@v4 with: repository: TimefoldAI/timefold-solver-benchmarks path: ./timefold-solver-benchmarks - - name: Setup JDK and Maven + - name: Phase 0 - Setup JDK and Maven uses: actions/setup-java@v4 with: java-version: ${{ github.event.inputs.jdk }} @@ -54,7 +54,7 @@ jobs: server-username: 'MVN_USERNAME' server-password: 'MVN_PASSWORD' - - name: Setup Async Profiler + - name: Phase 0 - Setup Async Profiler working-directory: ./timefold-solver-benchmarks run: | export FILENAME=async-profiler-${{ github.event.inputs.async_profiler_version }}-linux-x64.tar.gz @@ -62,12 +62,7 @@ jobs: tar -xzf $FILENAME ls -l - - name: Compile the benchmarks - working-directory: ./timefold-solver-benchmarks - shell: bash - run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" - - - name: Run the baseline configuration + - name: Phase 0 - Prepare the benchmarks working-directory: ./timefold-solver-benchmarks shell: bash run: | @@ -75,5 +70,57 @@ jobs: echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties chmod +x run-scoredirector.sh - ./run-scoredirector.sh + + - name: Phase 1 - Compile the benchmarks + working-directory: ./timefold-solver-benchmarks + shell: bash + run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" + + # - name: Phase 1 - Run the baseline configuration + # working-directory: ./timefold-solver-benchmarks + # shell: bash + # run: | + # ./run-scoredirector.sh + - name: Phase 2 - Checkout timefold-solver + uses: actions/checkout@v4 + with: + repository: ${{ github.event.inputs.branch_owner }}/timefold-solver + ref: ${{ github.event.inputs.branch }} + path: ./timefold-solver + + - name: Phase 2 - Quickly build timefold-solver + working-directory: ./timefold-solver + shell: bash + run: mvn -B -Dquickly clean install + + - name: Phase 2 - Checkout timefold-solver-enterprise + uses: actions/checkout@v4 + with: + repository: TimefoldAI/timefold-solver-enterprise + ref: ${{ github.event.inputs.branch }} + token: ${{ secrets.JRELEASER_GITHUB_TOKEN }} # Safe; only used to clone the repo and not stored in the fork. + path: ./timefold-solver-enterprise + + - name: Phase 2 - Quickly build timefold-solver-enterprise + working-directory: ./timefold-solver-enterprise + shell: bash + run: mvn -B -Dquickly clean install + + - name: Phase 2 - Compile the benchmarks + working-directory: ./timefold-solver-benchmarks + shell: bash + run: mvn clean install -B -Dquickly -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" + + - name: Phase 2 - Run the baseline configuration + working-directory: ./timefold-solver-benchmarks + shell: bash + run: | + ./run-scoredirector.sh + + - name: Phase 3 - Archive production artifacts + uses: actions/upload-artifact@v4 + with: + name: results-${{ matrix.example }} + path: | + ./timefold-solver-benchmarks/results diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java index bfc99821..5b2b3dfa 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java @@ -85,8 +85,9 @@ public static void main(String[] args) throws RunnerException, IOException { var benchmarkName = benchParams.getBenchmark() + " " + benchParams.getParam("example"); var relativeScoreErrorForPrint = ((int) Math.round(relativeScoreError * 10_000)) / 100.0D; if (relativeScoreError > relativeScoreErrorThreshold) { - LOGGER.warn("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, - relativeScoreErrorForPrint, thresholdForPrint); + throw new IllegalStateException("Score error for '%s' is too high: ± %s pct (threshold: ± %s pct)." + .formatted(benchmarkName, relativeScoreErrorForPrint, thresholdForPrint)); + } else if (relativeScoreError > (relativeScoreErrorThreshold * 0.9)) { LOGGER.info("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); From a88c1aff5a3621eb4c011267ab1c726215d8190a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 11:01:07 +0200 Subject: [PATCH 21/62] Fail when benchmark over threshold --- .github/workflows/performance_score_director.yml | 2 +- .../solver/benchmarks/micro/coldstart/Main.java | 12 +++++++++--- .../solver/benchmarks/micro/scoredirector/Main.java | 10 ++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index f79d5e84..ef39ef05 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -12,7 +12,7 @@ on: default: '1.14.0' required: true branch: - description: 'Development branch to test against' + description: 'Branch to benchmark (needs to use 999-SNAPSHOT)' default: 'main' required: true branch_owner: diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java index 5b2b3dfa..8b2a3515 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.concurrent.atomic.AtomicBoolean; import ai.timefold.solver.benchmarks.micro.coldstart.jmh.TimeToFirstScoreBenchmark; import ai.timefold.solver.benchmarks.micro.coldstart.jmh.TimeToSolverFactoryBenchmark; @@ -75,6 +76,7 @@ public static void main(String[] args) throws RunnerException, IOException { var relativeScoreErrorThreshold = configuration.getRelativeScoreErrorThreshold(); var thresholdForPrint = ((int) Math.round(relativeScoreErrorThreshold * 10_000)) / 100.0D; + var wasSuccess = new AtomicBoolean(true); runResults.forEach(result -> { Result primaryResult = result.getPrimaryResult(); var score = primaryResult.getScore(); @@ -85,14 +87,18 @@ public static void main(String[] args) throws RunnerException, IOException { var benchmarkName = benchParams.getBenchmark() + " " + benchParams.getParam("example"); var relativeScoreErrorForPrint = ((int) Math.round(relativeScoreError * 10_000)) / 100.0D; if (relativeScoreError > relativeScoreErrorThreshold) { - throw new IllegalStateException("Score error for '%s' is too high: ± %s pct (threshold: ± %s pct)." - .formatted(benchmarkName, relativeScoreErrorForPrint, thresholdForPrint)); + LOGGER.error("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, + relativeScoreErrorForPrint, thresholdForPrint); + wasSuccess.set(false); } else if (relativeScoreError > (relativeScoreErrorThreshold * 0.9)) { - LOGGER.info("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.warn("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); } }); + if (wasSuccess.get()) { + System.exit(1); + } } private static ChainedOptionsBuilder processBenchmark(ChainedOptionsBuilder options, Configuration configuration) { diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java index 1bf05aed..5afad126 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.concurrent.atomic.AtomicBoolean; import ai.timefold.solver.benchmarks.micro.common.AbstractMain; @@ -85,13 +86,18 @@ public static void main(String[] args) throws RunnerException, IOException { var benchParams = result.getParams(); var benchmarkName = benchParams.getBenchmark() + " " + benchParams.getParam("csExample"); var relativeScoreErrorForPrint = ((int) Math.round(relativeScoreError * 10_000)) / 100.0D; + var wasSuccess = new AtomicBoolean(true); if (relativeScoreError > relativeScoreErrorThreshold) { - LOGGER.warn("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.error("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); + wasSuccess.set(false); } else if (relativeScoreError > (relativeScoreErrorThreshold * 0.9)) { - LOGGER.info("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.warn("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); } + if (!wasSuccess.get()) { + System.exit(1); + } }); } From 87492158e9132cf0eecdc0cd9769f3be508516fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 11:33:05 +0200 Subject: [PATCH 22/62] RUN_ID --- .github/workflows/performance_score_director.yml | 14 ++++++++------ .../benchmarks/micro/common/AbstractMain.java | 3 +++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index ef39ef05..d4a07bbf 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -76,11 +76,12 @@ jobs: shell: bash run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" - # - name: Phase 1 - Run the baseline configuration - # working-directory: ./timefold-solver-benchmarks - # shell: bash - # run: | - # ./run-scoredirector.sh + - name: Phase 1 - Run the baseline configuration + working-directory: ./timefold-solver-benchmarks + shell: bash + run: | + EXPORT RUN_ID="${{github.workflow.inputs.baseline}}" + ./run-scoredirector.sh - name: Phase 2 - Checkout timefold-solver uses: actions/checkout@v4 @@ -116,11 +117,12 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | + EXPORT RUN_ID="${{github.workflow.inputs.branch}}" ./run-scoredirector.sh - name: Phase 3 - Archive production artifacts uses: actions/upload-artifact@v4 with: - name: results-${{ matrix.example }} + name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} path: | ./timefold-solver-benchmarks/results diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index d73a9172..da29ec68 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -54,10 +54,13 @@ public abstract class AbstractMain { protected final Logger LOGGER = LoggerFactory.getLogger(getClass()); private final String subpackage; + private final String runId; private final Path resultsDirectory; protected AbstractMain(String subpackage) { this.subpackage = subpackage; + var runId = System.getenv("RUN_ID"); + this.runId = runId == null || runId.isBlank() ? getTimestamp() : runId.strip(); this.resultsDirectory = Path.of("results", subpackage, getTimestamp()); resultsDirectory.toFile().mkdirs(); } From a465f95e32755dc570d9d4e1d131502f3c843232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 11:34:52 +0200 Subject: [PATCH 23/62] Export --- .github/workflows/performance_score_director.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index d4a07bbf..e9295308 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -80,7 +80,7 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - EXPORT RUN_ID="${{github.workflow.inputs.baseline}}" + export RUN_ID="${{github.workflow.inputs.baseline}}" ./run-scoredirector.sh - name: Phase 2 - Checkout timefold-solver @@ -117,7 +117,7 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - EXPORT RUN_ID="${{github.workflow.inputs.branch}}" + export RUN_ID="${{github.workflow.inputs.branch}}" ./run-scoredirector.sh - name: Phase 3 - Archive production artifacts From d066707538c59c7a894bc6fa9befd96f8c4a40a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 11:57:08 +0200 Subject: [PATCH 24/62] No concurrency group --- .github/workflows/performance_score_director.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index e9295308..81fe0282 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -1,4 +1,9 @@ -name: Performance - Score Director +# - Runs entirely on a single machine. +# - The baseline is established first, then the branch under test is measured. +# - Both runs fail if the benchmark error is over predefined thresholds. +# - Then, if both are below thresholds and neither failed, those results must be directly comparable. +# - Therefore, if the difference between the two is over the threshold, then the branch is considered to have regressed. +name: Performance Regression Test - Score Director on: workflow_dispatch: @@ -31,9 +36,6 @@ jobs: strategy: matrix: example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] - concurrency: - group: turtle-${{ matrix.example }} - cancel-in-progress: true env: MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}' MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}' @@ -113,7 +115,7 @@ jobs: shell: bash run: mvn clean install -B -Dquickly -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" - - name: Phase 2 - Run the baseline configuration + - name: Phase 2 - Run the new configuration working-directory: ./timefold-solver-benchmarks shell: bash run: | From 6ab672d049718d2bbda9cf5c0a788d607fb97570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 12:18:59 +0200 Subject: [PATCH 25/62] Attempt to increase test stability --- .github/workflows/performance_score_director.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 81fe0282..1b188fd5 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -68,7 +68,8 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "score_director_type=cs" > scoredirector-benchmark.properties + echo "warmup_iterations=10" > scoredirector-benchmark.properties + echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties chmod +x run-scoredirector.sh From 82e447cb5a57d454696f00d20ef0fff9cb68b2eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 12:48:22 +0200 Subject: [PATCH 26/62] Try again --- .../workflows/performance_score_director.yml | 18 ++++++++++++++++-- .../benchmarks/micro/common/AbstractMain.java | 4 ++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 1b188fd5..f004e6ea 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -34,6 +34,7 @@ jobs: test: runs-on: ubuntu-latest strategy: + fail-fast: false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue. matrix: example: [cloud_balancing, conference_scheduling, curriculum_course, examination, machine_reassignment, meeting_scheduling, nurse_rostering, patient_admission_scheduling, task_assigning, traveling_tournament, tsp, vehicle_routing] env: @@ -68,7 +69,7 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "warmup_iterations=10" > scoredirector-benchmark.properties + echo "forks=15" > scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties @@ -123,7 +124,20 @@ jobs: export RUN_ID="${{github.workflow.inputs.branch}}" ./run-scoredirector.sh - - name: Phase 3 - Archive production artifacts + - name: Phase 3 - Publish benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + name: scoredirector-${{ matrix.example }} + tool: 'jmh' + output-file-path: ./timefold-solver-benchmarks/results/results.json + github-token: ${{ secrets.JRELEASER_GITHUB_TOKEN }} + auto-push: true + alert-threshold: '105%' + comment-on-alert: true + fail-on-alert: true + alert-comment-cc-users: '@triceo' + + - name: Phase 3 - Archive benchmark data uses: actions/upload-artifact@v4 with: name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index da29ec68..099bc7c2 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -179,8 +179,8 @@ public ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) .jvmArgs("-XX:+UseParallelGC", "-Xms1g", "-Xmx1g") // Minimize GC overhead. - .result(resultsDirectory.resolve("benchmarkResults.csv").toAbsolutePath().toString()) - .resultFormat(ResultFormatType.CSV) + .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) + .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); } From ced00edc4a39d8640e05f1b4b6747e944cf9b9ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 13:48:58 +0200 Subject: [PATCH 27/62] Give it more memory to see if GC overhead is behind the instability --- .../workflows/performance_score_director.yml | 20 +++++++++---------- .../benchmarks/micro/common/AbstractMain.java | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index f004e6ea..699feb1e 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -124,22 +124,22 @@ jobs: export RUN_ID="${{github.workflow.inputs.branch}}" ./run-scoredirector.sh + - name: Phase 3 - Archive benchmark data + uses: actions/upload-artifact@v4 + with: + name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} + path: | + ./timefold-solver-benchmarks/results + - name: Phase 3 - Publish benchmark result uses: benchmark-action/github-action-benchmark@v1 with: name: scoredirector-${{ matrix.example }} tool: 'jmh' - output-file-path: ./timefold-solver-benchmarks/results/results.json - github-token: ${{ secrets.JRELEASER_GITHUB_TOKEN }} + output-file-path: ./timefold-solver-benchmarks/results/${{ github.event.inputs.branch }}/results.json + github-token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} auto-push: true alert-threshold: '105%' comment-on-alert: true fail-on-alert: true - alert-comment-cc-users: '@triceo' - - - name: Phase 3 - Archive benchmark data - uses: actions/upload-artifact@v4 - with: - name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} - path: | - ./timefold-solver-benchmarks/results + alert-comment-cc-users: '@triceo' \ No newline at end of file diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index 099bc7c2..a4d1ccc4 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -178,7 +178,7 @@ public ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .forks(configuration.getForkCount()) .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) - .jvmArgs("-XX:+UseParallelGC", "-Xms1g", "-Xmx1g") // Minimize GC overhead. + .jvmArgs("-XX:+UseParallelGC", "-Xmx2g") // Minimize GC overhead. .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); From 4e94d9021e4a9feac84f90d20d78aca2dea44902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Mon, 30 Sep 2024 21:03:07 +0200 Subject: [PATCH 28/62] More forks --- .github/workflows/performance_score_director.yml | 8 +++++--- .../pas/persistence/PatientAdmissionScheduleImporter.java | 7 ++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 699feb1e..fa0e3888 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -69,7 +69,7 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=15" > scoredirector-benchmark.properties + echo "forks=20" > scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties @@ -82,9 +82,10 @@ jobs: - name: Phase 1 - Run the baseline configuration working-directory: ./timefold-solver-benchmarks + env: + RUN_ID: ${{ github.event.inputs.baseline }} shell: bash run: | - export RUN_ID="${{github.workflow.inputs.baseline}}" ./run-scoredirector.sh - name: Phase 2 - Checkout timefold-solver @@ -119,9 +120,10 @@ jobs: - name: Phase 2 - Run the new configuration working-directory: ./timefold-solver-benchmarks + env: + RUN_ID: ${{ github.event.inputs.baseline }} shell: bash run: | - export RUN_ID="${{github.workflow.inputs.branch}}" ./run-scoredirector.sh - name: Phase 3 - Archive benchmark data diff --git a/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java b/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java index 07b8423b..ac394a52 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java +++ b/src/main/java/ai/timefold/solver/benchmarks/examples/pas/persistence/PatientAdmissionScheduleImporter.java @@ -147,9 +147,7 @@ private void readDepartmentListAndDepartmentSpecialismList() throws IOException List departmentList = new ArrayList<>(departmentListSize); idToDepartmentMap = new HashMap<>(departmentListSize); - List departmentSpecialismList = - new ArrayList<>( - departmentListSize * 5); + List departmentSpecialismList = new ArrayList<>(departmentListSize * 5); long departmentSpecialismId = 0L; for (int i = 0; i < departmentListSize; i++) { String line = bufferedReader.readLine(); @@ -229,8 +227,7 @@ private void readRoomListAndRoomSpecialismListAndRoomEquipmentList() throws IOEx String line = bufferedReader.readLine(); String[] lineTokens = splitByPipelineAndTrim(line, 6); String[] roomTokens = splitBySpace(lineTokens[0], 2); - Department department = idToDepartmentMap.get( - Long.parseLong(lineTokens[2])); + Department department = idToDepartmentMap.get(Long.parseLong(lineTokens[2])); Room room = new Room(Long.parseLong(roomTokens[0]), roomTokens[1], department, Integer.parseInt(lineTokens[1]), From 83e8af23719dca749ff5cfb63978a7371a4f9633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 08:14:26 +0200 Subject: [PATCH 29/62] Use default GC to attempt to stabilize --- .../ai/timefold/solver/benchmarks/micro/common/AbstractMain.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index a4d1ccc4..0887c253 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -178,7 +178,6 @@ public ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .forks(configuration.getForkCount()) .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) - .jvmArgs("-XX:+UseParallelGC", "-Xmx2g") // Minimize GC overhead. .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); From 82c00373509405c5744ac29ae70d2d2bac944231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 08:17:07 +0200 Subject: [PATCH 30/62] Respect run ID --- .../solver/benchmarks/micro/common/AbstractMain.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index 0887c253..9a7ebe4d 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -38,6 +38,7 @@ import java.time.Instant; import java.time.ZoneId; import java.util.Arrays; +import java.util.Objects; import java.util.Optional; import java.util.Properties; @@ -54,14 +55,13 @@ public abstract class AbstractMain { protected final Logger LOGGER = LoggerFactory.getLogger(getClass()); private final String subpackage; - private final String runId; private final Path resultsDirectory; protected AbstractMain(String subpackage) { this.subpackage = subpackage; - var runId = System.getenv("RUN_ID"); - this.runId = runId == null || runId.isBlank() ? getTimestamp() : runId.strip(); - this.resultsDirectory = Path.of("results", subpackage, getTimestamp()); + var runId = Objects.requireNonNullElse(System.getenv("RUN_ID"), getTimestamp()) + .strip(); + this.resultsDirectory = Path.of("results", subpackage, runId); resultsDirectory.toFile().mkdirs(); } From 9d169469a7fefcf26b7b17e5e7931cfd1dd75a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 09:56:45 +0200 Subject: [PATCH 31/62] Use ZGC to attempt to stabilize --- .github/workflows/performance_score_director.yml | 2 +- .../timefold/solver/benchmarks/micro/common/AbstractMain.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index fa0e3888..a0002dae 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -121,7 +121,7 @@ jobs: - name: Phase 2 - Run the new configuration working-directory: ./timefold-solver-benchmarks env: - RUN_ID: ${{ github.event.inputs.baseline }} + RUN_ID: ${{ github.event.inputs.branch }} shell: bash run: | ./run-scoredirector.sh diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index 9a7ebe4d..6279b3d4 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -178,6 +178,7 @@ public ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .forks(configuration.getForkCount()) .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) + .jvmArgs("-XX:+UseZGC") // Minimize GC pause times. .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); From 1e6bcf56825520582bd90ebe0c631e124a5b7065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 10:36:48 +0200 Subject: [PATCH 32/62] Increase warmup --- .github/workflows/performance_score_director.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a0002dae..a8359c83 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -70,6 +70,7 @@ jobs: shell: bash run: | echo "forks=20" > scoredirector-benchmark.properties + echo "warmup_iterations=10" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties From 9cf2380da8fb0b29e2094849032b6ab5b9625485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 10:39:45 +0200 Subject: [PATCH 33/62] Increase iterations, decrease forks --- .github/workflows/performance_score_director.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a8359c83..2cb3251e 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -69,8 +69,8 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=20" > scoredirector-benchmark.properties - echo "warmup_iterations=10" >> scoredirector-benchmark.properties + echo "warmup_iterations=5" > scoredirector-benchmark.properties + echo "measurement_iterations=10" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties From 0660578505281a7e6e503390b55bfafc697efc1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 11:30:55 +0200 Subject: [PATCH 34/62] Finishing touches? --- .../workflows/performance_score_director.yml | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 2cb3251e..2a03ed89 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -65,18 +65,20 @@ jobs: tar -xzf $FILENAME ls -l - - name: Phase 0 - Prepare the benchmarks + # Fine-tuned for stability on GHA. + - name: Phase 0 - Configure the benchmark working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "warmup_iterations=5" > scoredirector-benchmark.properties + echo "forks=15" > scoredirector-benchmark.properties + echo "warmup_iterations=5" >> scoredirector-benchmark.properties echo "measurement_iterations=10" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties chmod +x run-scoredirector.sh - - name: Phase 1 - Compile the benchmarks + - name: Phase 1 - Compile the benchmark working-directory: ./timefold-solver-benchmarks shell: bash run: mvn clean install -B -Dquickly -Dversion.ai.timefold.solver=${{ github.event.inputs.baseline }} -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" @@ -101,12 +103,23 @@ jobs: shell: bash run: mvn -B -Dquickly clean install - - name: Phase 2 - Checkout timefold-solver-enterprise + # Clone timefold-solver-enterprise + - name: Phase 2 - Checkout timefold-solver-enterprise (PR) # Checkout the PR branch first, if it exists + id: checkout-solver-enterprise uses: actions/checkout@v4 + continue-on-error: true with: repository: TimefoldAI/timefold-solver-enterprise ref: ${{ github.event.inputs.branch }} - token: ${{ secrets.JRELEASER_GITHUB_TOKEN }} # Safe; only used to clone the repo and not stored in the fork. + token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} + path: ./timefold-solver-enterprise + - name: Phase 2 - Checkout timefold-solver-enterprise (main) # Checkout the main branch if the PR branch does not exist + if: steps.checkout-solver-enterprise.outcome != 'success' + uses: actions/checkout@v4 + with: + repository: TimefoldAI/timefold-solver-enterprise + ref: main + token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} path: ./timefold-solver-enterprise - name: Phase 2 - Quickly build timefold-solver-enterprise @@ -119,7 +132,7 @@ jobs: shell: bash run: mvn clean install -B -Dquickly -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" - - name: Phase 2 - Run the new configuration + - name: Phase 2 - Run the benchmark on the new code working-directory: ./timefold-solver-benchmarks env: RUN_ID: ${{ github.event.inputs.branch }} @@ -142,7 +155,3 @@ jobs: output-file-path: ./timefold-solver-benchmarks/results/${{ github.event.inputs.branch }}/results.json github-token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} auto-push: true - alert-threshold: '105%' - comment-on-alert: true - fail-on-alert: true - alert-comment-cc-users: '@triceo' \ No newline at end of file From 3ec0259bdea41d874266ddc838536fcfb05c5fdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 11:48:07 +0200 Subject: [PATCH 35/62] Add Xmx --- .../timefold/solver/benchmarks/micro/common/AbstractMain.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index 6279b3d4..8a6b4be6 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -178,7 +178,7 @@ public ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .forks(configuration.getForkCount()) .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) - .jvmArgs("-XX:+UseZGC") // Minimize GC pause times. + .jvmArgs("-XX:+UseZGC", "-Xmx4g") // Stable, predictable GC pause times. .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); From 4af9d53790bf65160762b2f9f2a185bb658c18c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 11:57:01 +0200 Subject: [PATCH 36/62] Another shot --- .github/workflows/performance_score_director.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 2a03ed89..415ec20a 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -145,13 +145,13 @@ jobs: with: name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} path: | - ./timefold-solver-benchmarks/results + ./timefold-solver-benchmarks/results/scoredirector* - name: Phase 3 - Publish benchmark result uses: benchmark-action/github-action-benchmark@v1 with: name: scoredirector-${{ matrix.example }} tool: 'jmh' - output-file-path: ./timefold-solver-benchmarks/results/${{ github.event.inputs.branch }}/results.json + output-file-path: ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json github-token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} auto-push: true From 52f887b79fd20350910d4281fb98d2df33656705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 18:47:23 +0200 Subject: [PATCH 37/62] More stabilization --- .github/workflows/performance_score_director.yml | 4 ++-- .../timefold/solver/benchmarks/micro/common/AbstractMain.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 415ec20a..dda0ba17 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -70,9 +70,9 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=15" > scoredirector-benchmark.properties + echo "forks=10" > scoredirector-benchmark.properties echo "warmup_iterations=5" >> scoredirector-benchmark.properties - echo "measurement_iterations=10" >> scoredirector-benchmark.properties + echo "measurement_iterations=15" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java index 8a6b4be6..3a04c0dd 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/common/AbstractMain.java @@ -178,7 +178,7 @@ public ChainedOptionsBuilder getBaseJmhConfig(C configuration) { .forks(configuration.getForkCount()) .warmupIterations(configuration.getWarmupIterations()) .measurementIterations(configuration.getMeasurementIterations()) - .jvmArgs("-XX:+UseZGC", "-Xmx4g") // Stable, predictable GC pause times. + .jvmArgs("-XX:+UseParallelGC", "-Xmx2g") // Stable, predictable GC pause times. .result(resultsDirectory.resolve("results.json").toAbsolutePath().toString()) .resultFormat(ResultFormatType.JSON) .shouldDoGC(true); From 46616f91f3260b9f5905fa78c40ed4280da71332 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 18:48:06 +0200 Subject: [PATCH 38/62] Remove the benchmark publishing --- .github/workflows/performance_score_director.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index dda0ba17..5d1c71ba 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -145,13 +145,4 @@ jobs: with: name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} path: | - ./timefold-solver-benchmarks/results/scoredirector* - - - name: Phase 3 - Publish benchmark result - uses: benchmark-action/github-action-benchmark@v1 - with: - name: scoredirector-${{ matrix.example }} - tool: 'jmh' - output-file-path: ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json - github-token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} - auto-push: true + ./timefold-solver-benchmarks/results/scoredirector \ No newline at end of file From 638035894010175de13ecfea7e4096eaed1e18d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 19:27:41 +0200 Subject: [PATCH 39/62] Allow more threads --- .github/workflows/performance_score_director.yml | 4 ++-- run-coldstart.sh | 2 +- run-scoredirector.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 5d1c71ba..4945de6a 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -71,8 +71,8 @@ jobs: shell: bash run: | echo "forks=10" > scoredirector-benchmark.properties - echo "warmup_iterations=5" >> scoredirector-benchmark.properties - echo "measurement_iterations=15" >> scoredirector-benchmark.properties + echo "warmup_iterations=10" >> scoredirector-benchmark.properties + echo "measurement_iterations=10" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties diff --git a/run-coldstart.sh b/run-coldstart.sh index 24339d18..ebeb6fc5 100755 --- a/run-coldstart.sh +++ b/run-coldstart.sh @@ -1,4 +1,4 @@ #!/bin/bash sudo -i sysctl kernel.perf_event_paranoid=1 sudo -i sysctl kernel.kptr_restrict=0 -taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main \ No newline at end of file +java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.coldstart.Main \ No newline at end of file diff --git a/run-scoredirector.sh b/run-scoredirector.sh index 8aa39bbd..49c71f27 100755 --- a/run-scoredirector.sh +++ b/run-scoredirector.sh @@ -1,4 +1,4 @@ #!/bin/bash sudo -i sysctl kernel.perf_event_paranoid=1 sudo -i sysctl kernel.kptr_restrict=0 -taskset -c 0 java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main \ No newline at end of file +java -cp target/benchmarks.jar ai.timefold.solver.benchmarks.micro.scoredirector.Main \ No newline at end of file From cfcd0e22624eb492e8ec0d8015c02c721ad78ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 20:11:26 +0200 Subject: [PATCH 40/62] Last attempt --- .github/workflows/performance_score_director.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 4945de6a..7f5f0b8a 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -70,9 +70,9 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=10" > scoredirector-benchmark.properties - echo "warmup_iterations=10" >> scoredirector-benchmark.properties - echo "measurement_iterations=10" >> scoredirector-benchmark.properties + echo "forks=15" > scoredirector-benchmark.properties + echo "warmup_iterations=5 >> scoredirector-benchmark.properties + echo "measurement_iterations=15" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties From 0f58f44f2657129678ab12594426f1374fba1dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 20:13:35 +0200 Subject: [PATCH 41/62] Fix --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 7f5f0b8a..28a97d63 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -71,7 +71,7 @@ jobs: shell: bash run: | echo "forks=15" > scoredirector-benchmark.properties - echo "warmup_iterations=5 >> scoredirector-benchmark.properties + echo "warmup_iterations=5" >> scoredirector-benchmark.properties echo "measurement_iterations=15" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties From a8024f622cd2f485af766f25079c679e93d020fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Tue, 1 Oct 2024 21:12:52 +0200 Subject: [PATCH 42/62] Update the threshold to 2.5 % --- .github/workflows/performance_score_director.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 28a97d63..3bae1d2c 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -73,6 +73,7 @@ jobs: echo "forks=15" > scoredirector-benchmark.properties echo "warmup_iterations=5" >> scoredirector-benchmark.properties echo "measurement_iterations=15" >> scoredirector-benchmark.properties + echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties From 7b388df59c95876a2fea2fb704897c7b600e685a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 10:50:49 +0200 Subject: [PATCH 43/62] Outputs --- .../workflows/performance_score_director.yml | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 3bae1d2c..e190b246 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -73,7 +73,7 @@ jobs: echo "forks=15" > scoredirector-benchmark.properties echo "warmup_iterations=5" >> scoredirector-benchmark.properties echo "measurement_iterations=15" >> scoredirector-benchmark.properties - echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties + echo "relative_score_error_threshold=0.025C" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties @@ -86,12 +86,15 @@ jobs: - name: Phase 1 - Run the baseline configuration working-directory: ./timefold-solver-benchmarks + id: benchmark_baseline env: RUN_ID: ${{ github.event.inputs.baseline }} shell: bash run: | ./run-scoredirector.sh - + echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + - name: Phase 2 - Checkout timefold-solver uses: actions/checkout@v4 with: @@ -134,16 +137,33 @@ jobs: run: mvn clean install -B -Dquickly -Dversion.tools.provider="${{ github.event.inputs.async_profiler_version }}" - name: Phase 2 - Run the benchmark on the new code + id: benchmark_new working-directory: ./timefold-solver-benchmarks env: RUN_ID: ${{ github.event.inputs.branch }} shell: bash run: | ./run-scoredirector.sh + echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 3 - Archive benchmark data uses: actions/upload-artifact@v4 with: name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} path: | - ./timefold-solver-benchmarks/results/scoredirector \ No newline at end of file + ./timefold-solver-benchmarks/results/scoredirector + + - name: Compare baseline with the branch + working-directory: ./timefold-solver-benchmarks + env: + OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }} + OLD_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }} + NEW_RANGE_START: ${{ steps.benchmark_new.outputs.RANGE_START }} + NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} + shell: bash + run: | + echo "OLD_RANGE_START=$OLD_RANGE_START" + echo "OLD_RANGE_END=$OLD_RANGE_END" + echo "NEW_RANGE_START=$NEW_RANGE_START" + echo "NEW_RANGE_END=$NEW_RANGE_END" \ No newline at end of file From 26e75f81f60277900c74f4464f81f43238311b0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 11:01:01 +0200 Subject: [PATCH 44/62] Fix --- .../workflows/performance_score_director.yml | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index e190b246..aefaea60 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -73,7 +73,7 @@ jobs: echo "forks=15" > scoredirector-benchmark.properties echo "warmup_iterations=5" >> scoredirector-benchmark.properties echo "measurement_iterations=15" >> scoredirector-benchmark.properties - echo "relative_score_error_threshold=0.025C" >> scoredirector-benchmark.properties + echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties cat scoredirector-benchmark.properties @@ -92,6 +92,7 @@ jobs: shell: bash run: | ./run-scoredirector.sh + # The benchmark gives the 99.9 % confidence interval. echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" @@ -144,6 +145,7 @@ jobs: shell: bash run: | ./run-scoredirector.sh + # The benchmark gives the 99.9 % confidence interval. echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" @@ -163,7 +165,16 @@ jobs: NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | - echo "OLD_RANGE_START=$OLD_RANGE_START" - echo "OLD_RANGE_END=$OLD_RANGE_END" - echo "NEW_RANGE_START=$NEW_RANGE_START" - echo "NEW_RANGE_END=$NEW_RANGE_END" \ No newline at end of file + echo "Baseline result with 99.9 % confidence: " + echo " [$OLD_RANGE_START, $OLD_RANGE_END]" + echo " New result with 99.9 % confidence: " + echo " [$NEW_RANGE_START, $NEW_RANGE_END]" + echo "" + if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then + echo "Result is not statistically significant." + elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then + echo "Statistically significant improvement." + else + echo "Statistically significant regression." + exit 1 + fi \ No newline at end of file From 63991d250cbfe2048aaa79803f3d682518d1fec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 11:20:03 +0200 Subject: [PATCH 45/62] Formatting --- .../workflows/performance_score_director.yml | 61 ++++++++++++------- .../benchmarks/micro/coldstart/Main.java | 14 +---- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index aefaea60..179f95d1 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -1,8 +1,11 @@ # - Runs entirely on a single machine. # - The baseline is established first, then the branch under test is measured. -# - Both runs fail if the benchmark error is over predefined thresholds. -# - Then, if both are below thresholds and neither failed, those results must be directly comparable. -# - Therefore, if the difference between the two is over the threshold, then the branch is considered to have regressed. +# - Each benchmark gives a 99.9 % confidence interval. +# - The confidence intervals are compared to determine if the branch under test is a regression or an improvement. +# - The error threshold is expected to be below +/- 2.5 %, +# but sometimes it gets higher due to the nature of public GitHub runners. +# We have yet to see an error of over +/- 4 %. +# With the error so high, the impact is that small regressions are not considered statistically significant. name: Performance Regression Test - Score Director on: @@ -31,7 +34,7 @@ on: jobs: - test: + benchmark: runs-on: ubuntu-latest strategy: fail-fast: false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue. @@ -70,9 +73,9 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=15" > scoredirector-benchmark.properties - echo "warmup_iterations=5" >> scoredirector-benchmark.properties - echo "measurement_iterations=15" >> scoredirector-benchmark.properties + echo "forks=1" > scoredirector-benchmark.properties + echo "warmup_iterations=1" >> scoredirector-benchmark.properties + echo "measurement_iterations=1" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties @@ -92,9 +95,8 @@ jobs: shell: bash run: | ./run-scoredirector.sh - # The benchmark gives the 99.9 % confidence interval. - echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" - echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 2 - Checkout timefold-solver uses: actions/checkout@v4 @@ -145,18 +147,18 @@ jobs: shell: bash run: | ./run-scoredirector.sh - # The benchmark gives the 99.9 % confidence interval. - echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" - echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' ./timefold-solver-benchmarks/results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 3 - Archive benchmark data uses: actions/upload-artifact@v4 with: name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} path: | + ./timefold-solver-benchmarks/scoredirector-benchmark.properties ./timefold-solver-benchmarks/results/scoredirector - - name: Compare baseline with the branch + - name: Phase 3 - Report results working-directory: ./timefold-solver-benchmarks env: OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }} @@ -165,16 +167,29 @@ jobs: NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | - echo "Baseline result with 99.9 % confidence: " - echo " [$OLD_RANGE_START, $OLD_RANGE_END]" - echo " New result with 99.9 % confidence: " - echo " [$NEW_RANGE_START, $NEW_RANGE_END]" - echo "" + export FAIL = false if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then - echo "Result is not statistically significant." + export OLD_MEAN=$(((OLD_RANGE_END - OLD_RANGE_START)/2)+OLD_RANGE_START) + export NEW_MEAN=$(((NEW_RANGE_END - NEW_RANGE_START)/2)+NEW_RANGE_START) + if [ "$NEW_RANGE_START" -ge "$OLD_MEAN" ]; then + echo "### Possible improvement ⁉️" >> $GITHUB_STEP_SUMMARY + elif [ "$OLD_RANGE_END" -le "$NEW_MEAN" ]; then + echo "### Possible regression ⁉️" >> $GITHUB_STEP_SUMMARY + else + echo "### Statistically insignificant result ⁉️" >> $GITHUB_STEP_SUMMARY + fi elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then - echo "Statistically significant improvement." + echo "### Statistically significant improvement 🚀" >> $GITHUB_STEP_SUMMARY else - echo "Statistically significant regression." - exit 1 + echo "### Statistically significant regression 🛑" >> $GITHUB_STEP_SUMMARY + export FAIL = true + fi + + echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY + echo "|:-----:|:-----------:|:-----------:|:-----------:|" >> $GITHUB_STEP_SUMMARY + echo "| _Old_ | `${{ github.event.inputs.baseline }}` | ${OLD_RANGE_START%.*} | ${OLD_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY + echo "| _New_ | `${{ github.event.inputs.branch }}` | ${NEW_RANGE_START%.*} | ${NEW_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY + + if [ "$FAIL" = true ]; then + exit 1 fi \ No newline at end of file diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java index 8b2a3515..63db61c5 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/coldstart/Main.java @@ -33,13 +33,11 @@ import java.io.IOException; import java.io.InputStream; -import java.util.concurrent.atomic.AtomicBoolean; import ai.timefold.solver.benchmarks.micro.coldstart.jmh.TimeToFirstScoreBenchmark; import ai.timefold.solver.benchmarks.micro.coldstart.jmh.TimeToSolverFactoryBenchmark; import ai.timefold.solver.benchmarks.micro.common.AbstractMain; -import org.openjdk.jmh.results.Result; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.ChainedOptionsBuilder; @@ -76,9 +74,8 @@ public static void main(String[] args) throws RunnerException, IOException { var relativeScoreErrorThreshold = configuration.getRelativeScoreErrorThreshold(); var thresholdForPrint = ((int) Math.round(relativeScoreErrorThreshold * 10_000)) / 100.0D; - var wasSuccess = new AtomicBoolean(true); runResults.forEach(result -> { - Result primaryResult = result.getPrimaryResult(); + var primaryResult = result.getPrimaryResult(); var score = primaryResult.getScore(); var scoreError = primaryResult.getScoreError(); var relativeScoreError = scoreError / score; @@ -87,18 +84,13 @@ public static void main(String[] args) throws RunnerException, IOException { var benchmarkName = benchParams.getBenchmark() + " " + benchParams.getParam("example"); var relativeScoreErrorForPrint = ((int) Math.round(relativeScoreError * 10_000)) / 100.0D; if (relativeScoreError > relativeScoreErrorThreshold) { - LOGGER.error("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.warn("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); - wasSuccess.set(false); - } else if (relativeScoreError > (relativeScoreErrorThreshold * 0.9)) { - LOGGER.warn("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.info("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); } }); - if (wasSuccess.get()) { - System.exit(1); - } } private static ChainedOptionsBuilder processBenchmark(ChainedOptionsBuilder options, Configuration configuration) { From 08836cbad5a9696b13d7d73235e0f848947e4861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 13:13:40 +0200 Subject: [PATCH 46/62] Fix --- .github/workflows/performance_score_director.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 179f95d1..a531423c 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -167,7 +167,7 @@ jobs: NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | - export FAIL = false + export FAIL=false if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then export OLD_MEAN=$(((OLD_RANGE_END - OLD_RANGE_START)/2)+OLD_RANGE_START) export NEW_MEAN=$(((NEW_RANGE_END - NEW_RANGE_START)/2)+NEW_RANGE_START) @@ -182,7 +182,7 @@ jobs: echo "### Statistically significant improvement 🚀" >> $GITHUB_STEP_SUMMARY else echo "### Statistically significant regression 🛑" >> $GITHUB_STEP_SUMMARY - export FAIL = true + export FAIL=true fi echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY From 7c62a1970ff092259a52c6f93b844a2809fce795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 13:16:52 +0200 Subject: [PATCH 47/62] Do ranges --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a531423c..c9126854 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -73,7 +73,7 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=1" > scoredirector-benchmark.properties + echo "forks=2" > scoredirector-benchmark.properties echo "warmup_iterations=1" >> scoredirector-benchmark.properties echo "measurement_iterations=1" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties From 5f950d214ce02919078b2087fa5d8ac310884f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 13:23:17 +0200 Subject: [PATCH 48/62] Do ranges actually --- .github/workflows/performance_score_director.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index c9126854..6bb9a09a 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -75,7 +75,7 @@ jobs: run: | echo "forks=2" > scoredirector-benchmark.properties echo "warmup_iterations=1" >> scoredirector-benchmark.properties - echo "measurement_iterations=1" >> scoredirector-benchmark.properties + echo "measurement_iterations=2" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties @@ -155,7 +155,6 @@ jobs: with: name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }} path: | - ./timefold-solver-benchmarks/scoredirector-benchmark.properties ./timefold-solver-benchmarks/results/scoredirector - name: Phase 3 - Report results @@ -187,8 +186,8 @@ jobs: echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY echo "|:-----:|:-----------:|:-----------:|:-----------:|" >> $GITHUB_STEP_SUMMARY - echo "| _Old_ | `${{ github.event.inputs.baseline }}` | ${OLD_RANGE_START%.*} | ${OLD_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY - echo "| _New_ | `${{ github.event.inputs.branch }}` | ${NEW_RANGE_START%.*} | ${NEW_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY + echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START%.*} | ${OLD_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY + echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START%.*} | ${NEW_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY if [ "$FAIL" = true ]; then exit 1 From e48055aa95628e24614dd211defa267bd410f988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 13:35:47 +0200 Subject: [PATCH 49/62] Don't fail prematurely --- .../solver/benchmarks/micro/scoredirector/Main.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java index 5afad126..55b39dbc 100644 --- a/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java +++ b/src/main/java/ai/timefold/solver/benchmarks/micro/scoredirector/Main.java @@ -33,11 +33,9 @@ import java.io.IOException; import java.io.InputStream; -import java.util.concurrent.atomic.AtomicBoolean; import ai.timefold.solver.benchmarks.micro.common.AbstractMain; -import org.openjdk.jmh.results.Result; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.ChainedOptionsBuilder; @@ -78,7 +76,7 @@ public static void main(String[] args) throws RunnerException, IOException { var relativeScoreErrorThreshold = configuration.getRelativeScoreErrorThreshold(); var thresholdForPrint = ((int) Math.round(relativeScoreErrorThreshold * 10_000)) / 100.0D; runResults.forEach(result -> { - Result primaryResult = result.getPrimaryResult(); + var primaryResult = result.getPrimaryResult(); var score = primaryResult.getScore(); var scoreError = primaryResult.getScoreError(); var relativeScoreError = scoreError / score; @@ -86,18 +84,13 @@ public static void main(String[] args) throws RunnerException, IOException { var benchParams = result.getParams(); var benchmarkName = benchParams.getBenchmark() + " " + benchParams.getParam("csExample"); var relativeScoreErrorForPrint = ((int) Math.round(relativeScoreError * 10_000)) / 100.0D; - var wasSuccess = new AtomicBoolean(true); if (relativeScoreError > relativeScoreErrorThreshold) { - LOGGER.error("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.warn("Score error for '{}' is too high: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); - wasSuccess.set(false); } else if (relativeScoreError > (relativeScoreErrorThreshold * 0.9)) { - LOGGER.warn("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, + LOGGER.info("Score error for '{}' approaching threshold: ± {} % (threshold: ± {} %).", benchmarkName, relativeScoreErrorForPrint, thresholdForPrint); } - if (!wasSuccess.get()) { - System.exit(1); - } }); } From 38db78b0127af91fc061e2841035dc9928f2e7a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 13:48:31 +0200 Subject: [PATCH 50/62] Convert to integer --- .github/workflows/performance_score_director.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 6bb9a09a..a6eba637 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -95,8 +95,8 @@ jobs: shell: bash run: | ./run-scoredirector.sh - echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" - echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 2 - Checkout timefold-solver uses: actions/checkout@v4 @@ -147,8 +147,8 @@ jobs: shell: bash run: | ./run-scoredirector.sh - echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" - echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 3 - Archive benchmark data uses: actions/upload-artifact@v4 @@ -186,8 +186,8 @@ jobs: echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY echo "|:-----:|:-----------:|:-----------:|:-----------:|" >> $GITHUB_STEP_SUMMARY - echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START%.*} | ${OLD_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY - echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START%.*} | ${NEW_RANGE_END%.*} |" >> $GITHUB_STEP_SUMMARY + echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY if [ "$FAIL" = true ]; then exit 1 From 16d6f8a7afd1d98fe8d23e6b8f7b13bf5e22e066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 13:55:55 +0200 Subject: [PATCH 51/62] Add back the proper settings --- .github/workflows/performance_score_director.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a6eba637..49ac10ec 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -73,9 +73,9 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=2" > scoredirector-benchmark.properties - echo "warmup_iterations=1" >> scoredirector-benchmark.properties - echo "measurement_iterations=2" >> scoredirector-benchmark.properties + echo "forks=15" > scoredirector-benchmark.properties + echo "warmup_iterations=5" >> scoredirector-benchmark.properties + echo "measurement_iterations=15" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties From 2759db9498c9e8cec003dee4a286171fe636b651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 14:48:15 +0200 Subject: [PATCH 52/62] Cleanup --- .../workflows/performance_score_director.yml | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 49ac10ec..d40a9303 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -111,7 +111,7 @@ jobs: run: mvn -B -Dquickly clean install # Clone timefold-solver-enterprise - - name: Phase 2 - Checkout timefold-solver-enterprise (PR) # Checkout the PR branch first, if it exists + - name: Phase 2 - Checkout timefold-solver-enterprise (Specified) id: checkout-solver-enterprise uses: actions/checkout@v4 continue-on-error: true @@ -120,7 +120,7 @@ jobs: ref: ${{ github.event.inputs.branch }} token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }} path: ./timefold-solver-enterprise - - name: Phase 2 - Checkout timefold-solver-enterprise (main) # Checkout the main branch if the PR branch does not exist + - name: Phase 2 - Checkout timefold-solver-enterprise (Fallback) if: steps.checkout-solver-enterprise.outcome != 'success' uses: actions/checkout@v4 with: @@ -184,10 +184,18 @@ jobs: export FAIL=true fi - echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY - echo "|:-----:|:-----------:|:-----------:|:-----------:|" >> $GITHUB_STEP_SUMMARY - echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY - echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + export DIFF_START=$(echo "scale=2; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) + export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) + + echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|:-----------:|:-----------------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY + echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_END} % |" >> $GITHUB_STEP_SUMMARY + + echo "Min and max define a 99.9 % confidence interval." >> $GITHUB_STEP_SUMMARY + echo "Min and max are in operations per second. Higher is better." >> $GITHUB_STEP_SUMMARY + echo "Diff over 100 % represents an improvement, under 100 % a regression." >> $GITHUB_STEP_SUMMARY if [ "$FAIL" = true ]; then exit 1 From efb025c49b2d41d0d07d826184b954b6de9d8128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Wed, 2 Oct 2024 15:56:17 +0200 Subject: [PATCH 53/62] Signage --- .github/workflows/performance_score_director.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index d40a9303..1e686c2b 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -171,16 +171,16 @@ jobs: export OLD_MEAN=$(((OLD_RANGE_END - OLD_RANGE_START)/2)+OLD_RANGE_START) export NEW_MEAN=$(((NEW_RANGE_END - NEW_RANGE_START)/2)+NEW_RANGE_START) if [ "$NEW_RANGE_START" -ge "$OLD_MEAN" ]; then - echo "### Possible improvement ⁉️" >> $GITHUB_STEP_SUMMARY + echo "### 🍀Possible improvement 🍀" >> $GITHUB_STEP_SUMMARY elif [ "$OLD_RANGE_END" -le "$NEW_MEAN" ]; then - echo "### Possible regression ⁉️" >> $GITHUB_STEP_SUMMARY + echo "### ⚠️Possible regression ⚠️" >> $GITHUB_STEP_SUMMARY else - echo "### Statistically insignificant result ⁉️" >> $GITHUB_STEP_SUMMARY + echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY fi elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then - echo "### Statistically significant improvement 🚀" >> $GITHUB_STEP_SUMMARY + echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> $GITHUB_STEP_SUMMARY else - echo "### Statistically significant regression 🛑" >> $GITHUB_STEP_SUMMARY + echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> $GITHUB_STEP_SUMMARY export FAIL=true fi From 6e72173ed86688a87abc0f32c31c49edc08dad67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 07:42:24 +0200 Subject: [PATCH 54/62] Finishing touches? --- .../workflows/performance_score_director.yml | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 1e686c2b..1f9bb500 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -73,7 +73,7 @@ jobs: working-directory: ./timefold-solver-benchmarks shell: bash run: | - echo "forks=15" > scoredirector-benchmark.properties + echo "forks=20" > scoredirector-benchmark.properties echo "warmup_iterations=5" >> scoredirector-benchmark.properties echo "measurement_iterations=15" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties @@ -97,6 +97,7 @@ jobs: ./run-scoredirector.sh echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' results/scoredirector/${{ github.event.inputs.baseline }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 2 - Checkout timefold-solver uses: actions/checkout@v4 @@ -149,6 +150,7 @@ jobs: ./run-scoredirector.sh echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" + echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' results/scoredirector/${{ github.event.inputs.branch }}/results.json)" >> "$GITHUB_OUTPUT" - name: Phase 3 - Archive benchmark data uses: actions/upload-artifact@v4 @@ -161,21 +163,21 @@ jobs: working-directory: ./timefold-solver-benchmarks env: OLD_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }} + OLD_RANGE_MID: ${{ steps.benchmark_baseline.outputs.RANGE_MID }} OLD_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }} NEW_RANGE_START: ${{ steps.benchmark_new.outputs.RANGE_START }} + NEW_RANGE_MID: ${{ steps.benchmark_new.outputs.RANGE_MID }} NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | export FAIL=false if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then - export OLD_MEAN=$(((OLD_RANGE_END - OLD_RANGE_START)/2)+OLD_RANGE_START) - export NEW_MEAN=$(((NEW_RANGE_END - NEW_RANGE_START)/2)+NEW_RANGE_START) - if [ "$NEW_RANGE_START" -ge "$OLD_MEAN" ]; then + if [ "$NEW_RANGE_START" -ge "$OLD_RANGE_MID" ]; then echo "### 🍀Possible improvement 🍀" >> $GITHUB_STEP_SUMMARY - elif [ "$OLD_RANGE_END" -le "$NEW_MEAN" ]; then + elif [ "$OLD_RANGE_END" -le "$NEW_RANGE_MID" ]; then echo "### ⚠️Possible regression ⚠️" >> $GITHUB_STEP_SUMMARY else - echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY + echo "### Performance unchanged " >> $GITHUB_STEP_SUMMARY fi elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> $GITHUB_STEP_SUMMARY @@ -185,14 +187,16 @@ jobs: fi export DIFF_START=$(echo "scale=2; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) + export DIFF_MID=$(echo "scale=2; ($OLD_RANGE_MID / $NEW_RANGE_MID) * 100" | bc) export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) - echo "| | **Ref** | **Min** | **Max** |" >> $GITHUB_STEP_SUMMARY - echo "|:------:|:-----------:|:-----------------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY - echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY - echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY - echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_END} % |" >> $GITHUB_STEP_SUMMARY + echo "| | **Ref** | **Min** | **Mean** | **Max** |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|:-----------:|:-----------------:|:-----------------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY + echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START} | ${OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START} | ${NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_MID} % | ${DIFF_END} % |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY echo "Min and max define a 99.9 % confidence interval." >> $GITHUB_STEP_SUMMARY echo "Min and max are in operations per second. Higher is better." >> $GITHUB_STEP_SUMMARY echo "Diff over 100 % represents an improvement, under 100 % a regression." >> $GITHUB_STEP_SUMMARY From def184ea968f7eae256f5b3ee540e94931fe8c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 08:03:18 +0200 Subject: [PATCH 55/62] Add the trigger job --- .../nightly_performance_score_director.yml | 14 ++++++++++++++ .github/workflows/turtle.yml | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/nightly_performance_score_director.yml diff --git a/.github/workflows/nightly_performance_score_director.yml b/.github/workflows/nightly_performance_score_director.yml new file mode 100644 index 00000000..1344aba4 --- /dev/null +++ b/.github/workflows/nightly_performance_score_director.yml @@ -0,0 +1,14 @@ +name: Schedule a nightly run of the Score Director performance benchmark + +on: + schedule: + - cron: '0 0 * * *' # Every day at the start of the day + +jobs: + trigger: + runs-on: ubuntu-latest + steps: + - name: Schedule the other workflow + shell: bash + run: | + echo '{}' | gh workflow run performance_score_director.yml --json \ No newline at end of file diff --git a/.github/workflows/turtle.yml b/.github/workflows/turtle.yml index b772709a..dd82cbc1 100644 --- a/.github/workflows/turtle.yml +++ b/.github/workflows/turtle.yml @@ -2,7 +2,7 @@ name: Turtle Tests on: schedule: - - cron: '0 2 * * *' # Every day at 2am UTC + - cron: '0 3 * * *' # Every day at 3am UTC jobs: test: From a5540000aae3462d66a76878f4335f16f6cd6aed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 10:41:18 +0200 Subject: [PATCH 56/62] Finishing touches --- .../workflows/performance_score_director.yml | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 1f9bb500..d1bea83c 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -170,36 +170,42 @@ jobs: NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | - export FAIL=false - if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then - if [ "$NEW_RANGE_START" -ge "$OLD_RANGE_MID" ]; then - echo "### 🍀Possible improvement 🍀" >> $GITHUB_STEP_SUMMARY - elif [ "$OLD_RANGE_END" -le "$NEW_RANGE_MID" ]; then - echo "### ⚠️Possible regression ⚠️" >> $GITHUB_STEP_SUMMARY - else - echo "### Performance unchanged " >> $GITHUB_STEP_SUMMARY - fi - elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then - echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> $GITHUB_STEP_SUMMARY - else - echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> $GITHUB_STEP_SUMMARY - export FAIL=true - fi - export DIFF_START=$(echo "scale=2; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) export DIFF_MID=$(echo "scale=2; ($OLD_RANGE_MID / $NEW_RANGE_MID) * 100" | bc) export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) + export FAIL=false + + if [ "$DIFF_MID" -gt "98" ] && [ "$DIFF_MID" -lt "102" ]; then + # Ignore differences of up to 2 %. + echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY + echo "(Decided to ignore a very small difference of under 2 %.)" >> $GITHUB_STEP_SUMMARY + else + if [ "$NEW_RANGE_START" -le "$OLD_RANGE_END" ] && [ "$NEW_RANGE_END" -ge "$OLD_RANGE_START" ]; then + if [ "$NEW_RANGE_START" -ge "$OLD_RANGE_MID" ]; then + echo "### 🍀 Possible improvement 🍀" >> $GITHUB_STEP_SUMMARY + elif [ "$OLD_RANGE_END" -le "$NEW_RANGE_MID" ]; then + echo "### ⚠️ Possible regression ⚠️" >> $GITHUB_STEP_SUMMARY + else + echo "### Performance unchanged " >> $GITHUB_STEP_SUMMARY + fi + elif [ "$NEW_RANGE_START" -gt "$OLD_RANGE_END" ]; then + echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> $GITHUB_STEP_SUMMARY + else + echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> $GITHUB_STEP_SUMMARY + export FAIL=true + fi + fi echo "| | **Ref** | **Min** | **Mean** | **Max** |" >> $GITHUB_STEP_SUMMARY echo "|:------:|:-----------:|:-----------------:|:-----------------:|:-----------------:|" >> $GITHUB_STEP_SUMMARY - echo "| _Old_ | ${{ github.event.inputs.baseline }} | ${OLD_RANGE_START} | ${OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY - echo "| _New_ | ${{ github.event.inputs.branch }} | ${NEW_RANGE_START} | ${NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _Old_ | [v${{ github.event.inputs.baseline }}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${{ github.event.inputs.baseline }}) | ${OLD_RANGE_START} | ${OLD_RANGE_MID} | ${OLD_RANGE_END} |" >> $GITHUB_STEP_SUMMARY + echo "| _New_ | [${{ github.event.inputs.branch_owner }}'s ${{ github.event.inputs.branch }}](https://github.com/${{ github.event.inputs.branch_owner }}/timefold-solver/tree/${{ github.event.inputs.branch }}) | ${NEW_RANGE_START} | ${NEW_RANGE_MID} | ${NEW_RANGE_END} |" >> $GITHUB_STEP_SUMMARY echo "| _Diff_ | | ${DIFF_START} % | ${DIFF_MID} % | ${DIFF_END} % |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "Min and max define a 99.9 % confidence interval." >> $GITHUB_STEP_SUMMARY echo "Min and max are in operations per second. Higher is better." >> $GITHUB_STEP_SUMMARY - echo "Diff over 100 % represents an improvement, under 100 % a regression." >> $GITHUB_STEP_SUMMARY + echo "Diff under 100 % represents an improvement, over 100 % a regression." >> $GITHUB_STEP_SUMMARY if [ "$FAIL" = true ]; then exit 1 From cf2e6c322342ff387cdf990fbc37bf830e581bfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 13:30:07 +0200 Subject: [PATCH 57/62] Fix issues --- .github/workflows/performance_score_director.yml | 10 +++++----- src/main/resources/logback.xml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index d1bea83c..b9274700 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -74,8 +74,8 @@ jobs: shell: bash run: | echo "forks=20" > scoredirector-benchmark.properties - echo "warmup_iterations=5" >> scoredirector-benchmark.properties - echo "measurement_iterations=15" >> scoredirector-benchmark.properties + echo "warmup_iterations=10" >> scoredirector-benchmark.properties + echo "measurement_iterations=10" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties @@ -170,9 +170,9 @@ jobs: NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | - export DIFF_START=$(echo "scale=2; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) - export DIFF_MID=$(echo "scale=2; ($OLD_RANGE_MID / $NEW_RANGE_MID) * 100" | bc) - export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) + export DIFF_START=$(echo "scale=0; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) + export DIFF_MID=$(echo "scale=0; ($OLD_RANGE_MID / $NEW_RANGE_MID) * 100" | bc) + export DIFF_END=$(echo "scale=0; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) export FAIL=false if [ "$DIFF_MID" -gt "98" ] && [ "$DIFF_MID" -lt "102" ]; then diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index fb3c09cc..dac603bb 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -8,7 +8,7 @@ - + From 565c2d89b13192f67d1b9c621f66a2fdf795c615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 14:16:17 +0200 Subject: [PATCH 58/62] Try paid runners --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index b9274700..a8ec33d5 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -35,7 +35,7 @@ on: jobs: benchmark: - runs-on: ubuntu-latest + runs-on: perf-linux-x64-2cores strategy: fail-fast: false # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue. matrix: From 62cdf76239649f30a712444d5c78839359fff245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 14:24:31 +0200 Subject: [PATCH 59/62] Reduce run time significantly --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a8ec33d5..2383fde6 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -75,7 +75,7 @@ jobs: run: | echo "forks=20" > scoredirector-benchmark.properties echo "warmup_iterations=10" >> scoredirector-benchmark.properties - echo "measurement_iterations=10" >> scoredirector-benchmark.properties + echo "measurement_iterations=5" >> scoredirector-benchmark.properties echo "relative_score_error_threshold=0.025" >> scoredirector-benchmark.properties echo "score_director_type=cs" >> scoredirector-benchmark.properties echo "example=${{ matrix.example }}" >> scoredirector-benchmark.properties From 813d1714d4328ac13ddfc268cd022b8d3f70b439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 15:01:54 +0200 Subject: [PATCH 60/62] Adjust cron --- .../nightly_performance_score_director.yml | 14 ++++++++++++-- .github/workflows/performance_score_director.yml | 3 +-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/nightly_performance_score_director.yml b/.github/workflows/nightly_performance_score_director.yml index 1344aba4..351e4a06 100644 --- a/.github/workflows/nightly_performance_score_director.yml +++ b/.github/workflows/nightly_performance_score_director.yml @@ -2,13 +2,23 @@ name: Schedule a nightly run of the Score Director performance benchmark on: schedule: - - cron: '0 0 * * *' # Every day at the start of the day + - cron: '59 23 * * 1-5' # Every workday at the end of the day. jobs: trigger: runs-on: ubuntu-latest steps: + - name: Checkout timefold-solver + uses: actions/checkout@v4 + with: + repository: TimefoldAI/timefold-solver - name: Schedule the other workflow shell: bash run: | - echo '{}' | gh workflow run performance_score_director.yml --json \ No newline at end of file + if git log --since="24 hours ago" --oneline | grep -q .; then + echo '{}' | gh workflow run performance_score_director.yml --json + echo "Launched nightly perf tests." >> $GITHUB_STEP_SUMMARY + else + # Don't waste money. + echo "No commits in the past 24 hours." >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index 2383fde6..cfd28b23 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -2,8 +2,7 @@ # - The baseline is established first, then the branch under test is measured. # - Each benchmark gives a 99.9 % confidence interval. # - The confidence intervals are compared to determine if the branch under test is a regression or an improvement. -# - The error threshold is expected to be below +/- 2.5 %, -# but sometimes it gets higher due to the nature of public GitHub runners. +# - The error threshold is expected to be below +/- 2.5 %. # We have yet to see an error of over +/- 4 %. # With the error so high, the impact is that small regressions are not considered statistically significant. name: Performance Regression Test - Score Director From afa4d1ea31aaedbf79427445b2b0e8cf1a2addd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 16:35:08 +0200 Subject: [PATCH 61/62] Fix math --- .github/workflows/performance_score_director.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index cfd28b23..a4e00995 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -169,12 +169,12 @@ jobs: NEW_RANGE_END: ${{ steps.benchmark_new.outputs.RANGE_END }} shell: bash run: | - export DIFF_START=$(echo "scale=0; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) - export DIFF_MID=$(echo "scale=0; ($OLD_RANGE_MID / $NEW_RANGE_MID) * 100" | bc) - export DIFF_END=$(echo "scale=0; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) + export DIFF_START=$(echo "scale=2; ($OLD_RANGE_START / $NEW_RANGE_START) * 100" | bc) + export DIFF_MID=$(echo "scale=2; ($OLD_RANGE_MID / $NEW_RANGE_MID) * 100" | bc) + export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) export FAIL=false - if [ "$DIFF_MID" -gt "98" ] && [ "$DIFF_MID" -lt "102" ]; then + if (( $(echo "$DIFF_MID > 98.00" | bc -l) && $(echo "$DIFF_MID < 102.00"|bc -l) )); then # Ignore differences of up to 2 %. echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY echo "(Decided to ignore a very small difference of under 2 %.)" >> $GITHUB_STEP_SUMMARY From cb48878c406ead4b1f3fa09f75e6bba085684677 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Petrovick=C3=BD?= Date: Thu, 3 Oct 2024 18:41:23 +0200 Subject: [PATCH 62/62] Fix math --- .github/workflows/performance_score_director.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/performance_score_director.yml b/.github/workflows/performance_score_director.yml index a4e00995..ee2eed81 100644 --- a/.github/workflows/performance_score_director.yml +++ b/.github/workflows/performance_score_director.yml @@ -174,7 +174,7 @@ jobs: export DIFF_END=$(echo "scale=2; ($OLD_RANGE_END / $NEW_RANGE_END) * 100" | bc) export FAIL=false - if (( $(echo "$DIFF_MID > 98.00" | bc -l) && $(echo "$DIFF_MID < 102.00"|bc -l) )); then + if (( $(echo "$DIFF_MID >= 98.00" | bc -l) && $(echo "$DIFF_MID <= 102.00"|bc -l) )); then # Ignore differences of up to 2 %. echo "### Performance unchanged" >> $GITHUB_STEP_SUMMARY echo "(Decided to ignore a very small difference of under 2 %.)" >> $GITHUB_STEP_SUMMARY