diff --git a/.github/workflows/pr-evaluation-run.yml b/.github/workflows/pr-evaluation-run.yml
deleted file mode 100644
index a417b8e..0000000
--- a/.github/workflows/pr-evaluation-run.yml
+++ /dev/null
@@ -1,292 +0,0 @@
-name: Router Submission Evaluation
-
-on:
-  workflow_dispatch:
-    inputs:
-      pr_number:
-        description: Pull request number to evaluate
-        required: true
-        type: string
-      base_ref:
-        description: Base branch ref for evaluation scripts checkout
-        required: true
-        type: string
-      base_sha:
-        description: Base commit SHA for PR diff/evaluation
-        required: true
-        type: string
-
-jobs:
-  evaluate-router:
-    runs-on: self-hosted
-    permissions:
-      contents: read
-      issues: write
-      checks: write
-      pull-requests: write
-    env:
-      PR_NUMBER: ${{ inputs.pr_number }}
-      BASE_REF: ${{ inputs.base_ref }}
-      BASE_SHA: ${{ inputs.base_sha }}
-      PR_CHECKOUT_REF: ${{ format('refs/pull/{0}/head', inputs.pr_number) }}
-    steps:
-      - name: Fetch PR head SHA
-        id: prmeta
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const pr = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: Number('${{ env.PR_NUMBER }}')
-            });
-            core.setOutput('head_sha', pr.data.head.sha);
-
-      - name: Create in-progress PR check
-        id: checkrun
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const result = await github.rest.checks.create({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              name: 'Router Submission Evaluation (/evaluate)',
-              head_sha: '${{ steps.prmeta.outputs.head_sha }}',
-              status: 'in_progress',
-              started_at: new Date().toISOString(),
-              details_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
-              output: {
-                title: 'Evaluation started',
-                summary: 'Router evaluation is running.'
-              }
-            });
-            core.setOutput('id', String(result.data.id));
-
-      - name: Checkout base repository (for evaluation scripts)
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ env.BASE_REF }}
-          path: base
-          fetch-depth: 0
-
-      - name: Checkout PR branch (for prediction file only)
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ env.PR_CHECKOUT_REF }}
-          path: pr
-          fetch-depth: 0
-
-      - name: Detect changed prediction file
-        id: detect
-        shell: bash
-        working-directory: pr
-        run: |
-          set -euo pipefail
-          BASE_REF="${{ env.BASE_REF }}"
-          BASE_SHA="${{ env.BASE_SHA }}"
-
-          if [[ -z "$BASE_SHA" ]]; then
-            echo "Error: Could not determine PR base SHA" >&2
-            exit 1
-          fi
-
-          git fetch origin "$BASE_REF" || true
-
-          if ! git cat-file -e "$BASE_SHA" 2>/dev/null; then
-            echo "Base SHA $BASE_SHA not found locally, attempting to fetch..."
-            git fetch origin "$BASE_SHA" || git fetch origin "$BASE_REF" || true
-          fi
-
-          mapfile -t CHANGED_FILES < <(git diff --name-status "$BASE_SHA"...HEAD -- router_inference/predictions/*.json 2>/dev/null | awk '$1 == "A" || $1 == "M" {print $2}')
-
-          if [[ ${#CHANGED_FILES[@]} -eq 0 ]]; then
-            echo "No changed prediction file detected; skipping evaluation."
-            echo "router=" >> "$GITHUB_OUTPUT"
-            exit 0
-          fi
-
-          router_name=""
-          has_base=0
-          has_robustness=0
-
-          for file in "${CHANGED_FILES[@]}"; do
-            filename=$(basename "$file")
-            name="${filename%.json}"
-            if [[ "$name" == *-robustness ]]; then
-              has_robustness=1
-              name="${name%-robustness}"
-            else
-              has_base=1
-            fi
-
-            if [[ -z "$name" ]]; then
-              echo "Unable to determine router name from $file" >&2
-              exit 1
-            fi
-
-            if [[ -z "$router_name" ]]; then
-              router_name="$name"
-            elif [[ "$router_name" != "$name" ]]; then
-              echo "Prediction files belong to different routers:" >&2
-              printf '  %s\n' "${CHANGED_FILES[@]}" >&2
-              exit 1
-            fi
-          done
-
-          if [[ ${#CHANGED_FILES[@]} -ne 2 || $has_base -ne 1 || $has_robustness -ne 1 ]]; then
-            echo "Expected exactly two prediction files (router and router-robustness), found:" >&2
-            printf '  %s\n' "${CHANGED_FILES[@]}" >&2
-            exit 1
-          fi
-
-          ROUTER_NAME="$router_name"
-          echo "router=$ROUTER_NAME" >> "$GITHUB_OUTPUT"
-
-          PREDICTION_FILE="router_inference/predictions/${ROUTER_NAME}.json"
-          if [[ ! -f "$PREDICTION_FILE" ]]; then
-            echo "Error: Prediction file not found at $PREDICTION_FILE" >&2
-            exit 1
-          fi
-
-          ENTRY_COUNT=$(python3 -c "import json; print(len(json.load(open('$PREDICTION_FILE'))))")
-          echo "Prediction file contains $ENTRY_COUNT entries"
-
-          if [[ "$ENTRY_COUNT" -eq 8400 ]]; then
-            SPLIT="full"
-          elif [[ "$ENTRY_COUNT" -eq 809 ]]; then
-            SPLIT="sub_10"
-          else
-            echo "Warning: Unexpected prediction file size ($ENTRY_COUNT entries). Defaulting to full." >&2
-            SPLIT="full"
-          fi
-          echo "split=$SPLIT" >> "$GITHUB_OUTPUT"
-
-      - name: Show detected router
-        if: ${{ steps.detect.outputs.router != '' }}
-        run: |
-          set -euo pipefail
-          echo "Detected router submission: ${{ steps.detect.outputs.router }}"
-          echo "Detected split: ${{ steps.detect.outputs.split }}"
-
-      - name: Prepare dataset
-        if: ${{ steps.detect.outputs.router != '' }}
-        working-directory: base
-        run: |
-          set -euo pipefail
-          echo "Preparing dataset..."
-          mkdir -p "${{ github.workspace }}/dataset"
-          uv run python scripts/process_datasets/prep_datasets.py
-
-      - name: Copy PR prediction file to base workspace
-        if: ${{ steps.detect.outputs.router != '' }}
-        run: |
-          set -euo pipefail
-          ROUTER_NAME="${{ steps.detect.outputs.router }}"
-          mkdir -p base/router_inference/predictions
-          cp "pr/router_inference/predictions/${ROUTER_NAME}.json" \
-             "base/router_inference/predictions/${ROUTER_NAME}.json"
-          cp "pr/router_inference/predictions/${ROUTER_NAME}-robustness.json" \
-             "base/router_inference/predictions/${ROUTER_NAME}-robustness.json"
-          echo "Copied prediction files from PR to base workspace"
-
-      - name: Evaluate submission
-        if: ${{ steps.detect.outputs.router != '' }}
-        id: evaluate
-        working-directory: base
-        env:
-          ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
-        run: |
-          set -euo pipefail; trap 'cat evaluation_output.txt' EXIT
-          BASE_SHA="${{ env.BASE_SHA }}"
-          uv run python automation/process_pr_submission.py \
-            --pr "${{ env.PR_NUMBER }}" \
-            --router "${{ steps.detect.outputs.router }}" \
-            --split "${{ steps.detect.outputs.split }}" \
-            --base-ref "$BASE_SHA" > evaluation_output.txt 2>&1
-          cat evaluation_output.txt
-
-      - name: Post evaluation results as PR comment
-        if: ${{ steps.detect.outputs.router != '' && steps.evaluate.outcome == 'success' }}
-        uses: actions/github-script@v7
-        with:
-          script: |
-            const fs = require('fs');
-            const path = require('path');
-
-            let comment = '## Router Evaluation Results\n\n';
-            comment += `**Router:** \`${{ steps.detect.outputs.router }}\`\n`;
-            comment += `**Dataset Split:** \`${{ steps.detect.outputs.split }}\`\n\n`;
-
-            const metricsPath = path.join('base', 'metrics.json');
-            if (!fs.existsSync(metricsPath)) {
-              throw new Error(`metrics.json not found at ${metricsPath}. Evaluation must produce metrics.json file.`);
-            }
-
-            const metrics = JSON.parse(fs.readFileSync(metricsPath, 'utf8'));
-            comment += '### RouterArena Metrics\n\n';
-            comment += '| Metric | Value |\n';
-            comment += '|--------|-------|\n';
-            comment += `| **RouterArena Score** | ${metrics.arena_score.toFixed(4)} |\n`;
-            comment += `| **Accuracy** | ${(metrics.accuracy * 100).toFixed(2)}% |\n`;
-            comment += `| **Total Cost** | $${metrics.total_cost.toFixed(6)} |\n`;
-            comment += `| **Avg Cost per Query** | $${metrics.avg_cost_per_query.toFixed(6)} |\n`;
-            comment += `| **Avg Cost per 1K Queries** | $${metrics.avg_cost_per_1000.toFixed(4)} |\n`;
-            comment += `| **Number of Queries** | ${metrics.num_queries} |\n`;
-            const robustnessScore = metrics.robustness_score;
-            const robustnessCell = robustnessScore !== undefined ? robustnessScore.toFixed(4) : 'N/A';
-            comment += `| **Robustness Score** | ${robustnessCell} |\n`;
-
-            if (metrics.optimality) {
-              comment += '\n### Optimality Metrics\n\n';
-              comment += '| Metric | Value |\n';
-              comment += '|--------|-------|\n';
-              comment += `| **Opt.Sel** (Optimal Selection) | ${metrics.optimality.opt_sel.toFixed(4)} |\n`;
-              comment += `| **Opt.Cost** (Cost Efficiency) | ${metrics.optimality.opt_cost.toFixed(4)} |\n`;
-              comment += `| **Opt.Acc** (Accuracy vs Optimal) | ${metrics.optimality.opt_acc.toFixed(4)} |\n`;
-            }
-
-            comment += '\n---\n';
-            comment += '*Evaluation completed by RouterArena automated workflow*';
-
-            await github.rest.issues.createComment({
-              issue_number: Number('${{ env.PR_NUMBER }}'),
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              body: comment
-            });
-            console.log('Successfully posted evaluation results as PR comment');
-
-      - name: Complete PR check
-        if: ${{ always() && steps.checkrun.outputs.id != '' }}
-        uses: actions/github-script@v7
-        env:
-          DETECTED_ROUTER: ${{ steps.detect.outputs.router }}
-          DETECT_OUTCOME: ${{ steps.detect.outcome }}
-          EVALUATE_OUTCOME: ${{ steps.evaluate.outcome }}
-        with:
-          script: |
-            let conclusion = 'success';
-            let title = 'Evaluation completed';
-            let summary = 'Router evaluation finished successfully.';
-
-            if (!process.env.DETECTED_ROUTER) {
-              conclusion = process.env.DETECT_OUTCOME === 'success' ? 'neutral' : 'failure';
-              title = process.env.DETECT_OUTCOME === 'success' ? 'No router file detected' : 'Evaluation setup failed';
-              summary = process.env.DETECT_OUTCOME === 'success'
-                ? 'No changed prediction file was detected for this PR, so evaluation was skipped.'
-                : 'Failed while detecting prediction files for this PR.';
-            } else if (process.env.EVALUATE_OUTCOME !== 'success') {
-              conclusion = 'failure';
-              title = 'Evaluation failed';
-              summary = 'The evaluation step failed. Check this workflow run logs for details.';
-            }
-
-            await github.rest.checks.update({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              check_run_id: Number('${{ steps.checkrun.outputs.id }}'),
-              status: 'completed',
-              conclusion,
-              completed_at: new Date().toISOString(),
-              output: { title, summary }
-            });
diff --git a/.github/workflows/pr-evaluation.yml b/.github/workflows/pr-evaluation.yml
index 5635825..b34998b 100644
--- a/.github/workflows/pr-evaluation.yml
+++ b/.github/workflows/pr-evaluation.yml
@@ -1,11 +1,11 @@
-name: Router Submission Evaluation Trigger
+name: Router Submission Evaluation
 
 on:
   issue_comment:
     types: [created]
 
 jobs:
-  request-evaluation:
+  evaluate-router:
     if: >-
       github.event.issue.pull_request &&
       startsWith(github.event.comment.body, '/evaluate') &&
@@ -17,32 +17,20 @@ jobs:
       )
     runs-on: self-hosted
     permissions:
-      actions: write
-      issues: write
-      pull-requests: write
       checks: write
       contents: read
+      pull-requests: write
     steps:
       - name: Acknowledge /evaluate command
         uses: actions/github-script@v7
         with:
           script: |
-            try {
-              await github.rest.reactions.createForIssueComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: context.payload.comment.id,
-                content: 'eyes'
-              });
-            } catch (error) {
-              // Some org/repo token policies disallow reactions for GITHUB_TOKEN.
-              // Do not block evaluation trigger on this cosmetic action.
-              if (error.status === 403) {
-                core.warning(`Skipping reaction due to permission restriction: ${error.message}`);
-              } else {
-                throw error;
-              }
-            }
+            // Cosmetic acknowledgement: a 403 (token policy forbids reactions) must not block the evaluation.
+            const ackTarget = { owner: context.repo.owner, repo: context.repo.repo, comment_id: context.payload.comment.id };
+            await github.rest.reactions.createForIssueComment({ ...ackTarget, content: 'eyes' }).catch((error) => {
+              if (error.status !== 403) throw error;
+              core.warning(`Skipping reaction due to permission restriction: ${error.message}`);
+            });
 
       - name: Fetch PR details
         id: pr
@@ -54,22 +42,281 @@ jobs:
               repo: context.repo.repo,
               pull_number: context.payload.issue.number
             });
-            core.setOutput('number', String(pr.data.number));
+            core.setOutput('head_sha', pr.data.head.sha);
             core.setOutput('base_ref', pr.data.base.ref);
             core.setOutput('base_sha', pr.data.base.sha);
+            core.setOutput('number', pr.data.number);
 
-      - name: Dispatch evaluation workflow
+      - name: Create in-progress PR check
+        id: checkrun  # exposes the check-run id to later steps as steps.checkrun.outputs.id
         uses: actions/github-script@v7
         with:
           script: |
-            await github.rest.actions.createWorkflowDispatch({
+            const result = await github.rest.checks.create({  // opens a check run on the PR head commit
               owner: context.repo.owner,
               repo: context.repo.repo,
-              workflow_id: 'pr-evaluation-run.yml',
-              ref: '${{ steps.pr.outputs.base_ref }}',
-              inputs: {
-                pr_number: '${{ steps.pr.outputs.number }}',
-                base_ref: '${{ steps.pr.outputs.base_ref }}',
-                base_sha: '${{ steps.pr.outputs.base_sha }}'
+              name: 'Router Submission Evaluation (/evaluate)',
+              head_sha: '${{ steps.pr.outputs.head_sha }}',  // expanded by Actions before the script runs
+              status: 'in_progress',
+              started_at: new Date().toISOString(),
+              details_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,  // links to this workflow run
+              output: {
+                title: 'Evaluation started',
+                summary: 'Router evaluation was triggered via `/evaluate` and is now running.'
               }
             });
+            core.setOutput('id', String(result.data.id));  // stringified id, read back by the "Complete PR check" step
+
+      - name: Checkout base repository (for evaluation scripts)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.pr.outputs.base_ref }}  # base branch of the PR, as reported by the "Fetch PR details" step
+          path: base  # later steps run the dataset and evaluation scripts from ./base
+          fetch-depth: 0  # full history instead of a shallow clone
+
+      - name: Checkout PR branch (for prediction file only)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ steps.pr.outputs.head_sha }}  # exact PR head commit captured by the "Fetch PR details" step
+          path: pr  # kept apart from ./base; only the prediction files are copied out of it
+          fetch-depth: 0  # full history, so the detect step can diff against the PR base SHA
+
+      - name: Detect changed prediction file
+        id: detect
+        shell: bash
+        working-directory: pr
+        run: |
+          set -euo pipefail
+          # Compare against the PR's base so each router submission is evaluated independently.
+          BASE_REF="${{ steps.pr.outputs.base_ref }}"
+          BASE_SHA="${{ steps.pr.outputs.base_sha }}"
+
+          if [[ -z "$BASE_SHA" ]]; then
+            echo "Error: Could not determine PR base SHA" >&2
+            exit 1
+          fi
+
+          # Best-effort fetches so the base branch / base SHA are available locally for the diff below.
+          git fetch origin "$BASE_REF" || true
+          if ! git cat-file -e "$BASE_SHA" 2>/dev/null; then
+            echo "Base SHA $BASE_SHA not found locally, attempting to fetch..."
+            git fetch origin "$BASE_SHA" || git fetch origin "$BASE_REF" || true
+          fi
+
+          # Three-dot diff lists only this PR's changes (merge-base to HEAD). It runs as its own command so a
+          # git failure aborts the step (set -e) instead of being mistaken for "no prediction files changed".
+          DIFF_OUTPUT=$(git diff --name-status "$BASE_SHA"...HEAD -- router_inference/predictions/*.json)
+          mapfile -t CHANGED_FILES < <(awk -F'\t' '$1 == "A" || $1 == "M" {print $2}' <<< "$DIFF_OUTPUT")
+
+          if [[ ${#CHANGED_FILES[@]} -eq 0 ]]; then
+            echo "No changed prediction file detected; skipping evaluation."
+            echo "router=" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          router_name=""
+          has_base=0
+          has_robustness=0
+
+          for file in "${CHANGED_FILES[@]}"; do
+            filename=$(basename "$file")
+            name="${filename%.json}"
+            if [[ "$name" == *-robustness ]]; then
+              has_robustness=1
+              name="${name%-robustness}"
+            else
+              has_base=1
+            fi
+
+            # The name is PR-controlled and later steps expand it into shell/JS: accept a safe charset only.
+            if [[ ! "$name" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]]; then
+              echo "Unable to determine a valid router name from $file (allowed: letters, digits, '.', '_', '-')" >&2
+              exit 1
+            fi
+
+            if [[ -z "$router_name" ]]; then
+              router_name="$name"
+            elif [[ "$router_name" != "$name" ]]; then
+              echo "Prediction files belong to different routers:" >&2
+              printf '  %s\n' "${CHANGED_FILES[@]}" >&2
+              exit 1
+            fi
+          done
+
+          if [[ ${#CHANGED_FILES[@]} -ne 2 || $has_base -ne 1 || $has_robustness -ne 1 ]]; then
+            echo "Expected exactly two prediction files (router and router-robustness), found:" >&2
+            printf '  %s\n' "${CHANGED_FILES[@]}" >&2
+            exit 1
+          fi
+
+          ROUTER_NAME="$router_name"
+          echo "router=$ROUTER_NAME" >> "$GITHUB_OUTPUT"
+
+          # Detect split based on prediction file size (from PR branch)
+          PREDICTION_FILE="router_inference/predictions/${ROUTER_NAME}.json"
+          if [[ ! -f "$PREDICTION_FILE" ]]; then
+            echo "Error: Prediction file not found at $PREDICTION_FILE" >&2
+            exit 1
+          fi
+          # Pass the path as argv rather than splicing it into the Python source.
+          ENTRY_COUNT=$(python3 -c 'import json, sys; print(len(json.load(open(sys.argv[1]))))' "$PREDICTION_FILE")
+          echo "Prediction file contains $ENTRY_COUNT entries"
+
+          if [[ "$ENTRY_COUNT" -eq 8400 ]]; then
+            SPLIT="full"
+          elif [[ "$ENTRY_COUNT" -eq 809 ]]; then
+            SPLIT="sub_10"
+          else
+            echo "Warning: Unexpected prediction file size ($ENTRY_COUNT entries). Defaulting to full." >&2
+            SPLIT="full"
+          fi
+          echo "split=$SPLIT" >> "$GITHUB_OUTPUT"
+
+      - name: Show detected router
+        if: ${{ steps.detect.outputs.router != '' }}  # skipped when the detect step reported no router
+        run: |
+          set -euo pipefail
+          echo "Detected router submission: ${{ steps.detect.outputs.router }}"  # NOTE(review): expanded into the script text before bash runs; only safe for shell-safe router names
+          echo "Detected split: ${{ steps.detect.outputs.split }}"  # the detect step only ever writes "full" or "sub_10" here
+
+      - name: Prepare dataset
+        if: ${{ steps.detect.outputs.router != '' }}
+        working-directory: base
+        run: |
+          set -euo pipefail
+          # Prepare dataset from public repository using the base checkout's script (safe - not from PR).
+          # NOTE(review): ROUTERARENA_DATASET_DIR is only set on the "Evaluate submission" step - confirm prep_datasets.py writes where that step reads.
+          echo "Preparing dataset..."
+          mkdir -p "${{ github.workspace }}/dataset"
+          uv run python scripts/process_datasets/prep_datasets.py
+
+      - name: Copy PR prediction file to base workspace
+        if: ${{ steps.detect.outputs.router != '' }}
+        env:
+          ROUTER_NAME: ${{ steps.detect.outputs.router }}  # via env, not inlined: the name is derived from a PR file name
+        run: |
+          set -euo pipefail
+          mkdir -p base/router_inference/predictions  # stage both prediction files next to the base checkout's scripts
+          cp "pr/router_inference/predictions/${ROUTER_NAME}.json" \
+             "base/router_inference/predictions/${ROUTER_NAME}.json"
+          cp "pr/router_inference/predictions/${ROUTER_NAME}-robustness.json" \
+             "base/router_inference/predictions/${ROUTER_NAME}-robustness.json"
+          echo "Copied prediction file from PR to base workspace"
+
+      - name: Evaluate submission
+        if: ${{ steps.detect.outputs.router != '' }}
+        id: evaluate
+        working-directory: base
+        env:
+          ROUTERARENA_DATASET_DIR: ${{ github.workspace }}/dataset
+          ROUTER_NAME: ${{ steps.detect.outputs.router }}  # via env, not inlined: the name is derived from a PR file name
+        run: |
+          # Uses base repo's evaluation script (safe - not from PR). The EXIT trap prints the log once, on success and on failure.
+          set -euo pipefail; trap 'cat evaluation_output.txt' EXIT
+          BASE_SHA="${{ steps.pr.outputs.base_sha }}"
+          uv run python automation/process_pr_submission.py \
+            --pr "${{ steps.pr.outputs.number }}" \
+            --router "$ROUTER_NAME" \
+            --split "${{ steps.detect.outputs.split }}" \
+            --base-ref "$BASE_SHA" > evaluation_output.txt 2>&1
+
+      - name: Post evaluation results as PR comment
+        if: ${{ steps.detect.outputs.router != '' && steps.evaluate.outcome == 'success' }}
+        uses: actions/github-script@v7
+        env:
+          DETECTED_ROUTER: ${{ steps.detect.outputs.router }}  # read via process.env so the PR-derived name is never spliced into the script
+        with:
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+            let comment = '## Router Evaluation Results\n\n';
+            comment += `**Router:** \`${process.env.DETECTED_ROUTER}\`\n`;
+            comment += `**Dataset Split:** \`${{ steps.detect.outputs.split }}\`\n\n`;
+
+            // Read metrics from metrics.json file (required - no fallback)
+            const metricsPath = path.join('base', 'metrics.json');
+            if (!fs.existsSync(metricsPath)) {
+              throw new Error(`metrics.json not found at ${metricsPath}. Evaluation must produce metrics.json file.`);
+            }
+
+            const metrics = JSON.parse(fs.readFileSync(metricsPath, 'utf8'));
+            comment += '### RouterArena Metrics\n\n';
+            comment += '| Metric | Value |\n';
+            comment += '|--------|-------|\n';
+            comment += `| **RouterArena Score** | ${metrics.arena_score.toFixed(4)} |\n`;
+            comment += `| **Accuracy** | ${(metrics.accuracy * 100).toFixed(2)}% |\n`;
+            comment += `| **Total Cost** | $${metrics.total_cost.toFixed(6)} |\n`;
+            comment += `| **Avg Cost per Query** | $${metrics.avg_cost_per_query.toFixed(6)} |\n`;
+            comment += `| **Avg Cost per 1K Queries** | $${metrics.avg_cost_per_1000.toFixed(4)} |\n`;
+            comment += `| **Number of Queries** | ${metrics.num_queries} |\n`;
+            const robustnessScore = metrics.robustness_score;
+            const robustnessCell = typeof robustnessScore === 'number' ? robustnessScore.toFixed(4) : 'N/A';  // a missing key or JSON null renders as N/A
+            comment += `| **Robustness Score** | ${robustnessCell} |\n`;
+
+            // Add optimality scores if available
+            if (metrics.optimality) {
+              comment += '\n### Optimality Metrics\n\n';
+              comment += '| Metric | Value |\n';
+              comment += '|--------|-------|\n';
+              comment += `| **Opt.Sel** (Optimal Selection) | ${metrics.optimality.opt_sel.toFixed(4)} |\n`;
+              comment += `| **Opt.Cost** (Cost Efficiency) | ${metrics.optimality.opt_cost.toFixed(4)} |\n`;
+              comment += `| **Opt.Acc** (Accuracy vs Optimal) | ${metrics.optimality.opt_acc.toFixed(4)} |\n`;
+            }
+
+            comment += '\n---\n';
+            comment += '*Evaluation completed by RouterArena automated workflow*';
+            // Post comment to PR
+            await github.rest.issues.createComment({
+              issue_number: context.payload.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: comment
+            });
+            console.log('Successfully posted evaluation results as PR comment');
+
+      - name: React with success
+        if: ${{ steps.detect.outputs.router != '' && steps.evaluate.outcome == 'success' }}
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Cosmetic reaction: a 403 (token policy forbids reactions) must not fail an otherwise successful run.
+            const ackTarget = { owner: context.repo.owner, repo: context.repo.repo, comment_id: context.payload.comment.id };
+            await github.rest.reactions.createForIssueComment({ ...ackTarget, content: 'rocket' }).catch((error) => {
+              if (error.status !== 403) throw error;
+              core.warning(`Skipping reaction due to permission restriction: ${error.message}`);
+            });
+
+      - name: Complete PR check
+        if: ${{ always() && steps.checkrun.outputs.id != '' }}  # runs even after a failed step, provided a check run was created
+        uses: actions/github-script@v7
+        env:
+          DETECTED_ROUTER: ${{ steps.detect.outputs.router }}
+          DETECT_OUTCOME: ${{ steps.detect.outcome }}
+          EVALUATE_OUTCOME: ${{ steps.evaluate.outcome }}
+        with:
+          script: |
+            let conclusion = 'success';  // defaults, overridden below when detection or evaluation did not succeed
+            let title = 'Evaluation completed';
+            let summary = 'Router evaluation finished successfully.';
+
+            if (!process.env.DETECTED_ROUTER) {  // no router output: detection either skipped cleanly or did not succeed
+              conclusion = process.env.DETECT_OUTCOME === 'success' ? 'neutral' : 'failure';
+              title = process.env.DETECT_OUTCOME === 'success' ? 'No router file detected' : 'Evaluation setup failed';
+              summary = process.env.DETECT_OUTCOME === 'success'
+                ? 'No changed prediction file was detected for this PR, so evaluation was skipped.'
+                : 'Failed while detecting prediction files for this PR.';
+            } else if (process.env.EVALUATE_OUTCOME !== 'success') {  // router detected but the evaluate step did not succeed
+              conclusion = 'failure';
+              title = 'Evaluation failed';
+              summary = 'The evaluation step failed. Check this workflow run logs for details.';
+            }
+
+            await github.rest.checks.update({  // completes the check run created by the checkrun step
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              check_run_id: Number('${{ steps.checkrun.outputs.id }}'),
+              status: 'completed',
+              conclusion,
+              completed_at: new Date().toISOString(),
+              output: { title, summary }
+            });