diff --git a/docs/mcp.md b/docs/mcp.md
index 213be95c..50ad161b 100644
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -771,29 +771,34 @@ Validates an XML test case for schema correctness (validity score) and best prac

 **Input**

-| Parameter | Type | Required | Description |
-| --- | --- | --- | --- |
-| `content` | string | one of `content`/`xml`/`file_path` required | XML content to validate (MCP field name) |
-| `xml` | string | one of `content`/`xml`/`file_path` required | XML content to validate (API-compatible alias) |
-| `file_path` | string | one of `content`/`xml`/`file_path` required | Path to the `.testcase` XML file |
+| Parameter | Type | Required | Description |
+| --- | --- | --- | --- |
+| `content` | string | one of `content`/`xml`/`file_path` required | XML content to validate (MCP field name) |
+| `xml` | string | one of `content`/`xml`/`file_path` required | XML content to validate (API-compatible alias) |
+| `file_path` | string | one of `content`/`xml`/`file_path` required | Path to the `.testcase` XML file |
+| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: is_valid, scores, and stop signal only. `"standard"`/`"full"`: full issues list (default). |
+| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved issues since that run (`{ added, resolved, unchanged_count, run_id }`). Returns `BASELINE_NOT_FOUND` if the run ID is unknown. |

 **Output**

-| Field | Type | Description |
-| --- | --- | --- |
-| `is_valid` | boolean | `true` if zero ERROR-level schema violations |
-| `validity_score` | number (0–100) | Schema compliance score (100 − errorCount × 20) |
-| `quality_score` | number (0–100) | Best-practices score (weighted deduction formula) |
-| `error_count` | integer | Schema error count |
-| `warning_count` | integer | Schema warning count |
-| `step_count` | integer | Number of `` steps |
-| `test_case_id` | string | Value of the `id` attribute |
-| `test_case_name` | string | Value of the `name` attribute |
-| `issues` | array | Schema issues with `rule_id`, `severity`, `message` |
-| `best_practices_violations` | array | Best-practices violations with `rule_id`, `severity`, `weight`, `message` |
-| `best_practices_rules_evaluated` | integer | How many best-practices rules were checked |
-| `validation_source` | string | `quality_hub`, `local`, or `local_fallback` — see Authentication section |
-| `validation_warning` | string | Present when `validation_source` is `local` (onboarding) or `local_fallback` (explains why API failed) |
+| Field | Type | Description |
+| --- | --- | --- |
+| `run_id` | string | Stable identifier for this validation run. Pass as `baseline_run_id` in the next call to receive only new/resolved issues. |
+| `completeness_score` | number (0–100) | Percentage of valid test cases out of all test cases validated, rounded to an integer (`0`–`100`). |
+| `recommended_next_action` | string | `"stop"` (all passing, nothing left to fix), `"fix_and_revalidate"` (issues remain — fix them and re-run), or `"inspect_failures"` (first run, no baseline on disk yet — review the failures before fixing). |
+| `is_valid` | boolean | `true` if zero ERROR-level schema violations |
+| `validity_score` | number (0–100) | Schema compliance score (100 − errorCount × 20) |
+| `quality_score` | number (0–100) | Best-practices score (weighted deduction formula) |
+| `error_count` | integer | Schema error count |
+| `warning_count` | integer | Schema warning count |
+| `step_count` | integer | Number of test steps |
+| `test_case_id` | string | Value of the `id` attribute |
+| `test_case_name` | string | Value of the `name` attribute |
+| `issues` | array | Schema issues with `rule_id`, `severity`, `message` |
+| `best_practices_violations` | array | Best-practices violations with `rule_id`, `severity`, `weight`, `message` |
+| `best_practices_rules_evaluated` | integer | How many best-practices rules were checked |
+| `validation_source` | string | `quality_hub`, `local`, or `local_fallback` — see Authentication section |
+| `validation_warning` | string | Present when `validation_source` is `local` (onboarding) or `local_fallback` (explains why API failed) |
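+
+For example, an agent can validate, fix, and re-validate against a shrinking diff. The values below are illustrative only — the `run_id` format, paths, and rule IDs will vary:
+
+```jsonc
+// Call 1 — no baseline yet, so the full issue list is returned
+// input:  { "file_path": "/work/tests/Login.testcase" }
+// output: { "run_id": "1712-3f9a1c2b-k4x2", "is_valid": false, "error_count": 2,
+//          "completeness_score": 0, "recommended_next_action": "inspect_failures", /* … */ }
+
+// Call 2 — after fixes, diffed against the saved baseline
+// input:  { "file_path": "/work/tests/Login.testcase", "baseline_run_id": "1712-3f9a1c2b-k4x2" }
+// output: { "run_id": "1713-3f9a1c2b-p7q9", "added": [], "resolved": [/* the two fixed issues */],
+//          "unchanged_count": 0, "completeness_score": 100, "recommended_next_action": "stop" }
+```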

 **Key schema rules:** TC_001 (missing XML declaration), TC_002 (malformed XML), TC_003 (wrong root element), TC_010/011/012 (missing/invalid id/guid), TC_031 (invalid apiCall guid), TC_034/035 (non-integer testItemId).

@@ -807,6 +812,15 @@ Validates an XML test case for schema correctness (validity score) and best prac

 - **VAR-REF-001** — An argument value looks like a variable reference (`{VarName}` or `{Obj.Field}`) but is stored as `class="value" valueClass="string"`. Provar will treat it as a literal string, not resolve the variable. Replace with `class="variable"` and `` elements.
 - **VAR-REF-002** — A `{VarName}` token is embedded inside a larger plain string (e.g. `SELECT Id FROM Account WHERE Id = '{AccountId}'`). Provar does not perform `{…}` interpolation in string values at runtime; the braces are emitted literally. Use `class="compound"` with `` children to split the literal text and variable references. In `provar_testcase_generate`, pass the value with `{VarName}` placeholders — the generator emits compound XML automatically.

+**Error codes**
+
+| Code | Meaning |
+| --- | --- |
+| `BASELINE_NOT_FOUND` | The `baseline_run_id` was not found. Run without `baseline_run_id` first to establish a baseline. |
+| `VALIDATE_ERROR` | Unexpected validation error |
+| `FILE_NOT_FOUND` | `file_path` does not exist |
+| `PATH_NOT_ALLOWED` | `file_path` is outside the server's `--allowed-paths` |
+
 ---

 ### `provar_testsuite_validate`

@@ -815,15 +829,23 @@ Validates a Provar test suite — checks for empty suites, duplicate names (with

 **Input**

-| Parameter | Type | Required | Description |
-| --- | --- | --- | --- |
-| `suite_name` | string | yes | Name of the test suite |
-| `test_cases` | array | no | Test cases directly in this suite. Each item: `{ name, xml_content \| xml }` |
-| `child_suites` | array | no | Child suites (up to 2 levels of nesting). Each item: `{ name, test_cases?, test_suites?, test_case_count? }` |
-| `test_case_count` | integer | no | Override total count for the size check (useful when not sending full XML) |
-| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be "valid" (default: 80) |
+| Parameter | Type | Required | Description |
+| --- | --- | --- | --- |
+| `suite_name` | string | yes | Name of the test suite |
+| `test_cases` | array | no | Test cases directly in this suite. Each item: `{ name, xml_content \| xml }` |
+| `child_suites` | array | no | Child suites (up to 2 levels of nesting). Each item: `{ name, test_cases?, test_suites?, test_case_count? }` |
+| `test_case_count` | integer | no | Override total count for the size check (useful when not sending full XML) |
+| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be "valid" (default: 80) |
+| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and per-test-case results (default). |
+| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved violations since that run. Returns `BASELINE_NOT_FOUND` if the run ID is unknown. |
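+
+A minimal request shape, for illustration (names and XML content are placeholders):
+
+```jsonc
+{
+  "suite_name": "Regression - Accounts",
+  "test_cases": [{ "name": "TC_Login", "xml_content": "<?xml version=\"1.0\"?>…" }],
+  "child_suites": [{ "name": "Smoke", "test_case_count": 12 }],
+  "quality_threshold": 80
+}
+```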

-**Output** — `{ name, level: "suite", quality_score, violations[], test_cases[], test_suites[], summary }`
+**Output** — `{ run_id, completeness_score, recommended_next_action, name, level: "suite", quality_score, violations[], test_cases[], test_suites[], summary }`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `run_id` | string | Stable identifier for this run. Pass as `baseline_run_id` in the next call to receive only new/resolved violations. |
+| `completeness_score` | number (0–100) | Ratio of valid test cases to total, as an integer percentage (`0`–`100`). |
+| `recommended_next_action` | string | `"stop"`, `"fix_and_revalidate"`, or `"inspect_failures"` — see [Quality scores explained](#quality-scores-explained). |
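+
+With `detail: "summary"`, only the stop-signal fields survive (illustrative values):
+
+```jsonc
+{
+  "requestId": "req-42",
+  "name": "Regression - Accounts",
+  "quality_score": 86,
+  "summary": { /* test_cases_valid, total_test_cases, … */ },
+  "run_id": "1712-9c2d41aa-b3f1",
+  "completeness_score": 92,
+  "recommended_next_action": "fix_and_revalidate"
+}
+```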
 **Violation rule IDs:** SUITE-EMPTY-001, SUITE-DUP-001, SUITE-DUP-002, SUITE-SIZE-001, SUITE-NAMING-001, SUITE-NAMING-002

@@ -835,14 +857,15 @@ Validates a Provar test plan — checks for empty plans, duplicate suite names,

 **Input**

-| Parameter | Type | Required | Description |
-| --- | --- | --- | --- |
-| `plan_name` | string | yes | Name of the test plan |
-| `test_suites` | array | no | Test suites in this plan |
-| `test_cases` | array | no | Test cases directly in this plan |
-| `test_suite_count` | integer | no | Override suite count for the size check |
-| `metadata` | object | no | Plan completeness metadata (see below) |
-| `quality_threshold` | number (0–100) | no | Minimum quality score (default: 80) |
+| Parameter | Type | Required | Description |
+| --- | --- | --- | --- |
+| `plan_name` | string | yes | Name of the test plan |
+| `test_suites` | array | no | Test suites in this plan |
+| `test_cases` | array | no | Test cases directly in this plan |
+| `test_suite_count` | integer | no | Override suite count for the size check |
+| `metadata` | object | no | Plan completeness metadata (see below) |
+| `quality_threshold` | number (0–100) | no | Minimum quality score (default: 80) |
+| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: name, scores, and stop signal only. `"standard"`/`"full"`: full violations and hierarchy results (default). |

 **`metadata` fields**

@@ -857,7 +880,12 @@ Validates a Provar test plan — checks for empty plans, duplicate suite names,

 | `test_data_strategy` | How test data is prepared and cleaned up |
 | `risks` | Identified risks and mitigations |

-**Output** — `{ name, level: "plan", quality_score, violations[], test_suites[], test_cases[], summary }`
+**Output** — `{ completeness_score, recommended_next_action, name, level: "plan", quality_score, violations[], test_suites[], test_cases[], summary }`
+
+| Field | Type | Description |
+| --- | --- | --- |
+| `completeness_score` | number (0–100) | Ratio of valid test cases to total, as an integer percentage (`0`–`100`). |
+| `recommended_next_action` | string | `"stop"` or `"inspect_failures"` — plan validation keeps no baseline, so `"fix_and_revalidate"` is never returned. See [Quality scores explained](#quality-scores-explained). |

 **Violation rule IDs:** PLAN-EMPTY-001, PLAN-DUP-001, PLAN-SIZE-001, PLAN-NAMING-001, PLAN-META-001 through PLAN-META-007

@@ -871,27 +899,32 @@ Validates a Provar project directly from its directory on disk.
Reads the plan/s **Input** -| Parameter | Type | Required | Description | -| ---------------------- | -------------- | -------- | -------------------------------------------------------------------------------------------------------------------- | -| `project_path` | string | yes | Absolute path to the Provar project root (directory containing `.testproject`) | -| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be considered valid (default: 80) | -| `save_results` | boolean | no | Write a QH-compatible JSON report to `{project_path}/provardx/validation/` (default: true) | -| `results_dir` | string | no | Override the output directory for the saved report (must be within `allowed-paths`) | -| `include_plan_details` | boolean | no | Include full per-suite and per-test-case data in the response (default: false — keep false to avoid token explosion) | -| `max_uncovered` | integer | no | Maximum uncovered test case paths to return (default: 20; set to `0` for none) | -| `max_violations` | integer | no | When `include_plan_details: true`, caps project violations returned (default: 50) | +| Parameter | Type | Required | Description | +| ---------------------- | --------------------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `project_path` | string | yes | Absolute path to the Provar project root (directory containing `.testproject`) | +| `quality_threshold` | number (0–100) | no | Minimum quality score for a test case to be considered valid (default: 80) | +| `save_results` | boolean | no | Write a QH-compatible JSON report to `{project_path}/provardx/validation/` (default: true) | +| `results_dir` | string | no | Override the output directory for the saved report (must be within `allowed-paths`) | +| `detail` | `summary` \| `standard` \| `full` | no | Response verbosity. `"summary"`: key scores and stop signal only. `"standard"`: slim violation summary (default). `"full"`: full per-suite and per-test-case data. | +| `baseline_run_id` | string | no | `run_id` from a previous call. Returns only new/resolved project violations since that run. Returns `BASELINE_NOT_FOUND` if the run ID is unknown. Requires `save_results: true`. | +| `include_plan_details` | boolean | no | **@deprecated** — use `detail="full"` instead. Include full per-suite and per-test-case data (default: false). | +| `max_uncovered` | integer | no | **@deprecated** — response is automatically scoped by `detail` level. Maximum uncovered test case paths to return (default: 20). | +| `max_violations` | integer | no | **@deprecated** — response is automatically scoped by `detail` level. Caps project violations returned when `include_plan_details: true` (default: 50). 
|

 **Output** (slim mode, `include_plan_details: false`)

-| Field | Description |
-| --- | --- |
-| `quality_score` | Project quality score (0–100) |
-| `coverage_percent` | Percentage of test cases covered by at least one plan |
-| `violation_summary` | Map of `rule_id → count` for all violations found |
-| `plan_scores` | Array of `{ name, quality_score }` per plan |
-| `uncovered_test_cases` | Uncovered test case paths (capped at `max_uncovered`) |
-| `save_error` | Present only if the results file could not be written |
-| `plan_integrity_warnings` | Present when any plan or suite directory is missing a `.planitem` file — test instances in those directories are silently invisible to the Provar runner |
+| Field | Description |
+| --- | --- |
+| `run_id` | Stable identifier for this run (only present when `save_results: true`). Pass as `baseline_run_id` in the next call to receive only new/resolved violations. |
+| `completeness_score` | Ratio of valid test cases to total, as an integer percentage (`0`–`100`). |
+| `recommended_next_action` | `"stop"`, `"fix_and_revalidate"`, or `"inspect_failures"` — see [Quality scores explained](#quality-scores-explained). |
+| `quality_score` | Project quality score (0–100) |
+| `coverage_percent` | Percentage of test cases covered by at least one plan |
+| `violation_summary` | Map of `rule_id → count` for all violations found |
+| `plan_scores` | Array of `{ name, quality_score }` per plan |
+| `uncovered_test_cases` | Uncovered test case paths (capped at `max_uncovered`) |
+| `save_error` | Present only if the results file could not be written |
+| `plan_integrity_warnings` | Present when any plan or suite directory is missing a `.planitem` file — test instances in those directories are silently invisible to the Provar runner |

 When `include_plan_details: true`, the response additionally includes full `test_plans[]` with nested suite and per-test-case data.
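+
+A typical two-call project flow (illustrative values):
+
+```jsonc
+// Call 1 — full validation; the run is saved under {project_path}/provardx/validation/
+// input:  { "project_path": "/work/MyProject", "detail": "summary" }
+// output: { "run_id": "1712-7e2f90aa-m3k8", "quality_score": 74,
+//          "completeness_score": 81, "recommended_next_action": "inspect_failures", /* … */ }
+
+// Call 2 — after fixes, return only the delta against that baseline
+// input:  { "project_path": "/work/MyProject", "baseline_run_id": "1712-7e2f90aa-m3k8" }
+// output: { "run_id": "1713-7e2f90aa-r2d8", "added": [], "resolved": [{ "rule_id": "PROJ-CONN-001" /* … */ }],
+//          "unchanged_count": 3, "completeness_score": 95, "recommended_next_action": "fix_and_revalidate" }
+```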
@@ -899,7 +932,7 @@ When `include_plan_details: true`, the response additionally includes full `test **Violation rule IDs:** PROJ-EMPTY-001, PROJ-DUP-001, PROJ-DUP-002, PROJ-CALLABLE-001, PROJ-CALLABLE-002, PROJ-CONN-001, PROJ-ENV-001, PROJ-ENV-002, PROJ-SECRET-001 -**Error codes:** `NOT_A_PROJECT`, `AMBIGUOUS_PROJECT`, `PATH_NOT_FOUND`, `PATH_NOT_ALLOWED`, `PATH_TRAVERSAL` +**Error codes:** `NOT_A_PROJECT`, `AMBIGUOUS_PROJECT`, `PATH_NOT_FOUND`, `PATH_NOT_ALLOWED`, `PATH_TRAVERSAL`, `BASELINE_NOT_FOUND` (baseline run not found — run without `baseline_run_id` first to establish a baseline) --- diff --git a/src/mcp/tools/projectValidateFromPath.ts b/src/mcp/tools/projectValidateFromPath.ts index d990f731..481bedab 100644 --- a/src/mcp/tools/projectValidateFromPath.ts +++ b/src/mcp/tools/projectValidateFromPath.ts @@ -6,6 +6,7 @@ */ /* eslint-disable camelcase */ +import path from 'node:path'; import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import type { ServerConfig } from '../server.js'; @@ -14,6 +15,16 @@ import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; import { validateProjectFromPath, ProjectValidationError } from '../../services/projectValidation.js'; import type { ProjectValidationResult, ValidatedPlan } from '../../services/projectValidation.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + type DiffableViolation, +} from '../utils/validationDiff.js'; import { desc } from './descHelper.js'; // ── Response shaping ────────────────────────────────────────────────────────── @@ -104,8 +115,29 @@ function shapeResponse( }; } +// ── Helpers ─────────────────────────────────────────────────────────────────── + +function classifyError(err: Error & { code?: string }): { code: string; isUserError: boolean } { + if (err instanceof PathPolicyError || err instanceof ProjectValidationError) { + return { code: err.code, isUserError: true }; + } + return { code: err.code ?? 
'VALIDATE_ERROR', isUserError: false }; +} + // ── Tool registration ───────────────────────────────────────────────────────── +const PROJECT_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'project_path', + 'project_name', + 'quality_score', + 'quality_tier', + 'saved_to', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + export function registerProjectValidateFromPath(server: McpServer, config: ServerConfig): void { server.registerTool( 'provar_project_validate', @@ -120,16 +152,17 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve 'the full validation rule set.', 'Returns a compact quality score, violation summary, and per-plan/suite scores.', 'By default returns a slim summary response to avoid token explosion.', - 'Pass include_plan_details:true to get full per-suite and per-test-case data.', + 'Pass include_plan_details:true or detail:full to get full per-suite and per-test-case data.', 'By default saves a QH-compatible JSON report to', '{project_path}/provardx/validation/ (created if absent).', 'Plan integrity: if any plan or suite directory is missing a .planitem file, the response includes a plan_integrity_warnings array.', 'Test instances in those directories are silently ignored by the Provar runner — fix these before running tests.', + 'Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved violations.', 'IMPORTANT: Use this tool for whole-project validation —', 'DO NOT read individual test case files and pass XML content inline.', 'Pass a project_path and let this tool handle all file reading.', ].join(' '), - 'Validate a Provar project from disk; returns quality score and violation summary.' + 'Validate a Provar project from disk; quality score, violation summary, run_id for diff.' ), inputSchema: { project_path: z @@ -177,9 +210,9 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve .default(false) .describe( desc( - 'When true, include full per-suite and per-test-case violation data in the response. ' + - 'Default false to keep response small. Use only when you need to inspect specific test case failures.', - 'bool, optional; default false, include full per-suite violation data' + '@deprecated — use detail="full" instead. When true, include full per-suite and per-test-case violation data in the response. ' + + 'Default false to keep response small.', + 'bool, optional, @deprecated; use detail="full" instead' ) ), max_uncovered: z @@ -190,8 +223,8 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve .default(20) .describe( desc( - 'Maximum number of uncovered test case paths to include in the response (default: 20). Set to 0 for none, or a large number for all.', - 'int ≥0, optional; max uncovered test case paths returned' + '@deprecated — no replacement; response is automatically scoped by detail level. Maximum number of uncovered test case paths to include in the response (default: 20).', + 'int ≥0, optional, @deprecated; auto-scoped by detail' ) ), max_violations: z @@ -202,10 +235,23 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve .default(50) .describe( desc( - 'When include_plan_details:true, caps project_violations returned (default: 50). Ignored in slim mode where violations are grouped by rule_id instead.', - 'int ≥0, optional; max violations returned in detail mode' + '@deprecated — no replacement; response is automatically scoped by detail level. 
When include_plan_details:true, caps project_violations returned (default: 50).', + 'int ≥0, optional, @deprecated; auto-scoped by detail' ) ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + 'Response verbosity. "summary": key scores and stop signal only. "standard": slim violation summary (default). "full": full per-suite and per-test-case data (implies include_plan_details:true).' + ), + baseline_run_id: z + .string() + .optional() + .describe( + 'run_id from a previous call. When provided, returns only project-level violations that are new or resolved since that run: { added, resolved, unchanged_count, run_id }. If not found, returns error BASELINE_NOT_FOUND.' + ), }, }, ({ @@ -216,6 +262,8 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve include_plan_details, max_uncovered, max_violations, + detail, + baseline_run_id, }) => { const requestId = makeRequestId(); log('info', 'provar_project_validate', { requestId, project_path, include_plan_details }); @@ -224,6 +272,9 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve assertPathAllowed(project_path, config.allowedPaths); if (results_dir) assertPathAllowed(results_dir, config.allowedPaths); + const storageDir = results_dir ?? path.join(project_path, 'provardx', 'validation'); + const runId = generateRunId(project_path); + const result = validateProjectFromPath({ project_path, quality_threshold, @@ -235,22 +286,84 @@ export function registerProjectValidateFromPath(server: McpServer, config: Serve log('warn', 'provar_project_validate: could not save results', { requestId, error: result.save_error }); } - const shaped = shapeResponse(result, include_plan_details, max_uncovered, max_violations); - const response = { requestId, ...shaped }; + const currentViolations = result.project_violations as unknown as DiffableViolation[]; + + // Load baseline BEFORE saving to prevent eviction of the requested baseline + const baseline = + save_results !== false && baseline_run_id !== undefined && baseline_run_id !== '' + ? loadBaselineViolations(storageDir, baseline_run_id) + : null; + + const hasBaseline = save_results !== false ? hasAnyRun(storageDir) : false; + + if (save_results !== false) { + try { + saveRun(storageDir, runId, currentViolations); + } catch (saveErr) { + log('warn', 'provar_project_validate: could not save run for diff', { + requestId, + error: (saveErr as Error).message, + }); + } + } + + // Diff mode + if (baseline_run_id !== undefined && baseline_run_id !== '') { + if (!baseline) { + const errResult = makeError( + 'BASELINE_NOT_FOUND', + 'Baseline run not found. Run validation without baseline_run_id first to establish a baseline.', + requestId, + false, + { suggestion: 'Run provar_project_validate without baseline_run_id first to establish a baseline.' } + ); + return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; + } + const diff = computeDiff(baseline, currentViolations); + const completeness_score = calcCompletenessScore( + result.summary.test_cases_valid, + result.summary.total_test_cases + ); + const recommended_next_action = calcNextAction(completeness_score, true, currentViolations.length); + const diffResponse = { + requestId, + ...(save_results !== false ? 
{ run_id: runId } : {}), + ...diff, + completeness_score, + recommended_next_action, + }; + return { + content: [{ type: 'text' as const, text: JSON.stringify(diffResponse) }], + structuredContent: diffResponse, + }; + } + + const completeness_score = calcCompletenessScore( + result.summary.test_cases_valid, + result.summary.total_test_cases + ); + const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length); + + const usePlanDetails = include_plan_details || detail === 'full'; + const shaped = shapeResponse(result, usePlanDetails, max_uncovered, max_violations); + const response = { + requestId, + ...(save_results !== false ? { run_id: runId } : {}), + completeness_score, + recommended_next_action, + ...shaped, + }; + + const detailLevel = (detail ?? 'standard') as DetailLevel; + const finalResponse = applyDetailLevel(response, detailLevel, PROJECT_VALIDATE_SUMMARY_FIELDS); return { - content: [{ type: 'text' as const, text: JSON.stringify(response) }], - structuredContent: response, + content: [{ type: 'text' as const, text: JSON.stringify(finalResponse) }], + structuredContent: finalResponse, }; } catch (err: unknown) { const error = err as Error & { code?: string }; - const code = - error instanceof PathPolicyError - ? error.code - : error instanceof ProjectValidationError - ? error.code - : error.code ?? 'VALIDATE_ERROR'; - const isUserError = error instanceof PathPolicyError || error instanceof ProjectValidationError; + const { code, isUserError } = classifyError(error); const errResult = makeError(code, error.message, requestId, !isUserError); log('error', 'provar_project_validate failed', { requestId, error: error.message }); return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; diff --git a/src/mcp/tools/testCaseValidate.ts b/src/mcp/tools/testCaseValidate.ts index f1e014e7..0b163922 100644 --- a/src/mcp/tools/testCaseValidate.ts +++ b/src/mcp/tools/testCaseValidate.ts @@ -7,7 +7,9 @@ /* eslint-disable camelcase */ import fs from 'node:fs'; +import os from 'node:os'; import path from 'node:path'; +import { createHash } from 'node:crypto'; import { z } from 'zod'; import { XMLParser } from 'fast-xml-parser'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; @@ -23,6 +25,16 @@ import { QualityHubRateLimitError, REQUEST_ACCESS_URL, } from '../../services/qualityHub/client.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + type DiffableViolation, +} from '../utils/validationDiff.js'; import { runBestPractices } from './bestPracticesEngine.js'; import { desc } from './descHelper.js'; @@ -42,14 +54,76 @@ const UNREACHABLE_WARNING = 'Quality Hub API unreachable. Running local validation only (structural rules, no quality scoring).\n' + 'For CI/CD: set PROVAR_QUALITY_HUB_URL and PROVAR_API_KEY environment variables.'; +const TC_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'is_valid', + 'validity_score', + 'quality_score', + 'validation_source', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + +/** Storage dir for testcase diff runs (namespaced to avoid cross-tool baseline collisions). 
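+ * Runs live under the user home directory (not the project) so inline XML —
+ * keyed by a content hash via tcRunContext — still gets baseline tracking;
+ * validationDiff keeps at most the 20 most recent runs and evicts the oldest. */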
+function tcStorageDir(): string {
+  return path.join(os.homedir(), '.provardx', 'validation', 'testcase');
+}
+
+/** Resolve validation result from QualityHub API or fall back to local. */
+async function resolveBaseResult(
+  source: string,
+  apiKey: string | null,
+  requestId: string
+): Promise<Record<string, unknown>> {
+  if (!apiKey) {
+    return { ...validateTestCase(source), validation_source: 'local', validation_warning: ONBOARDING_MESSAGE };
+  }
+  const baseUrl = getQualityHubBaseUrl();
+  try {
+    const apiResult = await qualityHubClient.validateTestCaseViaApi(source, apiKey, baseUrl);
+    const localMeta = validateTestCase(source);
+    log('info', 'provar_testcase_validate: quality_hub', { requestId });
+    return {
+      ...apiResult,
+      issues: apiResult.issues as unknown as ValidationIssue[],
+      step_count: localMeta.step_count,
+      error_count: apiResult.issues.filter((i) => i.severity === 'ERROR').length,
+      warning_count: apiResult.issues.filter((i) => i.severity === 'WARNING').length,
+      test_case_id: localMeta.test_case_id,
+      test_case_name: localMeta.test_case_name,
+      validation_source: 'quality_hub',
+    };
+  } catch (apiErr: unknown) {
+    let warning: string;
+    if (apiErr instanceof QualityHubAuthError) {
+      warning = AUTH_WARNING;
+      log('warn', 'provar_testcase_validate: auth error, falling back', { requestId });
+    } else if (apiErr instanceof QualityHubRateLimitError) {
+      warning = RATE_LIMIT_WARNING;
+      log('warn', 'provar_testcase_validate: rate limited, falling back', { requestId });
+    } else {
+      warning = UNREACHABLE_WARNING;
+      log('warn', 'provar_testcase_validate: api unreachable, falling back', { requestId });
+    }
+    return { ...validateTestCase(source), validation_source: 'local_fallback', validation_warning: warning };
+  }
+}
+
+/** Derive a stable context key for run ID generation. */
+function tcRunContext(filePath: string | undefined, xmlContent: string): string {
+  if (filePath) return filePath;
+  return createHash('sha1').update(xmlContent.slice(0, 200)).digest('hex').slice(0, 16);
+}
+
 export function registerTestCaseValidate(server: McpServer, config: ServerConfig): void {
   server.registerTool(
     'provar_testcase_validate',
     {
       title: 'Validate Test Case',
       description: desc(
-        'Validate a Provar XML test case for structural correctness and quality. Checks XML declaration, root element, required attributes (guid UUID v4, testItemId integer), presence, and applies best-practice rules. When a Provar API key is configured (via sf provar auth login or PROVAR_API_KEY env var), calls the Quality Hub API for full 170-rule scoring. Falls back to local validation if no key is set or the API is unavailable. Returns validity_score (schema compliance), quality_score (best practices, 0–100), and validation_source indicating which ruleset was applied. When structural errors are returned, consult the provar://docs/step-reference MCP resource for correct step attribute schemas.',
-        'Validate a Provar XML test case: structure, UUIDs, steps, and quality scoring.'
+        'Validate a Provar XML test case for structural correctness and quality. Checks XML declaration, root element, required attributes (guid UUID v4, testItemId integer), step presence, and applies best-practice rules. When a Provar API key is configured (via sf provar auth login or PROVAR_API_KEY env var), calls the Quality Hub API for full 170-rule scoring. Falls back to local validation if no key is set or the API is unavailable.
Returns validity_score (schema compliance), quality_score (best practices, 0–100), and validation_source indicating which ruleset was applied. Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved issues. When structural errors are returned, consult the provar://docs/step-reference MCP resource for correct step attribute schemas.', + 'Validate a Provar XML test case: structure, UUIDs, steps, quality scoring; run_id for baseline diff.' ), inputSchema: { content: z @@ -61,9 +135,28 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig .optional() .describe(desc('XML content to validate — API-compatible alias for content', 'string, inline content')), file_path: z.string().optional().describe(desc('Path to .xml test case file', 'string, path to file')), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity. "summary": is_valid, scores, and stop signal only. "standard"/"full": full issues list (default).', + 'enum summary|standard|full, optional; default standard' + ) + ), + baseline_run_id: z + .string() + .optional() + .describe( + desc( + 'run_id from a previous call. When provided, returns only issues that are new or resolved since that run: { added, resolved, unchanged_count, run_id }. If not found, returns error BASELINE_NOT_FOUND.', + 'string, optional; prev run_id for diff response' + ) + ), }, }, - async ({ content, xml, file_path }) => { + async ({ content, xml, file_path, detail, baseline_run_id }) => { const requestId = makeRequestId(); log('info', 'provar_testcase_validate', { requestId, has_content: !!(content ?? xml), file_path }); @@ -87,63 +180,78 @@ export function registerTestCaseValidate(server: McpServer, config: ServerConfig } const apiKey = resolveApiKey(); + const baseResult = await resolveBaseResult(source, apiKey, requestId); + + const storageDir = tcStorageDir(); + const runId = generateRunId(tcRunContext(file_path, source)); + const bpViolations = (baseResult.best_practices_violations ?? []) as unknown as DiffableViolation[]; + const currentViolations: DiffableViolation[] = [ + ...(baseResult.issues as unknown as DiffableViolation[]), + ...bpViolations, + ]; + + // Load baseline BEFORE saving to prevent eviction of the requested baseline + const baseline = + baseline_run_id !== undefined && baseline_run_id !== '' + ? 
loadBaselineViolations(storageDir, baseline_run_id) + : null; + + const hasBaseline = hasAnyRun(storageDir); + + try { + saveRun(storageDir, runId, currentViolations); + } catch (saveErr) { + log('warn', 'provar_testcase_validate: could not save run for diff', { + requestId, + error: (saveErr as Error).message, + }); + } - if (apiKey) { - const baseUrl = getQualityHubBaseUrl(); - try { - const apiResult = await qualityHubClient.validateTestCaseViaApi(source, apiKey, baseUrl); - const localMeta = validateTestCase(source); - const result = { - requestId, - ...apiResult, - step_count: localMeta.step_count, - error_count: apiResult.issues.filter((i) => i.severity === 'ERROR').length, - warning_count: apiResult.issues.filter((i) => i.severity === 'WARNING').length, - test_case_id: localMeta.test_case_id, - test_case_name: localMeta.test_case_name, - validation_source: 'quality_hub' as const, - }; - log('info', 'provar_testcase_validate: quality_hub', { requestId }); - return { - content: [{ type: 'text' as const, text: JSON.stringify(result) }], - structuredContent: result, - }; - } catch (apiErr: unknown) { - // API failed — determine the warning and fall through to local validation - let warning: string; - if (apiErr instanceof QualityHubAuthError) { - warning = AUTH_WARNING; - log('warn', 'provar_testcase_validate: auth error, falling back', { requestId }); - } else if (apiErr instanceof QualityHubRateLimitError) { - warning = RATE_LIMIT_WARNING; - log('warn', 'provar_testcase_validate: rate limited, falling back', { requestId }); - } else { - warning = UNREACHABLE_WARNING; - log('warn', 'provar_testcase_validate: api unreachable, falling back', { requestId }); - } - const localResult = { + // Diff mode + if (baseline_run_id !== undefined && baseline_run_id !== '') { + if (!baseline) { + const errResult = makeError( + 'BASELINE_NOT_FOUND', + 'Baseline run not found. Run validation without baseline_run_id first to establish a baseline.', requestId, - ...validateTestCase(source), - validation_source: 'local_fallback' as const, - validation_warning: warning, - }; - return { - content: [{ type: 'text' as const, text: JSON.stringify(localResult) }], - structuredContent: localResult, - }; + false, + { suggestion: 'Run provar_testcase_validate without baseline_run_id first to establish a baseline.' } + ); + return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] }; } + const diff = computeDiff(baseline, currentViolations); + const completeness_score = calcCompletenessScore(baseResult.is_valid ? 1 : 0, 1); + const recommended_next_action = calcNextAction(completeness_score, true, currentViolations.length); + const diffResponse = { + requestId, + run_id: runId, + ...diff, + completeness_score, + recommended_next_action, + }; + return { + content: [{ type: 'text' as const, text: JSON.stringify(diffResponse) }], + structuredContent: diffResponse, + }; } - // No API key configured — run local validation with onboarding message + const completeness_score = calcCompletenessScore(baseResult.is_valid ? 1 : 0, 1); + const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length); + const result = { requestId, - ...validateTestCase(source), - validation_source: 'local' as const, - validation_warning: ONBOARDING_MESSAGE, + run_id: runId, + completeness_score, + recommended_next_action, + ...baseResult, }; + + const detailLevel = (detail ?? 
'standard') as DetailLevel; + const finalResult = applyDetailLevel(result, detailLevel, TC_VALIDATE_SUMMARY_FIELDS); + return { - content: [{ type: 'text' as const, text: JSON.stringify(result) }], - structuredContent: result, + content: [{ type: 'text' as const, text: JSON.stringify(finalResult) }], + structuredContent: finalResult, }; } catch (err: unknown) { const error = err as Error & { code?: string }; diff --git a/src/mcp/tools/testPlanValidate.ts b/src/mcp/tools/testPlanValidate.ts index c2fe3426..f9772a55 100644 --- a/src/mcp/tools/testPlanValidate.ts +++ b/src/mcp/tools/testPlanValidate.ts @@ -10,6 +10,8 @@ import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; import { validatePlan, buildHierarchySummary, type TestPlanInput } from './hierarchyValidate.js'; import { desc } from './descHelper.js'; @@ -71,14 +73,23 @@ const metadataSchema = z 'Plan completeness metadata — these fields are configured in the Provar Quality Hub app, not in local project files' ); +const PLAN_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'name', + 'quality_score', + 'summary', + 'completeness_score', + 'recommended_next_action', +]; + export function registerTestPlanValidate(server: McpServer): void { server.registerTool( 'provar_testplan_validate', { title: 'Validate Test Plan', description: desc( - 'Validate a Provar test plan: checks for empty plans, duplicate suite names, oversized plans (>20 suites), plan completeness (objectives, scope, methodology, environments, acceptance criteria, test data strategy, risk assessment), and naming consistency. Recursively validates child suites and test cases. Returns quality score, plan-level violations, and full hierarchy results.', - 'Validate a Provar test plan: naming, size, completeness, and per-suite quality.' + 'Validate a Provar test plan: checks for empty plans, duplicate suite names, oversized plans (>20 suites), plan completeness (objectives, scope, methodology, environments, acceptance criteria, test data strategy, risk assessment), and naming consistency. Recursively validates child suites and test cases. Returns quality score, plan-level violations, and full hierarchy results. Use completeness_score and recommended_next_action to determine whether to continue iterating.', + 'Validate a Provar test plan: naming, size, completeness, per-suite quality; stop signal via completeness_score.' ), inputSchema: { plan_name: z.string().describe(desc('Name of the test plan', 'string')), @@ -105,9 +116,19 @@ export function registerTestPlanValidate(server: McpServer): void { .describe( desc('Minimum quality score for a test case to be considered valid (default: 80)', 'number 0–100, optional') ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity. "summary": name, scores, and stop signal only. 
"standard"/"full": full violations and hierarchy results (default).', + 'enum summary|standard|full, optional; default standard' + ) + ), }, }, - ({ plan_name, test_suites, test_cases, test_suite_count, metadata, quality_threshold }) => { + ({ plan_name, test_suites, test_cases, test_suite_count, metadata, quality_threshold, detail }) => { const requestId = makeRequestId(); log('info', 'provar_testplan_validate', { requestId, plan_name }); @@ -123,11 +144,24 @@ export function registerTestPlanValidate(server: McpServer): void { const result = validatePlan(input, threshold); const summary = buildHierarchySummary(result); - const response = { requestId, ...result, summary }; + + const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases); + const recommended_next_action = calcNextAction(completeness_score, false); + + const response = { + requestId, + completeness_score, + recommended_next_action, + ...result, + summary, + }; + + const detailLevel = (detail ?? 'standard') as DetailLevel; + const finalResponse = applyDetailLevel(response, detailLevel, PLAN_VALIDATE_SUMMARY_FIELDS); return { - content: [{ type: 'text' as const, text: JSON.stringify(response) }], - structuredContent: response, + content: [{ type: 'text' as const, text: JSON.stringify(finalResponse) }], + structuredContent: finalResponse, }; } catch (err: unknown) { const error = err as Error; diff --git a/src/mcp/tools/testSuiteValidate.ts b/src/mcp/tools/testSuiteValidate.ts index 1b0886e8..b2bc2fe6 100644 --- a/src/mcp/tools/testSuiteValidate.ts +++ b/src/mcp/tools/testSuiteValidate.ts @@ -6,13 +6,36 @@ */ /* eslint-disable camelcase */ +import os from 'node:os'; +import path from 'node:path'; import { z } from 'zod'; import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { makeError, makeRequestId } from '../schemas/common.js'; import { log } from '../logging/logger.js'; -import { validateSuite, buildHierarchySummary, type TestSuiteInput } from './hierarchyValidate.js'; +import { applyDetailLevel, type DetailLevel } from '../utils/detailLevel.js'; +import { calcCompletenessScore, calcNextAction } from '../utils/validationScore.js'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, + type DiffableViolation, +} from '../utils/validationDiff.js'; +import { validateSuite, buildHierarchySummary, type TestSuiteInput, type SuiteResult } from './hierarchyValidate.js'; import { desc } from './descHelper.js'; +function collectAllViolations(result: SuiteResult): DiffableViolation[] { + const all: DiffableViolation[] = [...(result.violations as unknown as DiffableViolation[])]; + for (const tc of result.test_cases) { + all.push(...(tc.issues as unknown as DiffableViolation[])); + } + for (const child of result.test_suites) { + all.push(...collectAllViolations(child)); + } + return all; +} + // ── Zod schemas ─────────────────────────────────────────────────────────────── const testCaseSchema = z @@ -43,14 +66,28 @@ const childSuiteSchema = z.object({ test_case_count: z.number().int().min(0).optional().describe('Explicit test case count for size check'), }); +const SUITE_VALIDATE_SUMMARY_FIELDS = [ + 'requestId', + 'name', + 'quality_score', + 'summary', + 'run_id', + 'completeness_score', + 'recommended_next_action', +]; + +function suiteStorageDir(): string { + return path.join(os.homedir(), '.provardx', 'validation', 'testsuite'); +} + export function registerTestSuiteValidate(server: McpServer): void { server.registerTool( 
'provar_testsuite_validate', { title: 'Validate Test Suite', description: desc( - 'Validate a Provar test suite: checks for empty suites, duplicate names, oversized suites (>75 tests), and naming convention consistency. Recursively validates child suites and individual test case XML. Returns quality score, suite-level violations, and per-test-case results.', - 'Validate a Provar test suite: naming, size, duplicates, and per-test-case quality.' + 'Validate a Provar test suite: checks for empty suites, duplicate names, oversized suites (>75 tests), and naming convention consistency. Recursively validates child suites and individual test case XML. Returns quality score, suite-level violations, and per-test-case results. Every response includes run_id — pass it as baseline_run_id in the next call to receive only new/resolved violations.', + 'Validate a Provar test suite: naming, size, duplicates, per-test-case quality; run_id for diff.' ), inputSchema: { suite_name: z.string().describe(desc('Name of the test suite', 'string')), @@ -78,9 +115,28 @@ export function registerTestSuiteValidate(server: McpServer): void { .describe( desc('Minimum quality score for a test case to be considered valid (default: 80)', 'number 0–100, optional') ), + detail: z + .enum(['summary', 'standard', 'full']) + .optional() + .default('standard') + .describe( + desc( + 'Response verbosity. "summary": name, scores, and stop signal only. "standard"/"full": full violations and per-test-case results (default).', + 'enum summary|standard|full, optional; default standard' + ) + ), + baseline_run_id: z + .string() + .optional() + .describe( + desc( + 'run_id from a previous call. When provided, returns only violations that are new or resolved since that run: { added, resolved, unchanged_count, run_id }. If not found, returns error BASELINE_NOT_FOUND.', + 'string, optional; prev run_id for diff response' + ) + ), }, }, - ({ suite_name, test_cases, child_suites, test_case_count, quality_threshold }) => { + ({ suite_name, test_cases, child_suites, test_case_count, quality_threshold, detail, baseline_run_id }) => { const requestId = makeRequestId(); log('info', 'provar_testsuite_validate', { requestId, suite_name }); @@ -95,11 +151,74 @@ export function registerTestSuiteValidate(server: McpServer): void { const result = validateSuite(input, threshold); const summary = buildHierarchySummary(result); - const response = { requestId, ...result, summary }; + + const storageDir = suiteStorageDir(); + const runId = generateRunId(suite_name); + const currentViolations = collectAllViolations(result); + + // Load baseline BEFORE saving to prevent eviction of the requested baseline + const baseline = + baseline_run_id !== undefined && baseline_run_id !== '' + ? loadBaselineViolations(storageDir, baseline_run_id) + : null; + + const hasBaseline = hasAnyRun(storageDir); + + try { + saveRun(storageDir, runId, currentViolations); + } catch (saveErr) { + log('warn', 'provar_testsuite_validate: could not save run for diff', { + requestId, + error: (saveErr as Error).message, + }); + } + + // Diff mode + if (baseline_run_id !== undefined && baseline_run_id !== '') { + if (!baseline) { + const errResult = makeError( + 'BASELINE_NOT_FOUND', + 'Baseline run not found. Run validation without baseline_run_id first to establish a baseline.', + requestId, + false, + { suggestion: 'Run provar_testsuite_validate without baseline_run_id first to establish a baseline.' 
}
+          );
+          return { isError: true, content: [{ type: 'text' as const, text: JSON.stringify(errResult) }] };
+        }
+        const diff = computeDiff(baseline, currentViolations);
+        const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases);
+        const recommended_next_action = calcNextAction(completeness_score, true, currentViolations.length);
+        const diffResponse = {
+          requestId,
+          run_id: runId,
+          ...diff,
+          completeness_score,
+          recommended_next_action,
+        };
+        return {
+          content: [{ type: 'text' as const, text: JSON.stringify(diffResponse) }],
+          structuredContent: diffResponse,
+        };
+      }
+
+      const completeness_score = calcCompletenessScore(summary.test_cases_valid, summary.total_test_cases);
+      const recommended_next_action = calcNextAction(completeness_score, hasBaseline, currentViolations.length);
+
+      const response = {
+        requestId,
+        run_id: runId,
+        completeness_score,
+        recommended_next_action,
+        ...result,
+        summary,
+      };
+
+      const detailLevel = (detail ?? 'standard') as DetailLevel;
+      const finalResponse = applyDetailLevel(response, detailLevel, SUITE_VALIDATE_SUMMARY_FIELDS);

       return {
-        content: [{ type: 'text' as const, text: JSON.stringify(response) }],
-        structuredContent: response,
+        content: [{ type: 'text' as const, text: JSON.stringify(finalResponse) }],
+        structuredContent: finalResponse,
       };
     } catch (err: unknown) {
       const error = err as Error;
diff --git a/src/mcp/utils/detailLevel.ts b/src/mcp/utils/detailLevel.ts
new file mode 100644
index 00000000..752760f1
--- /dev/null
+++ b/src/mcp/utils/detailLevel.ts
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2024 Provar Limited.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+export type DetailLevel = 'summary' | 'standard' | 'full';
+
+/**
+ * Shape a structured response object according to the requested detail level.
+ *
+ * - `summary` — retain only the keys listed in summaryFields
+ * - `standard` — return data unchanged (the existing default response shape)
+ * - `full` — return data unchanged (callers expand gated fields before calling)
+ */
+export function applyDetailLevel(
+  data: Record<string, unknown>,
+  level: DetailLevel,
+  summaryFields: string[]
+): Record<string, unknown> {
+  if (level === 'summary') {
+    return Object.fromEntries(Object.entries(data).filter(([k]) => summaryFields.includes(k)));
+  }
+  return data;
+}
diff --git a/src/mcp/utils/validationDiff.ts b/src/mcp/utils/validationDiff.ts
new file mode 100644
index 00000000..a5c6a1ba
--- /dev/null
+++ b/src/mcp/utils/validationDiff.ts
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2024 Provar Limited.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+/* eslint-disable camelcase */
+import fs from 'node:fs';
+import path from 'node:path';
+import { createHash } from 'node:crypto';
+
+const MAX_RUNS = 20;
+const INDEX_FILE = '.runs.json';
+
+// ── Public types ─────────────────────────────────────────────────────────────
+
+export type DiffableViolation = Record<string, unknown>;
+
+export interface DiffResult {
+  added: DiffableViolation[];
+  resolved: DiffableViolation[];
+  unchanged_count: number;
+  run_id: string;
+}
+
+interface RunRecord {
+  run_id: string;
+  timestamp: number;
+  filename: string;
+}
+
+interface RunsIndex {
+  runs: RunRecord[];
+}
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+/** Stable 8-char hash of a string for use in run IDs. */
+function shortHash(input: string): string {
+  return createHash('sha1').update(input).digest('hex').slice(0, 8);
+}
+
+/** Build a unique key for a violation so additions/resolutions can be detected. */
+function violationKey(v: DiffableViolation): string {
+  const rule_id = String(v['rule_id'] ?? '');
+  const applies_to = Array.isArray(v['applies_to'])
+    ? (v['applies_to'] as string[]).join(',')
+    : String(v['applies_to'] ?? '');
+  const message = String(v['message'] ?? '');
+  return `${rule_id}||${applies_to}||${message}`;
+}
+
+function loadIndex(storageDir: string): RunsIndex {
+  const indexPath = path.join(storageDir, INDEX_FILE);
+  try {
+    return JSON.parse(fs.readFileSync(indexPath, 'utf-8')) as RunsIndex;
+  } catch {
+    return { runs: [] };
+  }
+}
+
+function saveIndex(storageDir: string, index: RunsIndex): void {
+  const indexPath = path.join(storageDir, INDEX_FILE);
+  fs.writeFileSync(indexPath, JSON.stringify(index, null, 2), 'utf-8');
+}
+
+// ── Public API ───────────────────────────────────────────────────────────────
+
+/** Generate a run ID from a context string (e.g. project path or suite name). */
+export function generateRunId(context: string): string {
+  const rand = Math.random().toString(36).slice(2, 6);
+  return `${Date.now()}-${shortHash(context)}-${rand}`;
+}
+
+/**
+ * Check whether any prior runs exist in the given storage directory.
+ * Used by calcNextAction to determine the first-run heuristic.
+ */
+export function hasAnyRun(storageDir: string): boolean {
+  const index = loadIndex(storageDir);
+  return index.runs.length > 0;
+}
+
+/**
+ * Save the current violations as a new run in the storage directory.
+ * Caps the index at MAX_RUNS by evicting the oldest entry when full.
+ * Returns the generated run_id.
+ */
+export function saveRun(storageDir: string, runId: string, violations: DiffableViolation[]): string {
+  fs.mkdirSync(storageDir, { recursive: true });
+
+  const filename = `${runId}.json`;
+  fs.writeFileSync(path.join(storageDir, filename), JSON.stringify(violations), 'utf-8');
+
+  const index = loadIndex(storageDir);
+  index.runs.push({ run_id: runId, timestamp: Date.now(), filename });
+
+  // Evict oldest entries when over the cap
+  while (index.runs.length > MAX_RUNS) {
+    const evicted = index.runs.shift();
+    if (evicted) {
+      try {
+        fs.unlinkSync(path.join(storageDir, evicted.filename));
+      } catch {
+        /* best-effort eviction */
+      }
+    }
+  }
+
+  saveIndex(storageDir, index);
+  return runId;
+}
+
+/**
+ * Load the violations array for a given baseline run ID.
+ * Returns null if the run is not found in the index (BASELINE_NOT_FOUND).
+ * The filename is looked up from the index only — the run_id itself is never
+ * used to construct a file path, preventing path traversal.
+ */
+export function loadBaselineViolations(storageDir: string, baselineRunId: string): DiffableViolation[] | null {
+  const index = loadIndex(storageDir);
+  const record = index.runs.find((r) => r.run_id === baselineRunId);
+  if (!record) return null;
+
+  // Use the filename from the index, not the run_id
+  try {
+    const content = fs.readFileSync(path.join(storageDir, record.filename), 'utf-8');
+    return JSON.parse(content) as DiffableViolation[];
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Compute the diff between a baseline and current violations array.
+ * Uses (rule_id + applies_to + full message) as the unique key.
+ * Duplicate violations (same key, multiple occurrences) are treated as
+ * distinct entries — each occurrence is counted separately (multiset semantics).
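+ * For example: baseline [A, A, B] vs current [A, C] yields
+ * added [C], resolved [A, B], unchanged_count 1.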
+ */
+export function computeDiff(baseline: DiffableViolation[], current: DiffableViolation[]): Omit<DiffResult, 'run_id'> {
+  // Build multiset counts keyed by violation identity
+  const baselineCounts = new Map<string, { count: number; sample: DiffableViolation }>();
+  for (const v of baseline) {
+    const key = violationKey(v);
+    const entry = baselineCounts.get(key);
+    if (entry) {
+      entry.count++;
+    } else {
+      baselineCounts.set(key, { count: 1, sample: v });
+    }
+  }
+
+  const currentCounts = new Map<string, { count: number; sample: DiffableViolation }>();
+  for (const v of current) {
+    const key = violationKey(v);
+    const entry = currentCounts.get(key);
+    if (entry) {
+      entry.count++;
+    } else {
+      currentCounts.set(key, { count: 1, sample: v });
+    }
+  }
+
+  const added: DiffableViolation[] = [];
+  const resolved: DiffableViolation[] = [];
+  let unchanged_count = 0;
+
+  // Tally additions: occurrences in current that exceed baseline count
+  for (const [key, { count: curr, sample }] of currentCounts) {
+    const base = baselineCounts.get(key)?.count ?? 0;
+    unchanged_count += Math.min(base, curr);
+    const addedCount = curr - base;
+    for (let i = 0; i < addedCount; i++) added.push(sample);
+  }
+
+  // Tally resolutions: occurrences in baseline that exceed current count
+  for (const [key, { count: base, sample }] of baselineCounts) {
+    const curr = currentCounts.get(key)?.count ?? 0;
+    const resolvedCount = base - Math.min(base, curr);
+    for (let i = 0; i < resolvedCount; i++) resolved.push(sample);
+  }
+
+  return { added, resolved, unchanged_count };
+}
diff --git a/src/mcp/utils/validationScore.ts b/src/mcp/utils/validationScore.ts
new file mode 100644
index 00000000..7c239b24
--- /dev/null
+++ b/src/mcp/utils/validationScore.ts
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2024 Provar Limited.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+export type NextAction = 'stop' | 'fix_and_revalidate' | 'inspect_failures';
+
+/** Fraction of passing tests expressed as 0–100 integer. Returns 0 when total is 0. */
+export function calcCompletenessScore(passing: number, total: number): number {
+  if (total === 0) return 0;
+  return Math.round((passing / total) * 100);
+}
+
+/**
+ * Recommend what the agent should do next based on the completeness score,
+ * remaining violation count, and whether any prior runs exist on disk.
diff --git a/src/mcp/utils/validationScore.ts b/src/mcp/utils/validationScore.ts
new file mode 100644
index 00000000..7c239b24
--- /dev/null
+++ b/src/mcp/utils/validationScore.ts
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2024 Provar Limited.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+export type NextAction = 'stop' | 'fix_and_revalidate' | 'inspect_failures';
+
+/** Fraction of passing tests, expressed as an integer from 0 to 100. Returns 0 when total is 0. */
+export function calcCompletenessScore(passing: number, total: number): number {
+  if (total === 0) return 0;
+  return Math.round((passing / total) * 100);
+}
+
+/**
+ * Recommend what the agent should do next based on the completeness score,
+ * remaining violation count, and whether any prior runs exist on disk.
+ *
+ * - `stop` → score is 100 AND no violations remain
+ * - `inspect_failures` → first run (no baseline on disk) — review what's failing before trying to fix
+ * - `fix_and_revalidate` → subsequent run — the agent knows the failure set, so it should fix and re-run
+ *
+ * The secondary `remainingViolationCount` check prevents `stop` from firing when all
+ * tests pass but quality or best-practice violations are still present.
+ */
+export function calcNextAction(score: number, hasBaseline: boolean, remainingViolationCount = 0): NextAction {
+  if (score === 100 && remainingViolationCount === 0) return 'stop';
+  if (!hasBaseline) return 'inspect_failures';
+  return 'fix_and_revalidate';
+}
diff --git a/test/unit/mcp/detailLevel.test.ts b/test/unit/mcp/detailLevel.test.ts
new file mode 100644
index 00000000..46c48e9c
--- /dev/null
+++ b/test/unit/mcp/detailLevel.test.ts
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024 Provar Limited.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.md file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+
+/* eslint-disable camelcase */
+import { strict as assert } from 'node:assert';
+import { describe, it } from 'mocha';
+import { applyDetailLevel } from '../../../src/mcp/utils/detailLevel.js';
+
+const SAMPLE = {
+  requestId: 'req-1',
+  name: 'MySuite',
+  quality_score: 90,
+  issues: [{ rule_id: 'RULE-001', message: 'Missing doc' }],
+  run_id: 'run-123',
+  completeness_score: 100,
+  recommended_next_action: 'stop',
+};
+
+const SUMMARY_FIELDS = [
+  'requestId',
+  'name',
+  'quality_score',
+  'run_id',
+  'completeness_score',
+  'recommended_next_action',
+];
+
+describe('applyDetailLevel', () => {
+  it('summary — retains only summaryFields keys', () => {
+    const result = applyDetailLevel(SAMPLE, 'summary', SUMMARY_FIELDS);
+    assert.deepEqual(Object.keys(result).sort(), SUMMARY_FIELDS.slice().sort());
+    assert.ok(!('issues' in result), 'issues should be excluded from summary');
+  });
+
+  it('summary — preserves values for included keys', () => {
+    const result = applyDetailLevel(SAMPLE, 'summary', SUMMARY_FIELDS);
+    assert.equal(result['requestId'], 'req-1');
+    assert.equal(result['quality_score'], 90);
+    assert.equal(result['recommended_next_action'], 'stop');
+  });
+
+  it('standard — returns data unchanged', () => {
+    const result = applyDetailLevel(SAMPLE, 'standard', SUMMARY_FIELDS);
+    assert.deepEqual(result, SAMPLE);
+  });
+
+  it('full — returns data unchanged', () => {
+    const result = applyDetailLevel(SAMPLE, 'full', SUMMARY_FIELDS);
+    assert.deepEqual(result, SAMPLE);
+  });
+
+  it('summary with empty summaryFields returns empty object', () => {
+    const result = applyDetailLevel(SAMPLE, 'summary', []);
+    assert.deepEqual(result, {});
+  });
+
+  it('summary with a field absent from data is silently skipped', () => {
+    const result = applyDetailLevel({ a: 1 }, 'summary', ['a', 'missing_key']);
+    assert.deepEqual(result, { a: 1 });
+  });
+
+  it('standard returns the same object reference as input', () => {
+    const data: Record<string, unknown> = { x: 1 };
+    const result = applyDetailLevel(data, 'standard', []);
+    assert.strictEqual(result, data);
+  });
+});
diff --git a/test/unit/mcp/projectValidateFromPath.test.ts b/test/unit/mcp/projectValidateFromPath.test.ts
index 687d1680..d2217c4f 100644
--- a/test/unit/mcp/projectValidateFromPath.test.ts
+++ b/test/unit/mcp/projectValidateFromPath.test.ts
@@ -445,4 +445,95 @@ describe('provar_project_validate (from path)', () => {
       );
     });
   });
+
+  describe('PDX-470 
— detail level', () => { + it('standard response includes quality_score and completeness_score', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { project_path: tmpDir, save_results: false }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('quality_score' in body, 'standard should include quality_score'); + assert.ok('completeness_score' in body, 'standard should include completeness_score'); + assert.ok('recommended_next_action' in body, 'standard should include recommended_next_action'); + }); + + it('summary response includes only key fields, not violation details', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { + project_path: tmpDir, + save_results: false, + detail: 'summary', + }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok('quality_score' in body, 'summary should include quality_score'); + assert.ok('completeness_score' in body, 'summary should include completeness_score'); + assert.ok('recommended_next_action' in body, 'summary should include recommended_next_action'); + assert.ok(!('project_violations_by_rule' in body), 'summary should NOT include project_violations_by_rule'); + assert.ok(!('plans_summary' in body), 'summary should NOT include plans_summary'); + }); + }); + + describe('PDX-471 — run_id and baseline_run_id diff mode', () => { + it('run_id is present when save_results=true (default)', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { project_path: tmpDir }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok(typeof body['run_id'] === 'string' && body['run_id'].length > 0, 'run_id should be a non-empty string'); + }); + + it('run_id is absent when save_results=false', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { project_path: tmpDir, save_results: false }); + assert.equal(isError(result), false); + const body = parseText(result); + assert.ok(!('run_id' in body), 'run_id should not be present when save_results=false'); + }); + + it('returns BASELINE_NOT_FOUND for an unknown baseline_run_id', () => { + makeProject(tmpDir); + const result = server.call('provar_project_validate', { + project_path: tmpDir, + baseline_run_id: 'nonexistent-run-id-xyz', + }); + assert.equal(isError(result), true); + const body = parseText(result); + assert.equal(body['error_code'], 'BASELINE_NOT_FOUND'); + }); + + it('diff mode returns added/resolved/unchanged_count when baseline exists', () => { + makeProject(tmpDir); + const first = server.call('provar_project_validate', { project_path: tmpDir }); + assert.equal(isError(first), false); + const firstBody = parseText(first); + const runId = firstBody['run_id'] as string; + + const second = server.call('provar_project_validate', { + project_path: tmpDir, + baseline_run_id: runId, + }); + assert.equal(isError(second), false); + const diffBody = parseText(second); + assert.ok('added' in diffBody, 'diff should include added'); + assert.ok('resolved' in diffBody, 'diff should include resolved'); + assert.ok('unchanged_count' in diffBody, 'diff should include unchanged_count'); + assert.ok('run_id' in diffBody, 'diff should include run_id'); + }); + + it('diff response includes completeness_score and recommended_next_action', () => { + makeProject(tmpDir); + const first = server.call('provar_project_validate', { project_path: tmpDir }); + const firstBody = 
parseText(first);
+      const runId = firstBody['run_id'] as string;
+
+      const second = server.call('provar_project_validate', {
+        project_path: tmpDir,
+        baseline_run_id: runId,
+      });
+      const diffBody = parseText(second);
+      assert.ok('completeness_score' in diffBody, 'diff should include completeness_score');
+      assert.ok('recommended_next_action' in diffBody, 'diff should include recommended_next_action');
+    });
+  });
 });
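(Reviewer annotation, not part of the patch.) The PDX-470 suites above and below exercise a `detail` parameter whose implementation (`src/mcp/utils/detailLevel.ts`) is not included in this diff; only its tests are. A minimal sketch consistent with `detailLevel.test.ts`: the name and signature come from the tests, and the body is inferred rather than the shipped implementation.

```ts
export type DetailLevel = 'summary' | 'standard' | 'full';

// 'summary' keeps only the listed keys that actually exist on the data;
// 'standard' and 'full' return the input object untouched (same reference),
// which is what the strictEqual assertion in detailLevel.test.ts relies on.
export function applyDetailLevel(
  data: Record<string, unknown>,
  detail: DetailLevel,
  summaryFields: string[]
): Record<string, unknown> {
  if (detail !== 'summary') return data;
  const out: Record<string, unknown> = {};
  for (const key of summaryFields) {
    if (key in data) out[key] = data[key];
  }
  return out;
}
```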
diff --git a/test/unit/mcp/testCaseValidate.test.ts b/test/unit/mcp/testCaseValidate.test.ts
index fa8ed15f..9ff5bd2e 100644
--- a/test/unit/mcp/testCaseValidate.test.ts
+++ b/test/unit/mcp/testCaseValidate.test.ts
@@ -1000,6 +1000,105 @@ describe('registerTestCaseValidate handler', () => {
     assert.equal(result['validation_source'], 'local_fallback');
     assert.ok(String(result['validation_warning']).toLowerCase().includes('rate limit'));
   });
+
+  describe('PDX-470 — detail level', () => {
+    it('standard response includes is_valid, issues, and run_id', async () => {
+      const res = (await capServer.capturedHandler!({
+        content: VALID_TC,
+        detail: 'standard',
+      })) as { content: Array<{ text: string }> };
+      const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.ok('is_valid' in result, 'standard should include is_valid');
+      assert.ok('issues' in result, 'standard should include issues');
+      assert.ok('run_id' in result, 'standard should include run_id');
+    });
+
+    it('summary response includes only key fields, not issues', async () => {
+      const res = (await capServer.capturedHandler!({
+        content: VALID_TC,
+        detail: 'summary',
+      })) as { content: Array<{ text: string }> };
+      const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.ok('is_valid' in result, 'summary should include is_valid');
+      assert.ok('quality_score' in result, 'summary should include quality_score');
+      assert.ok('completeness_score' in result, 'summary should include completeness_score');
+      assert.ok('recommended_next_action' in result, 'summary should include recommended_next_action');
+      assert.ok(!('issues' in result), 'summary should NOT include issues');
+    });
+  });
+
+  describe('PDX-473 — completeness_score and recommended_next_action', () => {
+    it('completeness_score is 100 for a valid test case', async () => {
+      const res = (await capServer.capturedHandler!({ content: VALID_TC })) as {
+        content: Array<{ text: string }>;
+      };
+      const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.equal(result['completeness_score'], 100);
+    });
+
+    it('recommended_next_action is not "stop" when quality violations remain (Bug 9)', async () => {
+      // VALID_TC is structurally valid (is_valid=true, score=100) but has BP violations.
+      // "stop" must not fire until ALL violations are resolved.
+      const res = (await capServer.capturedHandler!({ content: VALID_TC })) as {
+        content: Array<{ text: string }>;
+      };
+      const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.ok(
+        ['inspect_failures', 'fix_and_revalidate'].includes(result['recommended_next_action'] as string),
+        `Expected inspect_failures or fix_and_revalidate when BP violations remain, got: ${String(
+          result['recommended_next_action']
+        )}`
+      );
+    });
+
+    it('recommended_next_action is inspect_failures for an invalid test case (first run)', async () => {
+      const badXml = '';
+      const res = (await capServer.capturedHandler!({ content: badXml })) as {
+        content: Array<{ text: string }>;
+      };
+      const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.equal(result['completeness_score'], 0);
+      assert.equal(result['recommended_next_action'], 'inspect_failures');
+    });
+  });
+
+  describe('PDX-471 — baseline_run_id diff mode', () => {
+    it('run_id is present in every response', async () => {
+      const res = (await capServer.capturedHandler!({ content: VALID_TC })) as {
+        content: Array<{ text: string }>;
+      };
+      const result = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.ok(typeof result['run_id'] === 'string' && result['run_id'].length > 0);
+    });
+
+    it('returns BASELINE_NOT_FOUND for an unknown baseline_run_id', async () => {
+      const res = (await capServer.capturedHandler!({
+        content: VALID_TC,
+        baseline_run_id: 'nonexistent-run-id-xyz',
+      })) as { isError?: boolean; content: Array<{ text: string }> };
+      assert.equal(res.isError, true);
+      const body = JSON.parse(res.content[0].text) as Record<string, unknown>;
+      assert.equal(body['error_code'], 'BASELINE_NOT_FOUND');
+    });
+
+    it('diff mode returns added/resolved/unchanged_count when baseline exists', async () => {
+      const first = (await capServer.capturedHandler!({ content: VALID_TC })) as {
+        content: Array<{ text: string }>;
+      };
+      const firstBody = JSON.parse(first.content[0].text) as Record<string, unknown>;
+      const runId = firstBody['run_id'] as string;
+
+      const second = (await capServer.capturedHandler!({
+        content: VALID_TC,
+        baseline_run_id: runId,
+      })) as { content: Array<{ text: string }> };
+      assert.ok(!(second as { isError?: boolean }).isError);
+      const diffBody = JSON.parse(second.content[0].text) as Record<string, unknown>;
+      assert.ok('added' in diffBody, 'diff should include added');
+      assert.ok('resolved' in diffBody, 'diff should include resolved');
+      assert.ok('unchanged_count' in diffBody, 'diff should include unchanged_count');
+    });
+  });
 });

// ── validateTestCaseXml ───────────────────────────────────────────────────────
diff --git a/test/unit/mcp/testPlanValidate.test.ts b/test/unit/mcp/testPlanValidate.test.ts
index 2c257cf6..99636f9c 100644
--- a/test/unit/mcp/testPlanValidate.test.ts
+++ b/test/unit/mcp/testPlanValidate.test.ts
@@ -344,4 +344,105 @@ describe('provar_testplan_validate', () => {
       assert.equal(isError(result), false);
     });
   });
+
+  describe('PDX-470 — detail level', () => {
+    it('standard response includes violations and test_suites', () => {
+      const result = server.call('provar_testplan_validate', {
+        plan_name: 'DetailPlan',
+        test_suites: [SUITE_A],
+        detail: 'standard',
+      });
+
+      const body = parseText(result);
+      assert.ok('violations' in body, 'standard should include violations');
+      assert.ok('test_suites' in body, 'standard should include test_suites');
+    });
+
+    it('summary response includes only key fields, not violations or test_suites', () => {
+      const result = server.call('provar_testplan_validate', {
+        plan_name: 'SummaryPlan',
+        test_suites: 
[SUITE_A], + detail: 'summary', + }); + + const body = parseText(result); + assert.ok('quality_score' in body, 'summary should include quality_score'); + assert.ok('completeness_score' in body, 'summary should include completeness_score'); + assert.ok('recommended_next_action' in body, 'summary should include recommended_next_action'); + assert.ok(!('violations' in body), 'summary should NOT include violations'); + assert.ok(!('test_suites' in body), 'summary should NOT include test_suites'); + }); + + it('full response includes all fields (same as standard for plan)', () => { + const result = server.call('provar_testplan_validate', { + plan_name: 'FullPlan', + test_suites: [SUITE_A], + detail: 'full', + }); + + const body = parseText(result); + assert.ok('violations' in body, 'full should include violations'); + assert.ok('test_suites' in body, 'full should include test_suites'); + }); + }); + + describe('PDX-473 — completeness_score and recommended_next_action', () => { + const TC_VALID = { name: 'Valid.testcase', xml_content: makeXml(G.tc1, G.s1, '1') }; + const SUITE_VALID = { name: 'ValidSuite', test_cases: [TC_VALID] }; + + it('completeness_score is present in every response', () => { + const result = server.call('provar_testplan_validate', { + plan_name: 'ScorePlan', + test_suites: [SUITE_A], + }); + + const body = parseText(result); + assert.ok('completeness_score' in body, 'completeness_score should be present'); + assert.ok(typeof body['completeness_score'] === 'number'); + }); + + it('completeness_score is 0 when plan has no test cases', () => { + const result = server.call('provar_testplan_validate', { + plan_name: 'EmptyPlan', + }); + + const body = parseText(result); + assert.equal(body['completeness_score'], 0); + }); + + it('recommended_next_action is a valid string value', () => { + const result = server.call('provar_testplan_validate', { + plan_name: 'ActionPlan', + test_suites: [SUITE_A], + }); + + const body = parseText(result); + assert.ok('recommended_next_action' in body); + const valid = ['stop', 'fix_and_revalidate', 'inspect_failures']; + assert.ok(valid.includes(body['recommended_next_action'] as string)); + }); + + it('recommended_next_action is stop when all test cases are valid (score=100)', () => { + const result = server.call('provar_testplan_validate', { + plan_name: 'AllValidPlan', + test_suites: [SUITE_VALID], + metadata: fullMeta(), + }); + + const body = parseText(result); + assert.equal(body['completeness_score'], 100); + assert.equal(body['recommended_next_action'], 'stop'); + }); + + it('recommended_next_action is inspect_failures when plan has failures (no baseline)', () => { + const result = server.call('provar_testplan_validate', { + plan_name: 'FailingPlan', + test_suites: [SUITE_A], + }); + + const body = parseText(result); + assert.ok((body['completeness_score'] as number) < 100); + assert.equal(body['recommended_next_action'], 'inspect_failures'); + }); + }); }); diff --git a/test/unit/mcp/testSuiteValidate.test.ts b/test/unit/mcp/testSuiteValidate.test.ts index cbe45023..2d648847 100644 --- a/test/unit/mcp/testSuiteValidate.test.ts +++ b/test/unit/mcp/testSuiteValidate.test.ts @@ -346,4 +346,138 @@ describe('provar_testsuite_validate', () => { assert.equal(isError(result), false); }); }); + + describe('PDX-470 — detail level', () => { + it('standard response includes violations, test_cases, and run_id', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'DetailSuite', + test_cases: [TC_LOGIN], + detail: 
'standard', + }); + const body = parseText(result); + assert.ok('violations' in body, 'standard should include violations'); + assert.ok('test_cases' in body, 'standard should include test_cases'); + assert.ok('run_id' in body, 'standard should include run_id'); + }); + + it('summary response includes only key metrics', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'SummarySuite', + test_cases: [TC_LOGIN], + detail: 'summary', + }); + const body = parseText(result); + assert.ok('quality_score' in body, 'summary should include quality_score'); + assert.ok('completeness_score' in body, 'summary should include completeness_score'); + assert.ok('recommended_next_action' in body, 'summary should include recommended_next_action'); + assert.ok(!('violations' in body), 'summary should NOT include violations'); + assert.ok(!('test_cases' in body), 'summary should NOT include test_cases'); + }); + + it('full response includes all fields', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'FullSuite', + test_cases: [TC_LOGIN], + detail: 'full', + }); + const body = parseText(result); + assert.ok('violations' in body, 'full should include violations'); + assert.ok('test_cases' in body, 'full should include test_cases'); + }); + }); + + describe('PDX-473 — completeness_score and recommended_next_action', () => { + // Valid XML: id="1" passes TC_010, proper UUID passes TC_011/012 + const TC_VALID = { name: 'Valid.testcase', xml_content: makeXml(G.tc1, G.s1, '1') }; + + it('completeness_score is present in response', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'CompleteSuite', + test_cases: [TC_LOGIN], + }); + const body = parseText(result); + assert.ok(typeof body['completeness_score'] === 'number', 'completeness_score should be a number'); + }); + + it('completeness_score is 0 when suite has no test cases', () => { + const result = server.call('provar_testsuite_validate', { suite_name: 'EmptySuite' }); + const body = parseText(result); + assert.equal(body['completeness_score'], 0); + }); + + it('completeness_score is 100 when all test cases are valid', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'AllValidSuite', + test_cases: [TC_VALID], + }); + const body = parseText(result); + assert.equal(body['completeness_score'], 100); + }); + + it('recommended_next_action is a string in the response', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'ActionSuite', + test_cases: [TC_LOGIN], + }); + const body = parseText(result); + const action = body['recommended_next_action']; + assert.ok(typeof action === 'string', 'recommended_next_action should be a string'); + assert.ok(['stop', 'inspect_failures', 'fix_and_revalidate'].includes(action), `Unexpected action: ${action}`); + }); + + it('recommended_next_action is "stop" when completeness_score is 100', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'StopSuite', + test_cases: [TC_VALID], + }); + const body = parseText(result); + assert.equal(body['completeness_score'], 100); + assert.equal(body['recommended_next_action'], 'stop'); + }); + }); + + describe('PDX-471 — baseline_run_id diff mode', () => { + it('run_id is present in every standard response', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'RunIdSuite', + test_cases: [TC_LOGIN], + }); + const body = parseText(result); + assert.ok(typeof body['run_id'] === 
'string' && body['run_id'].length > 0); + }); + + it('returns BASELINE_NOT_FOUND for an unknown baseline_run_id', () => { + const result = server.call('provar_testsuite_validate', { + suite_name: 'DiffSuite', + test_cases: [TC_LOGIN], + baseline_run_id: 'nonexistent-run-id-xyz', + }); + assert.equal(isError(result), true); + const body = parseText(result); + assert.equal(body['error_code'], 'BASELINE_NOT_FOUND'); + }); + + it('diff mode returns added/resolved/unchanged_count when baseline exists', () => { + // First call to establish baseline + const first = server.call('provar_testsuite_validate', { + suite_name: 'BaselineSuite', + test_cases: [TC_LOGIN], + }); + const firstBody = parseText(first); + const runId = firstBody['run_id'] as string; + + // Second call with baseline_run_id should return diff + const second = server.call('provar_testsuite_validate', { + suite_name: 'BaselineSuite', + test_cases: [TC_LOGIN], + baseline_run_id: runId, + }); + assert.equal(isError(second), false); + const diffBody = parseText(second); + assert.ok('added' in diffBody, 'diff should have added'); + assert.ok('resolved' in diffBody, 'diff should have resolved'); + assert.ok('unchanged_count' in diffBody, 'diff should have unchanged_count'); + assert.ok('run_id' in diffBody, 'diff should have run_id'); + }); + }); }); diff --git a/test/unit/mcp/validationDiff.test.ts b/test/unit/mcp/validationDiff.test.ts new file mode 100644 index 00000000..931a5ded --- /dev/null +++ b/test/unit/mcp/validationDiff.test.ts @@ -0,0 +1,146 @@ +/* eslint-disable camelcase */ +import { strict as assert } from 'node:assert'; +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { describe, it, beforeEach, afterEach } from 'mocha'; +import { + generateRunId, + saveRun, + hasAnyRun, + loadBaselineViolations, + computeDiff, +} from '../../../src/mcp/utils/validationDiff.js'; + +const V1 = { rule_id: 'RULE-001', applies_to: 'TestSuite', message: 'Suite is empty' }; +const V2 = { rule_id: 'RULE-002', applies_to: 'TestPlan', message: 'Plan has no suites' }; +const V3 = { rule_id: 'RULE-003', applies_to: 'Project', message: 'No test plans' }; + +let tmpDir: string; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'valdiff-test-')); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('generateRunId', () => { + it('produces a timestamp-hash string', () => { + const id = generateRunId('/some/project/path'); + assert.match(id, /^\d+-[0-9a-f]{8}-[0-9a-z]{4}$/); + }); + + it('produces different IDs for different contexts', () => { + const id1 = generateRunId('/path/a'); + const id2 = generateRunId('/path/b'); + // hash portion differs + assert.notEqual(id1.split('-')[1], id2.split('-')[1]); + }); +}); + +describe('hasAnyRun', () => { + it('returns false when no index file exists', () => { + assert.equal(hasAnyRun(tmpDir), false); + }); + + it('returns true after a run is saved', () => { + saveRun(tmpDir, generateRunId('ctx'), [V1]); + assert.equal(hasAnyRun(tmpDir), true); + }); +}); + +describe('saveRun / loadBaselineViolations', () => { + it('saves and retrieves violations by run_id', () => { + const runId = generateRunId('ctx'); + saveRun(tmpDir, runId, [V1, V2]); + const loaded = loadBaselineViolations(tmpDir, runId); + assert.deepEqual(loaded, [V1, V2]); + }); + + it('returns null for an unknown run_id', () => { + const result = loadBaselineViolations(tmpDir, 'nonexistent-run-id'); + assert.equal(result, null); + }); + + 
it('caps index at 20 entries and evicts the oldest', () => { + const ids: string[] = []; + for (let i = 0; i < 22; i++) { + const id = `${Date.now() + i}-abc${i.toString().padStart(4, '0')}`; + ids.push(id); + saveRun(tmpDir, id, [V1]); + } + // First two should be evicted + assert.equal(loadBaselineViolations(tmpDir, ids[0]), null); + assert.equal(loadBaselineViolations(tmpDir, ids[1]), null); + // Last 20 should still be present + for (let i = 2; i < 22; i++) { + assert.notEqual(loadBaselineViolations(tmpDir, ids[i]), null, `Expected run ${i} to be present`); + } + }); +}); + +describe('computeDiff', () => { + it('returns empty diff when violations are identical', () => { + const diff = computeDiff([V1, V2], [V1, V2]); + assert.deepEqual(diff.added, []); + assert.deepEqual(diff.resolved, []); + assert.equal(diff.unchanged_count, 2); + }); + + it('detects added violations', () => { + const diff = computeDiff([V1], [V1, V2]); + assert.equal(diff.added.length, 1); + assert.equal(diff.added[0]['rule_id'], 'RULE-002'); + assert.deepEqual(diff.resolved, []); + assert.equal(diff.unchanged_count, 1); + }); + + it('detects resolved violations', () => { + const diff = computeDiff([V1, V2], [V2]); + assert.deepEqual(diff.added, []); + assert.equal(diff.resolved.length, 1); + assert.equal(diff.resolved[0]['rule_id'], 'RULE-001'); + assert.equal(diff.unchanged_count, 1); + }); + + it('detects added and resolved in the same diff', () => { + const diff = computeDiff([V1, V2], [V2, V3]); + assert.equal(diff.added.length, 1); + assert.equal(diff.added[0]['rule_id'], 'RULE-003'); + assert.equal(diff.resolved.length, 1); + assert.equal(diff.resolved[0]['rule_id'], 'RULE-001'); + assert.equal(diff.unchanged_count, 1); + }); + + it('handles empty baseline (all current violations are added)', () => { + const diff = computeDiff([], [V1, V2]); + assert.equal(diff.added.length, 2); + assert.deepEqual(diff.resolved, []); + assert.equal(diff.unchanged_count, 0); + }); + + it('handles empty current (all baseline violations are resolved)', () => { + const diff = computeDiff([V1, V2], []); + assert.deepEqual(diff.added, []); + assert.equal(diff.resolved.length, 2); + assert.equal(diff.unchanged_count, 0); + }); + + it('multiset: duplicate violations are treated as distinct entries', () => { + // V1 appears twice in baseline, three times in current → 1 added, 2 unchanged + const diff = computeDiff([V1, V1], [V1, V1, V1]); + assert.equal(diff.added.length, 1, 'one extra occurrence added'); + assert.equal(diff.resolved.length, 0); + assert.equal(diff.unchanged_count, 2); + }); + + it('multiset: reducing duplicate count registers as resolved', () => { + // V1 appears three times in baseline, once in current → 2 resolved, 1 unchanged + const diff = computeDiff([V1, V1, V1], [V1]); + assert.equal(diff.added.length, 0); + assert.equal(diff.resolved.length, 2, 'two occurrences resolved'); + assert.equal(diff.unchanged_count, 1); + }); +}); diff --git a/test/unit/mcp/validationScore.test.ts b/test/unit/mcp/validationScore.test.ts new file mode 100644 index 00000000..b7074c95 --- /dev/null +++ b/test/unit/mcp/validationScore.test.ts @@ -0,0 +1,57 @@ +/* eslint-disable camelcase */ +import { strict as assert } from 'node:assert'; +import { describe, it } from 'mocha'; +import { calcCompletenessScore, calcNextAction } from '../../../src/mcp/utils/validationScore.js'; + +describe('calcCompletenessScore', () => { + it('returns 100 when all tests pass', () => { + assert.equal(calcCompletenessScore(10, 10), 100); + }); + + 
it('returns 0 when no tests pass', () => { + assert.equal(calcCompletenessScore(0, 10), 0); + }); + + it('returns 0 when total is 0 (no tests)', () => { + assert.equal(calcCompletenessScore(0, 0), 0); + }); + + it('rounds to nearest integer', () => { + // 1/3 ≈ 33.33 → 33 + assert.equal(calcCompletenessScore(1, 3), 33); + // 2/3 ≈ 66.67 → 67 + assert.equal(calcCompletenessScore(2, 3), 67); + }); + + it('returns 50 for half passing', () => { + assert.equal(calcCompletenessScore(5, 10), 50); + }); +}); + +describe('calcNextAction', () => { + it('returns "stop" when score is 100 and no violations remain', () => { + assert.equal(calcNextAction(100, true), 'stop'); + assert.equal(calcNextAction(100, false), 'stop'); + assert.equal(calcNextAction(100, true, 0), 'stop'); + }); + + it('returns "inspect_failures" when score < 100 and no baseline (first run)', () => { + assert.equal(calcNextAction(0, false), 'inspect_failures'); + assert.equal(calcNextAction(50, false), 'inspect_failures'); + assert.equal(calcNextAction(99, false), 'inspect_failures'); + }); + + it('returns "fix_and_revalidate" when score < 100 and baseline exists', () => { + assert.equal(calcNextAction(0, true), 'fix_and_revalidate'); + assert.equal(calcNextAction(50, true), 'fix_and_revalidate'); + assert.equal(calcNextAction(99, true), 'fix_and_revalidate'); + }); + + it('returns "fix_and_revalidate" when score is 100 but quality violations remain and baseline exists', () => { + assert.equal(calcNextAction(100, true, 3), 'fix_and_revalidate'); + }); + + it('returns "inspect_failures" when score is 100 but violations remain on first run', () => { + assert.equal(calcNextAction(100, false, 2), 'inspect_failures'); + }); +});
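(Reviewer annotation, not part of the patch.) As a closing illustration, here is how the new helpers are meant to compose inside a validate handler: establish or look up a baseline, diff against it, then score and recommend. A minimal sketch under stated assumptions: the `.provar-runs` storage location and the passing/total counts are invented for illustration, while the imported functions and their signatures are the ones added in this diff.

```ts
import path from 'node:path';
import {
  computeDiff,
  generateRunId,
  hasAnyRun,
  loadBaselineViolations,
  saveRun,
  type DiffableViolation,
} from './validationDiff.js';
import { calcCompletenessScore, calcNextAction } from './validationScore.js';

function buildValidationResponse(
  projectPath: string,
  violations: DiffableViolation[],
  passing: number,
  total: number,
  baselineRunId?: string
): Record<string, unknown> {
  const storageDir = path.join(projectPath, '.provar-runs'); // hypothetical location
  const hadBaseline = hasAnyRun(storageDir); // check BEFORE saving the current run

  // Persist this run so the next call can pass run_id back as baseline_run_id.
  const runId = saveRun(storageDir, generateRunId(projectPath), violations);

  const score = calcCompletenessScore(passing, total);
  const response: Record<string, unknown> = {
    run_id: runId,
    completeness_score: score,
    recommended_next_action: calcNextAction(score, hadBaseline, violations.length),
  };

  // Diff mode: report only what changed relative to the baseline run.
  if (baselineRunId) {
    const baseline = loadBaselineViolations(storageDir, baselineRunId);
    if (baseline === null) {
      return { error_code: 'BASELINE_NOT_FOUND' }; // matches the tests above
    }
    Object.assign(response, computeDiff(baseline, violations));
  }

  return response;
}
```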