diff --git a/benchmark-leakage-audit-assistant/README.md b/benchmark-leakage-audit-assistant/README.md
new file mode 100644
index 0000000..d4f759a
--- /dev/null
+++ b/benchmark-leakage-audit-assistant/README.md
@@ -0,0 +1,37 @@
+# Benchmark Leakage Audit Assistant
+
+This self-contained module adds an AI research assistant slice for pre-release benchmark hygiene. It helps reviewers catch evaluation leakage before a paper, model, or scientific benchmark result is published.
+
+## What It Checks
+
+- Train/test overlap by record ID or normalized content fingerprint
+- Benchmark contamination in the training corpus
+- Final holdout or test set use during model selection
+- Missing split provenance such as deterministic method, seed, or manifest hash
+- Missing reproducibility packet evidence such as lockfiles, manifests, code archive, and preregistration
+
+## Run It
+
+```bash
+node benchmark-leakage-audit-assistant/test.js
+node benchmark-leakage-audit-assistant/demo.js
+```
+
+Run both commands from the repository root. The module uses only Node.js standard library APIs.
+
+## Public API
+
+```js
+const { auditBenchmarkLeakage } = require("./index.js");
+
+const audit = auditBenchmarkLeakage(project);
+console.log(audit.summary.releaseDecision);
+console.log(audit.findings);
+console.log(audit.reviewerPacket.tasks);
+```
+
+The audit returns a release decision of `pass`, `needs-remediation`, or `block`, plus reviewer-ready findings with evidence and remediation tasks.
+
+## Demo
+
+The included `demo.gif` shows the module blocking a release candidate with train/test overlap, benchmark contamination, held-out tuning, weak split provenance, and missing reproducibility artifacts.
diff --git a/benchmark-leakage-audit-assistant/demo.gif b/benchmark-leakage-audit-assistant/demo.gif
new file mode 100644
index 0000000..c021b13
Binary files /dev/null and b/benchmark-leakage-audit-assistant/demo.gif differ
diff --git a/benchmark-leakage-audit-assistant/demo.js b/benchmark-leakage-audit-assistant/demo.js
new file mode 100644
index 0000000..fc2d772
--- /dev/null
+++ b/benchmark-leakage-audit-assistant/demo.js
@@ -0,0 +1,84 @@
+const { auditBenchmarkLeakage } = require("./index.js");
+
+const demoProject = { // synthetic fixture built to trigger all five audit checks
+  title: "NeuroImaging Benchmark Release Candidate",
+  benchmark: {
+    name: "NeuroBench-26",
+    items: [
+      {
+        id: "nb26-hidden-17",
+        title: "Hidden fMRI cohort sample",
+        text: "Reserved NeuroBench-26 fMRI cohort with blinded diagnostic labels."
+      }
+    ]
+  },
+  datasets: {
+    train: [
+      { // contamination case: source names the benchmark and text equals the benchmark item text
+        id: "train-cohort-13",
+        source: "NeuroBench-26 pre-release mirror",
+        text: "Reserved NeuroBench-26 fMRI cohort with blinded diagnostic labels."
+      },
+      { // overlap case: same id and text as the test record below
+        id: "subject-0042",
+        source: "lab import",
+        text: "Subject 0042 resting-state network features with quality-control notes."
+      }
+    ],
+    validation: [
+      {
+        id: "subject-1182",
+        source: "validation import",
+        text: "Subject 1182 task-state network features with adjudicated QC status."
+      }
+    ],
+    test: [
+      {
+        id: "subject-0042",
+        source: "holdout import",
+        text: "Subject 0042 resting-state network features with quality-control notes."
+      }
+    ]
+  },
+  split: { // method is present; the empty seed and null hash are both reported as missing
+    method: "manual export",
+    seed: "",
+    manifestHash: null
+  },
+  experiments: [
+    {
+      id: "exp-neuro-7",
+      usedForSelection: "test", // selecting on the test split triggers the held-out tuning check
+      notes: "Selected final model using best test AUROC after evaluating four checkpoints."
+    }
+  ],
+  artifacts: { // three falsy flags, which makes the reproducibility finding high severity
+    rawDataManifest: true,
+    splitManifest: false,
+    environmentLock: false,
+    codeArchive: true,
+    preregistration: false
+  }
+};
+
+// Run the audit on the synthetic project and print a plain-text report.
+const { summary, findings, reviewerPacket } = auditBenchmarkLeakage(demoProject);
+console.log(`Benchmark leakage audit: ${summary.projectTitle}`);
+console.log(`Decision: ${summary.releaseDecision}`);
+console.log(`Reproducibility confidence: ${summary.reproducibilityConfidence}`);
+console.log(`Findings: ${summary.findingCount}`);
+console.log("");
+
+// One section per finding: headline, evidence bullets, then the remediation.
+findings.forEach(({ severity, title, evidence, remediation }) => {
+  console.log(`[${severity.toUpperCase()}] ${title}`);
+  evidence.forEach((line) => {
+    console.log(`  - ${line}`);
+  });
+  console.log(`  Remediation: ${remediation}`);
+  console.log("");
+});
+
+console.log("Reviewer tasks:");
+// Tasks are printed in the order the audit returned them.
+reviewerPacket.tasks.forEach((task) => console.log(`- ${task}`));
diff --git a/benchmark-leakage-audit-assistant/index.js b/benchmark-leakage-audit-assistant/index.js
new file mode 100644
index 0000000..6924ef4
--- /dev/null
+++ b/benchmark-leakage-audit-assistant/index.js
@@ -0,0 +1,305 @@
+const crypto = require("node:crypto");
+
+const SEVERITIES = Object.freeze(["critical", "high", "medium", "low"]); // counter keys for countSeverities
+const REQUIRED_ARTIFACTS = Object.freeze([ // project.artifacts flags that must be truthy
+  "rawDataManifest",
+  "splitManifest",
+  "environmentLock",
+  "codeArchive",
+  "preregistration"
+]);
+
+// Public entry point: normalize the project, run every detector in a fixed order, and
+// return { summary, findings, reviewerPacket }. normalizeProject throws a TypeError
+// when project is not an object.
+function auditBenchmarkLeakage(project) {
+  const normalized = normalizeProject(project);
+  const detectors = [
+    findTrainTestOverlap,
+    findBenchmarkContamination,
+    findHeldoutTuning,
+    findSplitProvenanceGaps,
+    findReproducibilityGaps
+  ];
+  // Each detector returns an array (possibly empty), so flatMap keeps the detector order.
+  const findings = detectors.flatMap((detect) => detect(normalized));
+  const severityCounts = countSeverities(findings);
+  const summary = {
+    projectTitle: normalized.title,
+    findingCount: findings.length,
+    severityCounts,
+    releaseDecision: decideRelease(severityCounts),
+    reproducibilityConfidence: scoreReproducibility(findings)
+  };
+  const reviewerPacket = buildReviewerPacket(summary, findings);
+  return { summary, findings, reviewerPacket };
+}
+
+// Validate the raw project and fill defaults so every audit step sees the same shape.
+// Splits and experiments that are not arrays become [], and entries that are not objects
+// are dropped so a stray null record cannot crash a later check. Throws TypeError when
+// project is not an object.
+function normalizeProject(project) {
+  if (!project || typeof project !== "object") {
+    throw new TypeError("auditBenchmarkLeakage expects a project object");
+  }
+  const records = (list) =>
+    Array.isArray(list) ? list.filter((entry) => entry && typeof entry === "object") : [];
+  const { train, validation, test } = project.datasets || {};
+  return {
+    title: project.title || "Untitled research project",
+    benchmark: project.benchmark || { name: "", items: [] },
+    datasets: { train: records(train), validation: records(validation), test: records(test) },
+    split: project.split || {},
+    experiments: records(project.experiments),
+    artifacts: project.artifacts || {}
+  };
+}
+
+// Report test records that also appear in the training split, matched by normalized
+// record ID first and by content fingerprint (title + text) second. Returns a single
+// critical finding listing every overlap, or [] when no test record matches.
+function findTrainTestOverlap(project) {
+  // Records with no title/text all share this fingerprint; matching on it would flag
+  // unrelated empty records as duplicates, so it is never used as a match key.
+  const emptyFingerprint = fingerprintRecord({});
+  const trainFingerprints = new Map();
+  const trainIds = new Map();
+
+  for (const record of project.datasets.train) {
+    const id = normalizeValue(record.id);
+    if (id) trainIds.set(id, record);
+    const fingerprint = fingerprintRecord(record);
+    if (fingerprint !== emptyFingerprint) trainFingerprints.set(fingerprint, record);
+  }
+
+  const overlaps = [];
+  for (const testRecord of project.datasets.test) {
+    const trainRecord =
+      trainIds.get(normalizeValue(testRecord.id)) ||
+      trainFingerprints.get(fingerprintRecord(testRecord));
+    if (!trainRecord) continue;
+    overlaps.push({
+      trainId: trainRecord.id || "unknown-train-id",
+      testId: testRecord.id || "unknown-test-id",
+      snippet: compactText(testRecord.text || testRecord.title || testRecord.source || "")
+    });
+  }
+  if (overlaps.length === 0) return [];
+  return [
+    finding({
+      type: "train_test_overlap",
+      severity: "critical",
+      title: "Training records overlap with the final evaluation split",
+      evidence: overlaps.map((o) => `Train ${o.trainId} matches test ${o.testId}: ${o.snippet}`),
+      remediation:
+        "Rebuild the split from source data, remove duplicated records from the training corpus, and rerun the final holdout once."
+    })
+  ];
+}
+
+// Look for benchmark material inside the training split. The benchmark name and every
+// item id/title/text become normalized search terms (terms shorter than 8 characters
+// are ignored). A training record is flagged when its combined source/title/text
+// contains any term. Returns one high-severity finding, or [] when nothing matches.
+function findBenchmarkContamination(project) {
+  const { benchmark, datasets } = project;
+  const rawTerms = [benchmark.name];
+  for (const item of safeArray(benchmark.items)) {
+    rawTerms.push(item.id, item.title, item.text);
+  }
+  const searchTerms = rawTerms
+    .map(normalizeText)
+    .filter((term) => term.length >= 8);
+
+  const evidence = [];
+  datasets.train.forEach((record) => {
+    const haystack = normalizeText(
+      [record.source, record.title, record.text].filter(Boolean).join(" ")
+    );
+    // Only the first matching term is reported, so each record yields one evidence line.
+    const hit = searchTerms.find((term) => haystack.includes(term));
+    if (!hit) return;
+
+    const label = record.id || "unknown";
+    evidence.push(
+      `Training record ${label} contains benchmark signal "${truncate(hit, 80)}".`
+    );
+  });
+
+  if (evidence.length === 0) return [];
+
+  return [
+    finding({
+      type: "benchmark_contamination",
+      severity: "high",
+      title: "Training corpus appears to contain benchmark material",
+      evidence,
+      remediation:
+        "Remove benchmark mirrors and leaderboard-derived content from training, then document an exclusion rule for future ingestion."
+    })
+  ];
+}
+
+// Flag experiments whose metadata or notes indicate model selection on the final
+// holdout. Returns one high-severity finding with a line per risky experiment, or [].
+function findHeldoutTuning(project) {
+  const evidence = [];
+  for (const experiment of project.experiments) {
+    if (!hasHeldoutTuningRisk(experiment)) continue;
+    const detail = experiment.notes || experiment.usedForSelection || experiment.tunedOn || "";
+    evidence.push(`${experiment.id || "unnamed experiment"}: ${compactText(detail)}`);
+  }
+  if (evidence.length === 0) return [];
+  return [
+    finding({
+      type: "heldout_tuning",
+      severity: "high",
+      title: "Final holdout data was used during model selection",
+      evidence,
+      remediation:
+        "Move model and checkpoint selection to validation data, freeze the chosen configuration, and rerun the untouched holdout once."
+    })
+  ];
+}
+
+// Heuristic: true when an experiment appears to have selected or tuned on the final holdout.
+// Inputs are passed through normalizeText, which rewrites "held-out" as "held out", so the
+// notes patterns accept that spaced form just like the selection-source pattern does.
+function hasHeldoutTuningRisk(experiment) {
+  const selectionSource = normalizeText(
+    [experiment.usedForSelection, experiment.tunedOn].filter(Boolean).join(" ")
+  );
+  if (/\b(test|holdout|held out|heldout)\b/.test(selectionSource)) return true;
+  const notes = normalizeText(experiment.notes);
+  return (
+    /\b(test|holdout|held out|heldout)\b.{0,40}\b(select|selected|selection|tune|tuned|checkpoint|best)\b/.test(notes) ||
+    /\b(select|selected|selection|tune|tuned|checkpoint|best)\b.{0,40}\b(test|holdout|held out|heldout)\b/.test(notes)
+  );
+}
+
+// Check that the split can be recreated: method, seed, and manifest hash must all be
+// present. A seed of 0 counts as present; only null, undefined, or "" is missing.
+function findSplitProvenanceGaps(project) {
+  const { method, seed, manifestHash } = project.split;
+  const evidence = [];
+  if (!method) evidence.push("split method is missing");
+  if (seed == null || seed === "") evidence.push("split seed is missing");
+  if (!manifestHash) evidence.push("split manifest hash is missing");
+  if (evidence.length === 0) return [];
+
+  return [
+    finding({
+      type: "split_provenance_gap",
+      severity: "medium",
+      title: "Dataset split provenance is not reproducible",
+      evidence,
+      remediation:
+        "Record the deterministic split method, seed, and manifest hash so reviewers can recreate train/validation/test membership."
+    })
+  ];
+}
+
+// Report required artifacts whose flag on project.artifacts is falsy; three or more
+// missing artifacts raise the finding from medium to high severity.
+function findReproducibilityGaps(project) {
+  const evidence = [];
+  for (const name of REQUIRED_ARTIFACTS) {
+    if (!project.artifacts[name]) evidence.push(`${name} is absent`);
+  }
+  if (evidence.length === 0) return [];
+  const severity = evidence.length >= 3 ? "high" : "medium";
+  const title = "Reproducibility packet is missing required evidence";
+  const remediation =
+    "Attach the missing manifests, lockfiles, code archive, and preregistration evidence before release review.";
+
+  return [finding({ type: "reproducibility_gap", severity, title, evidence, remediation })];
+}
+
+// Build a finding record. The id is the type plus the first 8 characters of hash() over
+// the "|"-joined evidence, so identical type and evidence always produce the same id.
+function finding(input) {
+  const { type, severity, title, evidence, remediation } = input;
+  const digest = hash(evidence.join("|")).slice(0, 8);
+  return {
+    id: `${type}-${digest}`,
+    type, severity, title, evidence, remediation
+  };
+}
+
+// Assemble the reviewer hand-off: one key risk and one task per finding plus three fixed
+// follow-up questions, or a fixed all-clear packet when there are no findings.
+function buildReviewerPacket(summary, findings) {
+  const keyRisks = findings.map(({ severity, title }) => `${severity.toUpperCase()}: ${title}`);
+  const tasks = findings.map(({ remediation }) => remediation);
+  if (tasks.length === 0) {
+    return {
+      keyRisks: ["No benchmark leakage signals detected."],
+      tasks: ["Proceed with normal scientific peer review."],
+      questions: ["Have reviewers independently confirmed benchmark access controls?"]
+    };
+  }
+  const questions = [
+    `Can the team reproduce the ${summary.projectTitle} split from immutable inputs?`,
+    "Was the final holdout evaluated exactly once after model selection froze?",
+    "Are benchmark and leaderboard mirrors excluded from every training corpus?"
+  ];
+  return { keyRisks, tasks, questions };
+}
+
+function countSeverities(findings) { // tally findings under each SEVERITIES key, from 0
+  const tally = Object.fromEntries(SEVERITIES.map((level) => [level, 0]));
+  findings.forEach(({ severity }) => { tally[severity] += 1; });
+  return tally;
+}
+
+function decideRelease(counts) { // any critical or 2+ high blocks; low-only findings pass
+  const { critical, high, medium } = counts;
+  if (critical > 0 || high > 1) return "block";
+  return high === 1 || medium > 0 ? "needs-remediation" : "pass";
+}
+
+// Map findings to a confidence label: "low" when a reproducibility gap is high severity,
+// "medium" when any reproducibility or split-provenance gap exists, "high" otherwise.
+function scoreReproducibility(findings) {
+  const isReproGap = (item) => item.type === "reproducibility_gap";
+  if (findings.some((item) => isReproGap(item) && item.severity === "high")) return "low";
+
+  const isAnyGap = (item) => isReproGap(item) || item.type === "split_provenance_gap";
+  return findings.some(isAnyGap) ? "medium" : "high";
+}
+
+function fingerprintRecord(record) { // hash of the normalized title + text; id and source are ignored
+  return hash(normalizeText([record.title, record.text].filter(Boolean).join(" ")));
+}
+
+function hash(value) { // hex-encoded SHA-256 digest of String(value)
+  return crypto.createHash("sha256").update(String(value)).digest("hex");
+}
+
+// Lowercase and reduce text to letter/digit runs separated by single spaces. Unicode-aware
+// (\p{L}\p{M}\p{N} after NFKC) so non-ASCII content is compared instead of being erased.
+function normalizeText(value) {
+  const text = String(value || "").normalize("NFKC").toLowerCase();
+  return text.replace(/[^\p{L}\p{M}\p{N}]+/gu, " ").trim();
+}
+
+function normalizeValue(value) { // canonical ID key: trimmed, lowercased string
+  return String(value ?? "").trim().toLowerCase(); // ?? so only null/undefined become "" (0 stays "0")
+}
+
+function safeArray(value) { // pass arrays through unchanged; anything else becomes []
+  return Array.isArray(value) ? value : [];
+}
+
+function compactText(value) { // collapse whitespace runs to one space, trim, cap at 120 chars
+  return truncate(String(value).replace(/\s+/g, " ").trim(), 120);
+}
+
+function truncate(value, length) { // longer values are cut to `length` chars including the "..."
+  return value.length > length ? `${value.slice(0, length - 3)}...` : value;
+}
+
+module.exports = {
+  auditBenchmarkLeakage
+};
diff --git a/benchmark-leakage-audit-assistant/requirement-map.md b/benchmark-leakage-audit-assistant/requirement-map.md
new file mode 100644
index 0000000..df0e01b
--- /dev/null
+++ b/benchmark-leakage-audit-assistant/requirement-map.md
@@ -0,0 +1,29 @@
+# Requirement Map
+
+Issue: SCIBASE-AI/SCIBASE.AI#16, AI-Powered Research Assistant Suite.
+
+## Auto Peer Review Reports
+
+- The audit emits reviewer-ready findings with severity, evidence, and remediation.
+- `reviewerPacket.keyRisks`, `reviewerPacket.tasks`, and `reviewerPacket.questions` provide a structured pre-release review packet.
+
+## Reproducibility Checker
+
+- The module checks split method, split seed, split manifest hash, raw data manifest, split manifest, environment lock, code archive, and preregistration evidence.
+- The summary includes `reproducibilityConfidence` so reviewers can triage projects before publication.
+
+## Research Gap Finder Alignment
+
+- Benchmark leakage is treated as a research-quality gap: contaminated results must be remediated before they can support reliable claims.
+- The generated reviewer questions identify follow-up work needed to restore benchmark validity.
+
+## Safety And Scope
+
+- Uses only synthetic sample data.
+- Requires no external services, credentials, or network access.
+- Keeps the implementation isolated under `benchmark-leakage-audit-assistant/`.
+
+## Verification
+
+- `node benchmark-leakage-audit-assistant/test.js`
+- `node benchmark-leakage-audit-assistant/demo.js`
diff --git a/benchmark-leakage-audit-assistant/test.js b/benchmark-leakage-audit-assistant/test.js
new file mode 100644
index 0000000..110aaf1
--- /dev/null
+++ b/benchmark-leakage-audit-assistant/test.js
@@ -0,0 +1,169 @@
+const assert = require("node:assert/strict");
+const { auditBenchmarkLeakage } = require("./index.js");
+
+function test(name, fn) { // minimal synchronous runner: call fn, print the outcome
+  try {
+    fn();
+    console.log(`ok - ${name}`); // reached only when fn returned without throwing
+  } catch (error) {
+    console.error(`not ok - ${name}`);
+    throw error; // rethrow so the failure propagates to the top-level caller
+  }
+}
+
+const contaminatedProject = { // fixture the first test expects to yield all five finding types
+  title: "Protein Stability Leaderboard Study",
+  benchmark: {
+    name: "ProteinBench",
+    items: [
+      {
+        id: "pb-42",
+        title: "Thermal stability mutation panel",
+        text: "A held-out thermal stability mutation panel for ProteinBench."
+      }
+    ]
+  },
+  datasets: {
+    train: [
+      { // contamination case: source names the benchmark and text equals the benchmark item text
+        id: "train-001",
+        source: "ProteinBench public leaderboard mirror",
+        text: "A held-out thermal stability mutation panel for ProteinBench."
+      },
+      { // overlap case: same id and text as the test record below
+        id: "dup-777",
+        source: "lab notebook",
+        text: "Kinase mutant A had a melting temperature shift of 2.4 C."
+      }
+    ],
+    validation: [
+      {
+        id: "val-009",
+        source: "internal split",
+        text: "Independent validation measurement for kinase mutant B."
+      }
+    ],
+    test: [
+      {
+        id: "dup-777",
+        source: "final holdout",
+        text: "Kinase mutant A had a melting temperature shift of 2.4 C."
+      }
+    ]
+  },
+  split: { // method is present; the null seed and empty hash are both reported as missing
+    method: "manual spreadsheet split",
+    seed: null,
+    manifestHash: ""
+  },
+  experiments: [
+    {
+      id: "exp-main",
+      usedForSelection: "test", // selecting on the test split triggers the held-out tuning check
+      notes: "Selected the final checkpoint using best test accuracy after three tuning rounds."
+    }
+  ],
+  artifacts: { // three falsy flags, which makes the reproducibility finding high severity
+    rawDataManifest: true,
+    splitManifest: false,
+    environmentLock: false,
+    codeArchive: true,
+    preregistration: false
+  }
+};
+
+const cleanProject = { // fixture the second test expects to yield zero findings
+  title: "Blind Materials Benchmark",
+  benchmark: {
+    name: "MatBench Blind",
+    items: [
+      {
+        id: "mat-test-1",
+        title: "Hidden elastic modulus sample",
+        text: "Elastic modulus holdout measurement kept in a locked benchmark set."
+      }
+    ]
+  },
+  datasets: { // ids and texts differ across splits, and no training text contains a benchmark term
+    train: [
+      {
+        id: "mat-train-1",
+        source: "2024 lab training corpus",
+        text: "Training-only polymer synthesis run with public features."
+      }
+    ],
+    validation: [
+      {
+        id: "mat-val-1",
+        source: "validation split",
+        text: "Validation-only polymer synthesis run for model selection."
+      }
+    ],
+    test: [
+      {
+        id: "mat-test-shadow",
+        source: "sealed holdout",
+        text: "Different blinded polymer synthesis run for final reporting."
+      }
+    ]
+  },
+  split: { // all three provenance fields are present
+    method: "stratified hash split",
+    seed: 20260518,
+    manifestHash: "sha256:3bf44d1e2c"
+  },
+  experiments: [
+    {
+      id: "exp-clean",
+      usedForSelection: "validation", // selection on validation must not be flagged
+      notes: "Hyperparameters were chosen on validation data before one final holdout evaluation."
+    }
+  ],
+  artifacts: { // every required artifact flag is true
+    rawDataManifest: true,
+    splitManifest: true,
+    environmentLock: true,
+    codeArchive: true,
+    preregistration: true
+  }
+};
+
+test("flags train/test overlap, benchmark contamination, held-out tuning, weak split provenance, and missing artifacts", () => {
+  const { summary, findings, reviewerPacket } = auditBenchmarkLeakage(contaminatedProject);
+  const reported = new Set(findings.map((finding) => finding.type));
+  assert.equal(summary.releaseDecision, "block");
+  assert.equal(summary.severityCounts.critical, 1);
+  // Every detector must contribute its finding type for the contaminated fixture.
+  for (const expectedType of [
+    "train_test_overlap",
+    "benchmark_contamination",
+    "heldout_tuning",
+    "split_provenance_gap",
+    "reproducibility_gap"
+  ]) {
+    assert.ok(reported.has(expectedType));
+  }
+  assert.ok(reviewerPacket.tasks.some((task) => task.toLowerCase().includes("rebuild the split")));
+});
+
+test("returns a pass decision for a clean project with complete reproducibility evidence", () => {
+  const { summary, findings, reviewerPacket } = auditBenchmarkLeakage(cleanProject);
+  // A clean fixture yields no findings, a pass decision, and the all-clear packet.
+  assert.equal(summary.releaseDecision, "pass");
+  assert.equal(summary.findingCount, 0);
+  assert.equal(summary.reproducibilityConfidence, "high");
+  assert.deepEqual(findings, []);
+  assert.ok(reviewerPacket.keyRisks.includes("No benchmark leakage signals detected."));
+});
+
+test("emits reviewer-ready evidence and remediation text for each finding", () => {
+  const { findings } = auditBenchmarkLeakage(contaminatedProject);
+  const knownSeverities = ["critical", "high", "medium", "low"];
+  findings.forEach(({ id, severity, title, evidence, remediation }) => {
+    assert.ok(id);
+    assert.ok(knownSeverities.includes(severity));
+    assert.ok(title.length > 10);
+    assert.ok(evidence.length > 0);
+    assert.ok(remediation.length > 10);
+  });
+});