Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions .githooks/pre-commit
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ REPO_ROOT="$(cd "$HOOK_DIR/.." && pwd)"
cd "$REPO_ROOT"

node scripts/split-knowledge-large-files.js
if [ -d ".knowledge" ]; then
git add -A .knowledge
fi

if [ -d ".knowlenge" ]; then
git add -A .knowlenge
fi
while IFS= read -r -d '' knowledge_dir; do
git add -A -- "$knowledge_dir"
done < <(
find . -type d \
\( -name ".knowledge" -o -name ".knowlenge" \) \
-not -path "*/.git/*" \
-print0
)

MAX_BYTES=$((99 * 1000 * 1000))
too_large=()
Expand Down
6 changes: 3 additions & 3 deletions .githooks/pre-push
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/usr/bin/env bash
set -euo pipefail

# NOTE: pre-commit splits large .knowledge/.knowlenge files before they land in new commits.
# pre-push is a safety net: it prevents pushing any commit range containing >99MB blobs.
# NOTE: pre-commit splits/redacts knowledge files before they land in new commits.
# pre-push is a safety net: it prevents pushing commit ranges containing oversized or secret blobs
# under any nested .knowledge/.knowlenge path.

# Opt-out switch: when DOCKER_GIT_SKIP_KNOWLEDGE_GUARD is exactly "1" the hook
# succeeds immediately without running the guard script.
case "${DOCKER_GIT_SKIP_KNOWLEDGE_GUARD:-}" in
  1) exit 0 ;;
esac

# Hand every hook argument through to the Node guard; its exit status becomes
# the hook's exit status.
node scripts/pre-push-knowledge-guard.js "$@"

1 change: 1 addition & 0 deletions packages/docker-git/tests/core/templates.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ describe("planFiles", () => {
expect(dockerfileSpec.contents).toContain("AUTO_MENU")
expect(dockerfileSpec.contents).toContain("ncurses-term")
expect(dockerfileSpec.contents).toContain("tag-order builtins commands")
expect(dockerfileSpec.contents).toContain("gitleaks version")
}

if (entrypointSpec && entrypointSpec._tag === "File") {
Expand Down
16 changes: 16 additions & 0 deletions packages/lib/src/core/templates/dockerfile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ RUN curl -fsSL https://opencode.ai/install | HOME=/usr/local bash -s -- --no-mod
RUN ln -sf /usr/local/.opencode/bin/opencode /usr/local/bin/opencode
RUN opencode --version`

/** gitleaks release whose tarball is downloaded into the generated image. */
const gitleaksVersion = "8.28.0"

/**
 * Renders the Dockerfile section that installs the gitleaks binary.
 *
 * The emitted `RUN` maps `uname -m` to the release asset suffix (`x64` or
 * `arm64`), fails the build on any other architecture, extracts the `gitleaks`
 * binary into `/usr/local/bin`, and finishes with `gitleaks version`.
 *
 * `String.raw` is required here: in a cooked template literal a backslash
 * followed by a newline is a line continuation and is dropped, which would
 * collapse the whole `RUN` instruction into a single line in the generated
 * Dockerfile. `${gitleaksVersion}` is still substituted by `String.raw`.
 *
 * @returns The Dockerfile fragment, without a trailing newline.
 */
const renderDockerfileGitleaks = (): string =>
  String.raw`# Tooling: gitleaks (secret scanner for .knowledge/.knowlenge hooks)
RUN ARCH="$(uname -m)" \
  && case "$ARCH" in \
    x86_64|amd64) GITLEAKS_ARCH="x64" ;; \
    aarch64|arm64) GITLEAKS_ARCH="arm64" ;; \
    *) echo "Unsupported arch for gitleaks: $ARCH" >&2; exit 1 ;; \
  esac \
  && curl -fsSL "https://github.com/gitleaks/gitleaks/releases/download/v${gitleaksVersion}/gitleaks_${gitleaksVersion}_linux_$GITLEAKS_ARCH.tar.gz" \
    | tar -xz -C /usr/local/bin gitleaks \
  && chmod +x /usr/local/bin/gitleaks \
  && gitleaks version`

const dockerfilePlaywrightMcpBlock = String.raw`RUN npm install -g @playwright/mcp@latest

# docker-git: wrapper that converts a CDP HTTP endpoint into a usable WS endpoint
Expand Down Expand Up @@ -157,6 +172,7 @@ export const renderDockerfile = (config: TemplateConfig): string =>
renderDockerfileNode(),
renderDockerfileBun(config),
renderDockerfileOpenCode(),
renderDockerfileGitleaks(),
renderDockerfileUsers(config),
renderDockerfileWorkspace(config)
].join("\n\n")
1 change: 1 addition & 0 deletions packages/lib/tests/usecases/prepare-files.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ describe("prepareProjectFiles", () => {
const entrypoint = yield* _(fs.readFileString(path.join(outDir, "entrypoint.sh")))
const composeBefore = yield* _(fs.readFileString(path.join(outDir, "docker-compose.yml")))
expect(dockerfile).toContain("docker-compose-v2")
expect(dockerfile).toContain("gitleaks version")
expect(entrypoint).toContain('DOCKER_GIT_HOME="/home/dev/.docker-git"')
expect(entrypoint).toContain('SOURCE_SHARED_AUTH="/home/dev/.codex-shared/auth.json"')
expect(entrypoint).toContain('OPENCODE_DATA_DIR="/home/dev/.local/share/opencode"')
Expand Down
107 changes: 87 additions & 20 deletions scripts/pre-commit-secret-guard.sh
Original file line number Diff line number Diff line change
@@ -1,66 +1,129 @@
#!/usr/bin/env bash
set -euo pipefail

# CHANGE: Add bash-only pre-commit guard that redacts probable GitHub/OAuth secrets in staged files.
# WHY: Avoid relying on Node runtime in hook execution and keep local push-protection checks deterministic.
# CHANGE: Add staged knowledge secret guard with external scanner support.
# WHY: Prefer proven scanners (gitleaks) when available, while keeping deterministic fallback redaction.

# Verify required tools before first use: git is needed to resolve the
# repository root just below, so checking it afterwards could never report
# the friendly error message.
command -v git >/dev/null || { echo "ERROR: git is required" >&2; exit 1; }
command -v perl >/dev/null || { echo "ERROR: perl is required" >&2; exit 1; }

# Run from the repository top level so the paths git reports resolve as-is.
ROOT_DIR="$(git rev-parse --show-toplevel)"
cd "$ROOT_DIR"

SECRET_PATTERN='\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b'
SECRET_PATTERN='(\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b|\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b|\bsk-ant-[A-Za-z0-9_-]{20,}\b|-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----)'
# HAS_GITLEAKS is 1 when a `gitleaks` command is resolvable, 0 otherwise.
if command -v gitleaks >/dev/null 2>&1; then
  HAS_GITLEAKS=1
else
  HAS_GITLEAKS=0
fi

# Succeeds when any component of the given path is exactly ".knowledge" or
# ".knowlenge".
#
# Arguments:
#   $1 - path to test
# Returns: 0 on a match, 1 otherwise.
is_knowledge_path() {
  # Wrapping the path in slashes lets a single glob cover a component at the
  # start, in the middle, or at the end of the path.
  case "/$1/" in
    */.knowledge/* | */.knowlenge/*) return 0 ;;
    *) return 1 ;;
  esac
}

# Scan one file with gitleaks and print a single status word on stdout.
#
# Arguments:
#   $1 - path of the file whose contents are piped to `gitleaks stdin`
# Globals:
#   HAS_GITLEAKS - scan is attempted only when this is 1
# Output (stdout), exactly one of:
#   skip  - HAS_GITLEAKS is not 1, nothing was scanned
#   clean - gitleaks exited with status 0
#   hit   - gitleaks exited with status 1 (treated as "findings present")
#   error - the file is unreadable or gitleaks exited with any other status
scan_with_gitleaks_file() {
  local file_path="$1"
  local code=0

  if [ "$HAS_GITLEAKS" -ne 1 ]; then
    printf '%s\n' "skip"
    return
  fi

  # An unreadable input would make the redirection fail with status 1, which
  # must not be mistaken for a gitleaks finding.
  if [ ! -r "$file_path" ]; then
    printf '%s\n' "error"
    return
  fi

  # Capture the status in the same list as the command. Reading `$?` after an
  # `if ...; fi` whose condition failed yields 0 (the status of the `if`
  # compound itself), which would make the "hit" branch unreachable.
  gitleaks stdin --no-banner --redact --log-level error < "$file_path" >/dev/null 2>&1 || code=$?

  case "$code" in
    0) printf '%s\n' "clean" ;;
    1) printf '%s\n' "hit" ;;
    *) printf '%s\n' "error" ;;
  esac
}

# Dump the index (staged) version of a path into a file.
#
# Arguments:
#   $1 - repository path whose staged content is read via `git cat-file -p ":<path>"`
#   $2 - destination file, overwritten with that content
staged_blob_to_file() {
  local staged_path="$1" destination="$2"
  git cat-file -p ":${staged_path}" >"${destination}"
}

has_secret_in_staged_blob() {
local staged_blob_path="$1"
local gitleaks_state
gitleaks_state="$(scan_with_gitleaks_file "$staged_blob_path")"

if [ "$gitleaks_state" = "hit" ]; then
return 0
fi
if grep -Pq "$SECRET_PATTERN" "$staged_blob_path"; then
return 0
fi
return 1
}

redacted_count=0
manual_fix_files=()
has_staged_files=0

TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT
index=0

while IFS= read -r -d '' path; do
if [ -z "$path" ]; then
continue
fi
if ! is_knowledge_path "$path"; then
continue
fi

if ! git cat-file -e ":$path" 2>/dev/null; then
continue
fi

has_staged_files=1
tmp_path="${TMP_DIR}/entry"
has_unstaged=false

if ! git diff --quiet -- "$path"; then
has_unstaged=true
has_unstaged=true
if git diff --quiet -- "$path"; then
has_unstaged=false
fi

if [ "$has_unstaged" = true ]; then
git cat-file -p ":$path" > "$tmp_path"

if grep -Pq "$SECRET_PATTERN" "$tmp_path"; then
manual_fix_files+=("$path")
fi

index=$((index + 1))
tmp_path="${TMP_DIR}/entry-${index}"
staged_blob_to_file "$path" "$tmp_path"
if ! has_secret_in_staged_blob "$tmp_path"; then
continue
fi

if ! grep -Pq "$SECRET_PATTERN" "$path"; then
if [ "$has_unstaged" = true ]; then
manual_fix_files+=("$path")
continue
fi

perl -0pi -e 's/\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b/<REDACTED_GITHUB_TOKEN>/g' "$path"
perl -0pi -e '
s/\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b/<REDACTED_GITHUB_TOKEN>/g;
s/\bsk-ant-[A-Za-z0-9_-]{20,}\b/<REDACTED_ANTHROPIC_KEY>/g;
s/\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b/<REDACTED_OPENAI_KEY>/g;
s/-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----[\s\S]*?-----END(?: [A-Z0-9]+)* PRIVATE KEY-----/<REDACTED_PRIVATE_KEY>/g;
' "$path"
git add -- "$path"
redacted_count=$((redacted_count + 1))

redacted_path="${TMP_DIR}/post-redacted-${index}"
staged_blob_to_file "$path" "$redacted_path"
if has_secret_in_staged_blob "$redacted_path"; then
manual_fix_files+=("$path")
else
redacted_count=$((redacted_count + 1))
fi
done < <(git diff --cached --name-only --diff-filter=ACM -z)

if [ "$has_staged_files" -eq 0 ]; then
exit 0
fi

if [ "${#manual_fix_files[@]}" -gt 0 ]; then
echo "ERROR: secret-like tokens found in staged versions with unstaged changes."
echo "ERROR: secret-like tokens found in staged .knowledge/.knowlenge files with unstaged changes."
echo "Please fix these files manually in index or clear unstaged changes, then commit again:"
for file in "${manual_fix_files[@]}"; do
echo " - $file"
Expand All @@ -70,7 +133,11 @@ if [ "${#manual_fix_files[@]}" -gt 0 ]; then
fi

if [ "$redacted_count" -gt 0 ]; then
echo "pre-commit: auto-redacted secrets in $redacted_count staged file(s)."
if [ "$HAS_GITLEAKS" -eq 1 ]; then
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: gitleaks + fallback)."
else
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: fallback regex)."
fi
fi

exit 0
Loading