/**
 * Checks if a string looks like a character set / alphabet used for ID generation
 * or similar utilities (e.g. customAlphabet, nanoid, uuid generation).
 *
 * A string is treated as a charset when it is at least 16 characters long and either:
 * - it contains (nearly) every alphanumeric character: at least 61 of the 62
 *   distinct characters in a-z, A-Z, 0-9, in any order; or
 * - it is made up mostly of ascending runs of consecutive character codes
 *   (e.g. 'abcdef', 'ABCDEF', '012345'), with at least one run of 6 or more.
 *
 * A single short run embedded in otherwise unstructured text is NOT enough:
 * a long token that merely happens to contain 'abcdef' or '123456' must still
 * reach the secret detectors.
 *
 * Examples that should pass:
 *   'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' // dotenv-diff-ignore
 *   '0123456789abcdef'
 *   'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567' (base32 alphabet)
 *
 * @param s - The string literal to classify.
 * @returns True if the string looks like a charset/alphabet definition.
 */
function looksLikeCharset(s: string): boolean {
  // Must be reasonably long to bother checking
  if (s.length < 16) return false;

  // Near-complete alphanumeric coverage: 26 lowercase + 26 uppercase + 10 digits
  // gives 62 possible characters. Seeing at least 61 of them means the string
  // is an alphabet definition even when the characters are shuffled.
  const uniqueAlnum = new Set(s.replace(/[^A-Za-z0-9]/g, '')).size;
  if (uniqueAlnum >= 61) return true;

  // Fallback: measure how much of the string consists of ascending runs of
  // consecutive character codes.
  const minLongestRun = 6; // at least one clearly deliberate run is required
  const minCountedRun = 3; // shorter runs are likely coincidental and are not counted
  const minRunCoverage = 0.6; // share of the string that must sit inside counted runs

  let longestRun = 1;
  let charsInRuns = 0;
  let runLength = 1;

  // Iterate one step past the end so the final run is closed inside the loop.
  for (let i = 1; i <= s.length; i++) {
    const extendsRun =
      i < s.length && s.charCodeAt(i) === s.charCodeAt(i - 1) + 1;
    if (extendsRun) {
      runLength++;
      continue;
    }

    // The run ending at index i - 1 is complete: record it and start over.
    if (runLength > longestRun) longestRun = runLength;
    if (runLength >= minCountedRun) charsInRuns += runLength;
    runLength = 1;
  }

  return (
    longestRun >= minLongestRun && charsInRuns / s.length >= minRunCoverage
  );
}
code = customAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 6);"; + const findings = detectSecretsInSource('test.ts', source); + expect(findings).toHaveLength(0); + }); + + it('should ignore hex charset', () => { + // 16 unique chars, has a sequential run of 10 digits + 6 letters + const source = "const hex = customAlphabet('0123456789abcdef', 32);"; + const findings = detectSecretsInSource('test.ts', source); + expect(findings).toHaveLength(0); + }); + + it('should ignore base32 alphabet', () => { + // RFC 4648 base32: A-Z + 2-7 + const source = + "const encoded = customAlphabet('ABCDEFGHIJKLMNOPQRSTUVWXYZ234567', 16);"; + const findings = detectSecretsInSource('test.ts', source); + expect(findings).toHaveLength(0); + }); + + it('should ignore digits-only charset', () => { + const source = "const pin = customAlphabet('0123456789', 6);"; + const findings = detectSecretsInSource('test.ts', source); + expect(findings).toHaveLength(0); + }); + + it('should still detect a real high-entropy secret that is not a charset', () => { + // Looks like a real token — no sequential runs, no large unique set structure + const source = + 'const token = "xK9mQwP2zLsR8tYu5nV7cJ4hFgD6eS1iO0pA3bC";'; + const findings = detectSecretsInSource('test.ts', source); + // Should still be flagged as entropy finding + expect(findings.length).toBeGreaterThan(0); + expect(findings.some((f) => f.kind === 'entropy')).toBe(true); + }); + + it('should still detect AWS key even if it superficially resembles an alphabet', () => { + const source = 'const key = "AKIAIOSFODNN7EXAMPLE";'; + const findings = detectSecretsInSource('test.ts', source); + expect(findings).toHaveLength(1); + expect(findings[0].severity).toBe('high'); + }); + + it('should ignore alphabet assigned to a variable without a function call', () => { + // Charset used as a plain constant, not inside a function + const source = + "const ALPHABET = 'abcdefghijklmnopqrstuvwxyz0123456789';"; + const findings = detectSecretsInSource('test.ts', 
source); + expect(findings).toHaveLength(0); + }); + }); }); });