Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions .github/scripts/generateAllowedUrls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import fs from 'fs';
import path from 'path';

/**
* Script to extract all URLs from help articles and generate a whitelist.
* Run this at build time to update the allowed URLs list.
*
* Usage: npx ts-node .github/scripts/generateAllowedUrls.ts
*/

// Root of the help-site sources: the `docs` directory two levels above this script's directory.
const DOCS_DIR = path.join(__dirname, '..', '..', 'docs');
// Destination of the generated JSON list: `assets/js/allowedExternalUrls.json` inside DOCS_DIR.
const OUTPUT_FILE = path.join(DOCS_DIR, 'assets', 'js', 'allowedExternalUrls.json');

/**
 * Patterns used to locate http(s) URLs in markdown content.
 * Every pattern is global and exposes the URL itself as capture group 1.
 *
 * The bare-URL pattern stops at whitespace, closing brackets, quotes, angle
 * brackets and backticks. None of these can appear unencoded in a URL, and
 * stopping at `<` / backtick keeps a following HTML closing tag
 * (`https://example.com</a>`) or the end of an inline code span out of the
 * captured URL.
 */
const URL_PATTERNS = [
    /\[.*?\]\((https?:\/\/[^)\s]+)\)/g, // Markdown links [text](url)
    /<(https?:\/\/[^>\s]+)>/g, // Angle bracket URLs <url>
    /(?<![([])(https?:\/\/[^\s)\]<>"'`]+)/g, // Bare URLs (not directly preceded by "(" or "[")
];

/**
 * Recursively collects the paths of all `.md` files below a directory.
 * Sub-directories whose name starts with `_site` are not descended into.
 *
 * @param dir - Directory to scan.
 * @returns Paths of the markdown files found, each built by joining `dir` with the entry names.
 */
function findMarkdownFiles(dir: string): string[] {
    return fs.readdirSync(dir, {withFileTypes: true}).flatMap((entry) => {
        const entryPath = path.join(dir, entry.name);

        if (entry.isDirectory() && !entry.name.startsWith('_site')) {
            return findMarkdownFiles(entryPath);
        }

        const isMarkdownFile = entry.isFile() && entry.name.endsWith('.md');
        return isMarkdownFile ? [entryPath] : [];
    });
}

/**
 * Collects every URL that any of the URL_PATTERNS finds in the given text.
 *
 * @param content - Text to scan (the caller passes markdown file contents).
 * @returns The distinct URLs found, with trailing punctuation removed.
 */
function extractUrls(content: string): Set<string> {
    const found = new Set<string>();

    URL_PATTERNS.forEach((pattern) => {
        // Work on a fresh copy so no lastIndex state is shared with the module-level regex.
        const matcher = new RegExp(pattern.source, pattern.flags);

        for (const hit of content.matchAll(matcher)) {
            // Prefer the captured URL, fall back to the whole match, then drop
            // punctuation that was picked up at the end of the URL.
            const candidate = (hit[1] || hit[0]).replace(/[.,;:!?)]+$/, '');
            if (candidate.startsWith('http')) {
                found.add(candidate);
            }
        }
    });

    return found;
}

/**
 * Entry point: scans every markdown file under DOCS_DIR, gathers the URLs they
 * contain, drops Expensify-hosted and unparsable ones, and writes the sorted
 * remainder to OUTPUT_FILE as pretty-printed JSON.
 */
function main() {
    console.log('Scanning markdown files for URLs...');

    const markdownFiles = findMarkdownFiles(DOCS_DIR);
    console.log(`Found ${markdownFiles.length} markdown files`);

    const allUrls = new Set<string>();
    markdownFiles.forEach((file) => {
        const content = fs.readFileSync(file, 'utf-8');
        extractUrls(content).forEach((url) => allUrls.add(url));
    });

    // A URL is kept only when it parses and its host is neither expensify.com nor a subdomain of it.
    const isExternalUrl = (url: string): boolean => {
        let hostname: string;
        try {
            hostname = new URL(url).hostname;
        } catch {
            return false;
        }
        return !(hostname === 'expensify.com' || hostname.endsWith('.expensify.com'));
    };

    const urlList = Array.from(allUrls).filter(isExternalUrl).sort();
    console.log(`Found ${urlList.length} unique URLs`);

    // Persist the list as indented JSON.
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(urlList, null, 2));
    console.log(`Written to ${OUTPUT_FILE}`);
}

main();
3 changes: 3 additions & 0 deletions .github/workflows/deployExpensifyHelp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ jobs:
- name: Enforce iframe and Cloudflare CDN usage
run: ./.github/scripts/enforceVideoFormats.sh

- name: Generate allowed URLs whitelist for AI search
run: npx ts-node .github/scripts/generateAllowedUrls.ts

- name: Build with Jekyll
uses: actions/jekyll-build-pages@0143c158f4fa0c5dcd99499a5d00859d79f70b0e
with:
Expand Down
1 change: 1 addition & 0 deletions docs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ _site
.jekyll-metadata
vendor
_data/routes.yml
assets/js/allowedExternalUrls.json
1 change: 1 addition & 0 deletions docs/_layouts/default.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<script src="https://kit.fontawesome.com/263e5e8608.js" crossorigin="anonymous"></script>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/tocbot/4.12.0/tocbot.js"></script>
<script defer src="https://vjs.zencdn.net/8.5.2/video.min.js"></script>
<script src="https://unpkg.com/dompurify@3.2.4/dist/purify.min.js"></script>
<script defer src="{{ '/assets/js/main.js' | cache_bust }}"></script>
<script defer src="{{ '/assets/js/platform-tabs.js' | cache_bust }}"></script>
<script defer src="{{ '/assets/js/selector.js' | cache_bust }}"></script>
Expand Down
42 changes: 40 additions & 2 deletions docs/assets/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,37 @@ function injectFooterCopyright() {
const SEARCH_API_URL = 'https://www.expensify.com/api/SearchHelpsite';
const ASK_AI_API_URL = 'https://www.expensify.com/api/AskHelpsiteAI';

// Hostnames taken from the generated allowedExternalUrls.json list.
// Stays empty until the fetch below succeeds, and also if it fails.
let allowedDomains = [];
fetch('/assets/js/allowedExternalUrls.json')
    .then((response) => response.json())
    .then((urls) => {
        const hostnames = [];
        for (const url of urls) {
            try {
                hostnames.push(new URL(url).hostname);
            } catch {
                // Entries that are not parsable URLs contribute nothing.
            }
        }
        // Drop empty hostnames.
        allowedDomains = hostnames.filter(Boolean);
    })
    .catch(() => {});

DOMPurify.addHook('afterSanitizeAttributes', (node) => {
if (node.tagName === 'A' && node.hasAttribute('href')) {
const href = node.getAttribute('href');
try {
const hostname = new URL(href).hostname;
const isExpensifyLink = hostname === 'expensify.com' || hostname.endsWith('.expensify.com');
if (!isExpensifyLink && allowedDomains.length > 0 && !allowedDomains.includes(hostname)) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Block unapproved links when whitelist isn't loaded

The allowlist check currently fails open: allowedDomains starts empty and stays empty if /assets/js/allowedExternalUrls.json fails to load, and the guard allowedDomains.length > 0 means non-Expensify links are not removed in that state. In any case where the JSON fetch is delayed or errors, AI responses can still render arbitrary external anchors, which defeats the security hardening this change is meant to provide.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Member Author

@rushatgabhane rushatgabhane Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that's fine. we don't wanna wait on it

node.remove();
}
} catch {
node.remove();
}
}
});

/**
 * Derives a display title from a URL: takes the text after the last "/" and
 * replaces every hyphen with a space.
 *
 * @param {string} url
 * @returns {string}
 */
function getTitleFromURL(url) {
    const segments = url.split('/');
    const lastSegment = segments[segments.length - 1];
    return lastSegment.split('-').join(' ');
}
Expand Down Expand Up @@ -173,7 +204,11 @@ function askHelpsiteAI(query) {
formData.append('platform', platform);
}

fetch(ASK_AI_API_URL, {method: 'POST', body: formData, signal: aiAbortController.signal})
fetch(ASK_AI_API_URL, {
method: 'POST',
body: formData,
signal: aiAbortController.signal,
})
.then((response) => response.json())
.then((data) => {
const answer = data.answer || '';
Expand All @@ -184,7 +219,10 @@ function askHelpsiteAI(query) {

const template = cloneTemplate('ai-response-template');
const content = template.querySelector('.ai-content');
content.innerHTML = answer;
content.innerHTML = DOMPurify.sanitize(answer, {
ALLOWED_TAGS: ['p', 'br', 'strong', 'b', 'em', 'i', 'ul', 'ol', 'li', 'a', 'code', 'pre'],
ALLOWED_ATTR: ['href', 'target', 'rel'],
});

const showMoreButton = template.querySelector('.ai-show-more');
aiContainer.innerHTML = '';
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"build-staging": "tsx ./node_modules/.bin/webpack-cli --config config/webpack/webpack.common.ts --env file=.env.staging && tsx ./scripts/combine-web-sourcemaps.ts",
"build-adhoc": "tsx ./node_modules/.bin/webpack-cli --config config/webpack/webpack.common.ts --env file=.env.adhoc && tsx ./scripts/combine-web-sourcemaps.ts",
"createDocsRoutes": "ts-node .github/scripts/createDocsRoutes.ts",
"generateAllowedUrls": "ts-node .github/scripts/generateAllowedUrls.ts",
"detectRedirectCycle": "ts-node .github/scripts/detectRedirectCycle.ts",
"ios-build": "bundle exec fastlane ios build_unsigned",
"ios-hybrid-build": "bundle exec fastlane ios build_unsigned_hybrid",
Expand Down
Loading