diff --git a/scripts/search/index_pages.py b/scripts/search/index_pages.py
index 49dcf4dcea4..86db850b1b6 100644
--- a/scripts/search/index_pages.py
+++ b/scripts/search/index_pages.py
@@ -38,7 +38,7 @@ def read_metadata(text):
     for part in parts:
         parts = part.split(":")
         if len(parts) == 2:
-            if parts[0] in ['title', 'description', 'slug', 'keywords', 'score']:
+            if parts[0] in ['title', 'description', 'slug', 'keywords', 'score', 'doc_type']:
                 metadata[parts[0]] = int(parts[1].strip()) if parts[0] == 'score' else parts[1].strip()
     return metadata
 
@@ -215,12 +215,12 @@ def update_page_links(directory, base_directory, page_path, url, content, base_u
                 c_page = os.path.abspath(os.path.join(os.path.dirname(page_path), './' + target))
                 metadata, _ = parse_metadata_and_content(directory, base_directory, c_page, log_snippet_failure=False)
                 if 'slug' in metadata:
-                    link_data.append((url, f'{base_url}{metadata.get('slug')}'))
+                    link_data.append((url, f"{base_url}{metadata.get('slug')}"))
                 else:
                     fail = True
             elif target.startswith('/'):  # ignore external links
                 target = target.removesuffix('/')
-                link_data.append((url, f'{base_url}{target}'))
+                link_data.append((url, f"{base_url}{target}"))
 
     if fail:
         print(f"Warning: couldn't resolve link for {page_path}")
@@ -248,7 +248,8 @@ def parse_markdown_content(metadata, content, base_url):
             'lvl0': current_h1,
             'lvl1': current_h1
         },
-        'score': metadata.get('score', 0)
+        'score': metadata.get('score', 0),
+        'doc_type': metadata.get('doc_type', '')
     }
     for line in lines:
         if line.startswith('# '):
@@ -266,8 +267,7 @@ def parse_markdown_content(metadata, content, base_url):
                 current_subdoc['type'] = 'lvl1'
                 current_subdoc['object_id'] = custom_slugify(heading_slug)
                 current_subdoc['hierarchy']['lvl1'] = current_h1
-                current_subdoc['hierarchy']['lvl0'] = current_h1 if metadata.get('title', '') == '' else metadata.get(
-                    'title', '')
+                current_subdoc['hierarchy']['lvl0'] = current_h1 if metadata.get('title', '') == '' else metadata.get('title', '')
         elif line.startswith('## '):
             if current_subdoc:
                 yield from split_large_document(current_subdoc)
@@ -293,7 +293,8 @@ def parse_markdown_content(metadata, content, base_url):
                     'lvl0': current_h1 if metadata.get('title', '') == '' else metadata.get('title', ''),
                     'lvl1': current_h1,
                     'lvl2': current_h2,
-                }
+                },
+                'doc_type': metadata.get('doc_type', '')
             }
         elif line.startswith('### '):
             # note we send users to the h2 or h1 even on ###
@@ -322,7 +323,8 @@ def parse_markdown_content(metadata, content, base_url):
                     'lvl1': current_h1,
                     'lvl2': current_h2,
                     'lvl3': current_h3,
-                }
+                },
+                'doc_type': metadata.get('doc_type', '')
             }
         elif line.startswith('#### '):
             if current_subdoc:
@@ -348,7 +350,8 @@ def parse_markdown_content(metadata, content, base_url):
                     'lvl2': current_h2,
                     'lvl3': current_h3,
                     'lvl4': current_h4,
-                }
+                },
+                'doc_type': metadata.get('doc_type', '')
             }
         elif current_subdoc:
             current_subdoc['content'] += line + '\n'
@@ -410,9 +413,9 @@ def compute_page_rank(link_data, damping_factor=0.85, max_iter=100, tol=1e-6):
 def create_new_index(client, index_name):
     try:
         client.delete_index(index_name)
-        print(f'Temporary index \'{index_name}\' deleted successfully.')
+        print(f"Temporary index '{index_name}' deleted successfully.")
     except:
-        print(f'Temporary index \'{index_name}\' does not exist or could not be deleted')
+        print(f"Temporary index '{index_name}' does not exist or could not be deleted")
     client.set_settings(index_name, settings['settings'])
     client.save_rules(index_name, settings['rules'])
     print(f"Settings applied to temporary index '{index_name}'.")
@@ -442,9 +445,19 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
         else:
             for d in batch:
                 print(f"{d['url']} - {d['page_rank']}")
-        print(f'{'processed' if dry_run else 'indexed'} {len(batch)} records')
+        # Print a sample record to verify doc_type is included
+        if batch:
+            print("\n--- Sample record ---")
+            sample_record = batch[0]
+            print(f"Title: {sample_record.get('title', 'N/A')}")
+            print(f"URL: {sample_record.get('url', 'N/A')}")
+            print(f"Type: {sample_record.get('type', 'N/A')}")
+            print(f"Doc Type: {sample_record.get('doc_type', 'N/A')}")
+            print(f"Keywords: {sample_record.get('keywords', 'N/A')}")
+            print("--- End sample ---\n")
+        print(f"{'processed' if dry_run else 'indexed'} {len(batch)} records")
         t += len(batch)
-    print(f'total {'processed' if dry_run else 'indexed'} {t} records')
+    print(f"total {'processed' if dry_run else 'indexed'} {t} records")
     if not dry_run:
         print('switching temporary index...', end='')
         client.operation_index(temp_index_name, {"operation": "move", "destination": algolia_index_name})
@@ -471,4 +484,4 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name,
     if args.dry_run:
         print('Dry running, not sending results to Algolia.')
     main(args.base_directory, args.algolia_app_id, args.algolia_api_key, args.algolia_index_name,
-         dry_run=args.dry_run)
+         dry_run=args.dry_run)
\ No newline at end of file
diff --git a/scripts/search/settings.json b/scripts/search/settings.json
index bb4457e3854..e715945e1ff 100644
--- a/scripts/search/settings.json
+++ b/scripts/search/settings.json
@@ -21,7 +21,8 @@
     "url",
     "url_without_anchor",
     "type",
-    "title"
+    "title",
+    "doc_type"
   ],
   "camelCaseAttributes": [
     "h1",
@@ -51,7 +52,9 @@
     "a",
     "an"
   ],
-  "attributesForFaceting": null,
+  "attributesForFaceting": [
+    "doc_type"
+  ],
   "attributesToSnippet": [
     "content:15",
     "title:10"
diff --git a/src/theme/SearchBar/docTypeSelector.jsx b/src/theme/SearchBar/docTypeSelector.jsx
new file mode 100644
index 00000000000..622abb928e4
--- /dev/null
+++ b/src/theme/SearchBar/docTypeSelector.jsx
@@ -0,0 +1,47 @@
+import React from 'react';
+
+const DOC_TYPES = {
+  GUIDE: 'guide',
+  REFERENCE: 'reference',
+  CHANGELOG: 'changelog',
+  LANDINGPAGE: 'landing-page',
+};
+
+export function DocTypeSelector({ selectedDocTypes, onSelectionChange, className }) {
+  const handleChange = (event) => {
+    const value = event.target.value;
+    if (value === 'all') {
+      onSelectionChange(null);
+    } else {
+      onSelectionChange([value]);
+    }
+  };
+
+  const currentValue = selectedDocTypes?.length === 1 ? selectedDocTypes[0] : 'all';
+
+  return (
+
+  );
+}
+
+export { DOC_TYPES };
diff --git a/src/theme/SearchBar/index.js b/src/theme/SearchBar/index.js
index 9e49ccc4be5..7e9ba1b1aea 100644
--- a/src/theme/SearchBar/index.js
+++ b/src/theme/SearchBar/index.js
@@ -1,4 +1,4 @@
-import React, { useCallback, useMemo, useRef } from 'react';
+import React, { useCallback, useMemo, useRef, useState } from 'react';
 import { DocSearchButton, useDocSearchKeyboardEvents } from '@docsearch/react';
 import Head from '@docusaurus/Head';
 import { useHistory } from '@docusaurus/router';
@@ -21,6 +21,7 @@ import {
 } from './utils/searchConfig';
 import { SearchHit } from './searchHit';
 import { SearchResultsFooter } from './searchResultsFooter';
+import { DocTypeSelector } from './docTypeSelector';
 
 function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
   const queryIDRef = useRef(null);
@@ -31,6 +32,9 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
   const history = useHistory();
   const searchButtonRef = useRef(null);
 
+  // Doc type filtering state
+  const [selectedDocTypes, setSelectedDocTypes] = useState(null);
+
   // Use the modal management hook
   const {
     isOpen,
@@ -43,8 +47,13 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
     importDocSearchModalIfNeeded
   } = useDocSearchModal();
 
-  // Configure search parameters
-  const searchParameters = createSearchParameters(props, contextualSearch, contextualSearchFacetFilters);
+  // Configure search parameters with doc_type filter
+  const searchParameters = createSearchParameters(
+    props,
+    contextualSearch,
+    contextualSearchFacetFilters,
+    selectedDocTypes
+  );
 
   useEffect(() => {
     initializeSearchAnalytics(props.appId, props.apiKey);
@@ -66,6 +75,10 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
     });
   }, [props.transformItems, processSearchResultUrl, currentLocale]);
 
+  const handleDocTypeChange = useCallback((docTypes) => {
+    setSelectedDocTypes(docTypes);
+  }, []);
+
   const resultsFooterComponent = useMemo(
     () =>
       // eslint-disable-next-line react/no-unstable-nested-components
@@ -130,23 +143,40 @@ function DocSearch({ contextualSearch, externalUrlRegex, ...props }) {
       {DocSearchModal &&
         searchContainer &&
         createPortal(
-          ,
+          <>
+
+
+            {/* Selector positioned as overlay */}
+
+
+
+          ,
           searchContainer,
         )}
diff --git a/src/theme/SearchBar/searchHit.jsx b/src/theme/SearchBar/searchHit.jsx
index 9549f6ac9f4..3eed981e57c 100644
--- a/src/theme/SearchBar/searchHit.jsx
+++ b/src/theme/SearchBar/searchHit.jsx
@@ -10,20 +10,44 @@ export function SearchHit({ hit, children }) {
     .slice(0, 3) // Take first 3 segments max
     .map(segment => segment.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase()));
 
+  // Format doc_type for display, stripping quotes and formatting
+  const formatDocType = (docType) => {
+    if (!docType) return null;
+    // Remove surrounding quotes and format
+    const cleaned = docType.replace(/^'|'$/g, '');
+    return cleaned.replace(/-/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
+  };
+
+  const docTypeDisplay = formatDocType(hit.doc_type);
+
   return (
 
       {children}
 
-      {breadcrumbs.length > 0 && (
-
-          {breadcrumbs.join(' › ')}
-
-      )}
+
+        {/* Doc type badge */}
+        {docTypeDisplay && (
+
+            {docTypeDisplay}
+
+        )}
+
+        {/* Breadcrumbs */}
+        {breadcrumbs.length > 0 && (
+          {breadcrumbs.join(' › ')}
+        )}
+
   );
 }
diff --git a/src/theme/SearchBar/utils/searchConfig.js b/src/theme/SearchBar/utils/searchConfig.js
index ee01e00fc80..0e6c13249a4 100644
--- a/src/theme/SearchBar/utils/searchConfig.js
+++ b/src/theme/SearchBar/utils/searchConfig.js
@@ -1,6 +1,16 @@
 import { isRegexpStringMatch } from '@docusaurus/theme-common';
 import { DEFAULT_SEARCH_PARAMS, URL_CONFIG } from '../searchConstants';
 
+/**
+ * Helper to create doc_type filters from array or single value
+ */
+export const createDocTypeFilters = (docTypes) => {
+  if (!docTypes) return [];
+
+  const types = Array.isArray(docTypes) ? docTypes : [docTypes];
+  return types.map(type => `doc_type:'${type}'`);
+};
+
 /**
  * Merge facet filters from different sources
  * @param {string|string[]} f1 - First set of facet filters
@@ -17,17 +27,31 @@ export function mergeFacetFilters(f1, f2) {
  * @param {Object} props - Component props
  * @param {boolean} contextualSearch - Whether to use contextual search
  * @param {string[]} contextualSearchFacetFilters - Contextual facet filters
+ * @param {string|string[]} docTypes - Document types to filter by
  * @returns {Object} - Configured search parameters
  */
-export function createSearchParameters(props, contextualSearch, contextualSearchFacetFilters) {
+export function createSearchParameters(props, contextualSearch, contextualSearchFacetFilters, docTypes = null) {
   const configFacetFilters = props.searchParameters?.facetFilters ?? [];
 
-  const facetFilters = contextualSearch
-    ? mergeFacetFilters(contextualSearchFacetFilters, configFacetFilters)
-    : configFacetFilters;
+  const docTypeFilters = createDocTypeFilters(docTypes);
+
+  let facetFilters = configFacetFilters;
+
+  if (contextualSearch) {
+    facetFilters = mergeFacetFilters(contextualSearchFacetFilters, facetFilters);
+  }
+
+  if (docTypeFilters.length > 0) {
+    facetFilters = mergeFacetFilters(facetFilters, docTypeFilters);
+  }
 
   return {
     ...props.searchParameters,
     facetFilters,
+    // Add doc_type to DocSearch's default attributesToRetrieve
+    attributesToRetrieve: [
+      "hierarchy.lvl0","hierarchy.lvl1","hierarchy.lvl2","hierarchy.lvl3","hierarchy.lvl4","hierarchy.lvl5","hierarchy.lvl6",
+      "content","type","url","doc_type"
+    ],
     ...DEFAULT_SEARCH_PARAMS,
   };
 }
@@ -63,14 +87,20 @@ export function createSearchNavigator(history, externalUrlRegex) {
 export function transformSearchItems(items, options) {
   const { transformItems, processSearchResultUrl, currentLocale, queryIDRef } = options;
 
-  const baseTransform = (items) => items.map((item, index) => ({
-    ...item,
-    url: (URL_CONFIG.FORCE_ENGLISH_RESULTS && currentLocale === URL_CONFIG.DEFAULT_LOCALE)
-      ? processSearchResultUrl(item.url)
-      : item.url,
-    index, // Adding the index property - needed for click metrics
-    queryID: queryIDRef.current
-  }));
+  const baseTransform = (items) => items.map((item, index) => {
+    const transformed = {
+      ...item,
+      url: (URL_CONFIG.FORCE_ENGLISH_RESULTS && currentLocale === URL_CONFIG.DEFAULT_LOCALE)
+        ? processSearchResultUrl(item.url)
+        : item.url,
+      index,
+      queryID: queryIDRef.current
+    };
+
+    return transformed;
+  });
 
-  return transformItems ? transformItems(items) : baseTransform(items);
+  const result = transformItems ? transformItems(items) : baseTransform(items);
+
+  return result;
 }