diff --git a/scripts/search/index_pages.py b/scripts/search/index_pages.py index 86db850b1b6..e795c2e954a 100644 --- a/scripts/search/index_pages.py +++ b/scripts/search/index_pages.py @@ -25,6 +25,17 @@ link_data = [] +def get_doc_type_rank(doc_type): + """Return numeric rank for doc_type to use in Algolia customRanking.""" + ranks = { + 'guide': 3, + 'reference': 3, + 'changelog': 1, + 'landing_page': 1 + } + return ranks.get(doc_type, 2) # Default to 2 for unspecified types + + def split_url_and_anchor(url): parsed_url = urlparse(url) url_without_anchor = urlunparse(parsed_url._replace(fragment="")) @@ -39,7 +50,11 @@ def read_metadata(text): parts = part.split(":") if len(parts) == 2: if parts[0] in ['title', 'description', 'slug', 'keywords', 'score', 'doc_type']: - metadata[parts[0]] = int(parts[1].strip()) if parts[0] == 'score' else parts[1].strip() + value = parts[1].strip() + # Strip quotes only from doc_type + if parts[0] == 'doc_type': + value = value.strip("'\"") + metadata[parts[0]] = int(value) if parts[0] == 'score' else value return metadata @@ -249,7 +264,8 @@ def parse_markdown_content(metadata, content, base_url): 'lvl1': current_h1 }, 'score': metadata.get('score', 0), - 'doc_type': metadata.get('doc_type', '') + 'doc_type': metadata.get('doc_type', ''), + 'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', '')) } for line in lines: if line.startswith('# '): @@ -294,7 +310,8 @@ def parse_markdown_content(metadata, content, base_url): 'lvl1': current_h1, 'lvl2': current_h2, }, - 'doc_type': metadata.get('doc_type', '') + 'doc_type': metadata.get('doc_type', ''), + 'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', '')) } elif line.startswith('### '): # note we send users to the h2 or h1 even on ### @@ -324,7 +341,8 @@ def parse_markdown_content(metadata, content, base_url): 'lvl2': current_h2, 'lvl3': current_h3, }, - 'doc_type': metadata.get('doc_type', '') + 'doc_type': metadata.get('doc_type', ''), + 'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', '')) } elif line.startswith('#### '): if current_subdoc: @@ -351,7 +369,8 @@ def parse_markdown_content(metadata, content, base_url): 'lvl3': current_h3, 'lvl4': current_h4, }, - 'doc_type': metadata.get('doc_type', '') + 'doc_type': metadata.get('doc_type', ''), + 'doc_type_rank': get_doc_type_rank(metadata.get('doc_type', '')) } elif current_subdoc: current_subdoc['content'] += line + '\n' @@ -453,6 +472,7 @@ def main(base_directory, algolia_app_id, algolia_api_key, algolia_index_name, print(f"URL: {sample_record.get('url', 'N/A')}") print(f"Type: {sample_record.get('type', 'N/A')}") print(f"Doc Type: {sample_record.get('doc_type', 'N/A')}") + print(f"Doc Type Rank: {sample_record.get('doc_type_rank', 'N/A')}") print(f"Keywords: {sample_record.get('keywords', 'N/A')}") print("--- End sample ---\n") print(f"{'processed' if dry_run else 'indexed'} {len(batch)} records") diff --git a/scripts/search/settings.json b/scripts/search/settings.json index e715945e1ff..6d995f2df2b 100644 --- a/scripts/search/settings.json +++ b/scripts/search/settings.json @@ -22,7 +22,8 @@ "url_without_anchor", "type", "title", - "doc_type" + "doc_type", + "doc_type_rank" ], "camelCaseAttributes": [ "h1", @@ -53,7 +54,7 @@ "an" ], "attributesForFaceting": [ - "doc_type" + "filterOnly(doc_type)" ], "attributesToSnippet": [ "content:15", @@ -83,6 +84,7 @@ "custom" ], "customRanking": [ + "desc(doc_type_rank)", "desc(score)", "desc(page_rank)" ], diff --git a/src/theme/SearchBar/utils/searchConfig.js b/src/theme/SearchBar/utils/searchConfig.js index 0e6c13249a4..3d0dffa2a6b 100644 --- a/src/theme/SearchBar/utils/searchConfig.js +++ b/src/theme/SearchBar/utils/searchConfig.js @@ -8,7 +8,7 @@ export const createDocTypeFilters = (docTypes) => { if (!docTypes) return []; const types = Array.isArray(docTypes) ? docTypes : [docTypes]; - return types.map(type => `doc_type:'${type}'`); + return types.map(type => `doc_type:${type}`); }; /**