Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[6.0] Fix a few issues with infinite recursions if the content contains cyclic curation #905

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public struct GeneratedCurationWriter {
for (usr, reference) in context.documentationCache.referencesBySymbolID {
// Filter out symbols that aren't in the specified sub hierarchy.
if symbolLink != nil || depthLimit != nil {
guard reference == curationCrawlRoot || context.pathsTo(reference).contains(where: { path in path.suffix(depthLimit ?? .max).contains(curationCrawlRoot)}) else {
guard reference == curationCrawlRoot || context.finitePaths(to: reference).contains(where: { path in path.suffix(depthLimit ?? .max).contains(curationCrawlRoot)}) else {
continue
}
}
Expand Down
3 changes: 1 addition & 2 deletions Sources/SwiftDocC/Indexing/Navigator/NavigatorIndex.swift
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,7 @@ public class NavigatorIndex {
}

/// Indicates the page type of a given item inside the tree.
/// - Note: This information is stored as UInt8 to decrease the required size to store it and make
/// the comparision faster between types.
/// - Note: This information is stored as `UInt8` to decrease the required size to store it and make the comparison faster between types.
public enum PageType: UInt8 {
case root = 0
case article = 1
Expand Down
50 changes: 42 additions & 8 deletions Sources/SwiftDocC/Indexing/Navigator/NavigatorTree.swift
Original file line number Diff line number Diff line change
Expand Up @@ -480,20 +480,54 @@ public class NavigatorTree {

/// Copy the current node and children to new instances preserving the node item.
public func copy() -> NavigatorTree.Node {
// DocC detects cyclic curation and avoids adding it to the topic graph.
// However, the navigator is based on the render node's topic sections which still has the cycle.
//
// Because the navigator nodes are shared instances, the cycle needs to be broken at each occurrence to correctly unroll it.
// For example, consider this hierarchy:
//
// 1 2
// │ │
// ▼ ▼
// 3 ────▶ 4
// ▲ │
// └── 5 ◀─┘
//
// When approaching the cycle from `1`, it unrolls as `1, 3, 4, 5` but when approaching it from `2` it unrolls as `2, 4, 5, 3`.
// This ensures a consistent structure for the navigation tree, no matter which render node was indexed first.
//
// Alternatively we could process the entire graph and break the cycle based on which node has the shortest path from the root.
// However, because DocC already warns about the cyclic curation, it seem sufficient to create a consistent navigation tree,
// even if it's not as small as it could be if we were using a more sophisticated graph algorithm.

return _copy([])
}

/// Private version of the logic to copy the current node and children to new instances preserving the node item.
/// - Parameter hierarchy: The set containing the parent items to avoid entering in an infinite loop.
private func _copy(_ hierarchy: Set<NavigatorItem>) -> NavigatorTree.Node {
/// The private copy implementation which tracks the already encountered items to avoid an infinite recursion.
private func _copy(_ seen: Set<NavigatorItem>) -> NavigatorTree.Node {
let mirror = NavigatorTree.Node(item: item, bundleIdentifier: bundleIdentifier)
guard !hierarchy.contains(item) else { return mirror } // Avoid to enter in an infity loop.
var updatedHierarchy = hierarchy
if let parentItem = parent?.item { updatedHierarchy.insert(parentItem) }
children.forEach { (child) in
let childCopy = child._copy(updatedHierarchy)
guard !seen.contains(item) else { return mirror } // Avoid an infinite recursion.
var seen = seen
seen.insert(item)

// Avoid repeating children that have already occurred in this path through the navigation hierarchy.
let childrenNotAlreadyEncountered = children.filter { !seen.contains($0.item) }
for (child, indexAfter) in zip(childrenNotAlreadyEncountered, 1...) {
// If the child is a group marker, ensure that the group is not empty by checking that it's followed by a non-group.
// If we didn't do this, we could end up with empty groups if all the children had already been encountered higher up in the hierarchy.
if child.item.pageType == NavigatorIndex.PageType.groupMarker.rawValue {
guard indexAfter < childrenNotAlreadyEncountered.endIndex,
childrenNotAlreadyEncountered[indexAfter].item.pageType != NavigatorIndex.PageType.groupMarker.rawValue
else {
// This group is empty. Skip copying it.
continue
}
// This group is not empty. Include a copy of it
}
let childCopy = child._copy(seen)
mirror.add(child: childCopy)
}

return mirror
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
This source file is part of the Swift.org open source project

Copyright (c) 2024 Apple Inc. and the Swift project authors
Licensed under Apache License v2.0 with Runtime Library Exception

See https://swift.org/LICENSE.txt for license information
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
*/

extension DocumentationContext {
/// Options that configure how the context produces node breadcrumbs.
struct PathOptions: OptionSet {
let rawValue: Int

/// Prefer a technology as the canonical path over a shorter path.
static let preferTechnologyRoot = PathOptions(rawValue: 1 << 0)
}

/// Finds all finite (acyclic) paths, also called "breadcrumbs", to the given reference in the topic graph.
///
/// Each path is a list of references that describe a walk through the topic graph from a leaf node up to, but not including, the given `reference`.
///
/// The first path is the canonical path to the node. The other paths are sorted by increasing length (number of components).
///
/// > Note:
/// If all paths from the given reference are infinite (cycle back on themselves) then this function will return an empty list, because there are no _finite_ paths in the topic graph from that reference.
///
/// - Parameters:
/// - reference: The reference to find paths to.
/// - options: Options for how the context produces node breadcrumbs.
/// - Returns: A list of finite paths to the given reference in the topic graph.
func finitePaths(to reference: ResolvedTopicReference, options: PathOptions = []) -> [[ResolvedTopicReference]] {
topicGraph.reverseEdgesGraph
.allFinitePaths(from: reference)
// Graph traversal typically happens from the starting point outwards, but the callers of `finitePaths(to:options:)`
// expect paths going inwards from the leaves to the starting point, excluding the starting point itself.
// To match the caller's expectations we remove the starting point and then flip the paths.
.map { $0.dropFirst().reversed() }
.sorted { (lhs, rhs) -> Bool in
// Order a path rooted in a technology as the canonical one.
if options.contains(.preferTechnologyRoot), let first = lhs.first {
return try! entity(with: first).semantic is Technology
}

return breadcrumbsAreInIncreasingOrder(lhs, rhs)
}
}

/// Finds the shortest finite (acyclic) path, also called "breadcrumb", to the given reference in the topic graph.
///
/// The path is a list of references that describe a walk through the topic graph from a leaf node up to, but not including, the given `reference`.
///
/// > Note:
/// If all paths from the given reference are infinite (cycle back on themselves) then this function will return `nil`, because there are no _finite_ paths in the topic graph from that reference.
///
/// - Parameter reference: The reference to find the shortest path to.
/// - Returns: The shortest path to the given reference, or `nil` if all paths to the reference are infinite (contain cycles).
func shortestFinitePath(to reference: ResolvedTopicReference) -> [ResolvedTopicReference]? {
topicGraph.reverseEdgesGraph
.shortestFinitePaths(from: reference)
// Graph traversal typically happens from the starting point outwards, but the callers of `shortestFinitePaths(to:)`
// expect paths going inwards from the leaves to the starting point, excluding the starting point itself.
// To match the caller's expectations we remove the starting point and then flip the paths.
.map { $0.dropFirst().reversed() }
.min(by: breadcrumbsAreInIncreasingOrder)
}

/// Finds all the reachable root node references from the given reference.
///
/// > Note:
/// If all paths from the given reference are infinite (cycle back on themselves) then this function will return an empty set, because there are no reachable roots in the topic graph from that reference.
///
/// - Parameter reference: The reference to find reachable root node references from.
/// - Returns: The references of the root nodes that are reachable fro the given reference, or `[]` if all paths from the reference are infinite (contain cycles).
func reachableRoots(from reference: ResolvedTopicReference) -> Set<ResolvedTopicReference> {
topicGraph.reverseEdgesGraph.reachableLeafNodes(from: reference)
}
}

/// Compares two breadcrumbs for sorting so that the breadcrumb with fewer components come first and breadcrumbs with the same number of components are sorted alphabetically.
private func breadcrumbsAreInIncreasingOrder(_ lhs: [ResolvedTopicReference], _ rhs: [ResolvedTopicReference]) -> Bool {
// If the breadcrumbs have the same number of components, sort alphabetically to produce stable results.
guard lhs.count != rhs.count else {
return lhs.map({ $0.path }).joined(separator: ",") < rhs.map({ $0.path }).joined(separator: ",")
}
// Otherwise, sort by the number of breadcrumb components.
return lhs.count < rhs.count
}

119 changes: 41 additions & 78 deletions Sources/SwiftDocC/Infrastructure/DocumentationContext.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2247,7 +2247,7 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
let automaticallyCurated = autoCurateSymbolsInTopicGraph()

// Crawl the rest of the symbols that haven't been crawled so far in hierarchy pre-order.
allCuratedReferences = try crawlSymbolCuration(in: automaticallyCurated.map(\.child), bundle: bundle, initial: allCuratedReferences)
allCuratedReferences = try crawlSymbolCuration(in: automaticallyCurated.map(\.symbol), bundle: bundle, initial: allCuratedReferences)

// Remove curation paths that have been created automatically above
// but we've found manual curation for in the second crawl pass.
Expand Down Expand Up @@ -2306,19 +2306,18 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
/// call `removeUnneededAutomaticCuration(_:)` which walks the list of automatic curations and removes
/// the parent <-> child topic graph relationships that have been obsoleted.
///
/// - Parameter automaticallyCurated: A list of topics that have been automatically curated.
func removeUnneededAutomaticCuration(_ automaticallyCurated: [(child: ResolvedTopicReference, parent: ResolvedTopicReference)]) {
for pair in automaticallyCurated {
let paths = pathsTo(pair.child)

// Collect all current unique parents of the child.
let parents = Set(paths.map({ $0.last?.path }))

// Check if the topic has multiple curation paths
guard parents.count > 1 else { continue }

// The topic has been manually curated, remove the automatic curation now.
topicGraph.removeEdge(fromReference: pair.parent, toReference: pair.child)
/// - Parameter automaticallyCurated: A list of automatic curation records.
func removeUnneededAutomaticCuration(_ automaticallyCurated: [AutoCuratedSymbolRecord]) {
// It might look like it would be correct to check `topicGraph.nodes[symbol]?.isManuallyCurated` here,
// but that would incorrectly remove the only parent if the manual curation and the automatic curation was the same.
//
// Similarly, it might look like it would be correct to only check `parents(of: symbol).count > 1` here,
// but that would incorrectly remove the automatic curation for symbols with different language representations with different parents.
for (symbol, parent, counterpartParent) in automaticallyCurated where parents(of: symbol).count > (counterpartParent != nil ? 2 : 1) {
topicGraph.removeEdge(fromReference: parent, toReference: symbol)
if let counterpartParent {
topicGraph.removeEdge(fromReference: counterpartParent, toReference: symbol)
}
}
}

Expand Down Expand Up @@ -2359,20 +2358,39 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
}
}

typealias AutoCuratedSymbolRecord = (symbol: ResolvedTopicReference, parent: ResolvedTopicReference, counterpartParent: ResolvedTopicReference?)

/// Curate all remaining uncurated symbols under their natural parent from the symbol graph.
///
/// This will include all symbols that were not manually curated by the documentation author.
/// - Returns: An ordered list of symbol references that have been added to the topic graph automatically.
private func autoCurateSymbolsInTopicGraph() -> [(child: ResolvedTopicReference, parent: ResolvedTopicReference)] {
var automaticallyCuratedSymbols = [(ResolvedTopicReference, ResolvedTopicReference)]()
linkResolver.localResolver.traverseSymbolAndParentPairs { reference, parentReference in
private func autoCurateSymbolsInTopicGraph() -> [AutoCuratedSymbolRecord] {
var automaticallyCuratedSymbols = [AutoCuratedSymbolRecord]()
linkResolver.localResolver.traverseSymbolAndParents { reference, parentReference, counterpartParentReference in
guard let topicGraphNode = topicGraph.nodeWithReference(reference),
let topicGraphParentNode = topicGraph.nodeWithReference(parentReference),
// Check that the node hasn't got any parents from manual curation
// Check that the node isn't already manually curated
!topicGraphNode.isManuallyCurated
else { return }

// Check that the symbol doesn't already have parent's that aren't either language representation's hierarchical parent.
// This for example happens for default implementation and symbols that are requirements of protocol conformances.
guard parents(of: reference).allSatisfy({ $0 == parentReference || $0 == counterpartParentReference }) else {
return
}

guard let topicGraphParentNode = topicGraph.nodeWithReference(parentReference) else {
preconditionFailure("Node with reference \(parentReference.absoluteString) exist in link resolver but not in topic graph.")
}
topicGraph.addEdge(from: topicGraphParentNode, to: topicGraphNode)
automaticallyCuratedSymbols.append((child: reference, parent: parentReference))

if let counterpartParentReference {
guard let topicGraphCounterpartParentNode = topicGraph.nodeWithReference(counterpartParentReference) else {
preconditionFailure("Node with reference \(counterpartParentReference.absoluteString) exist in link resolver but not in topic graph.")
}
topicGraph.addEdge(from: topicGraphCounterpartParentNode, to: topicGraphNode)
}
// Collect a single automatic curation record for both language representation parents.
automaticallyCuratedSymbols.append((reference, parentReference, counterpartParentReference))
}
return automaticallyCuratedSymbols
}
Expand Down Expand Up @@ -2736,15 +2754,9 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
return topicGraph.nodes[reference]?.title
}

/**
Traverse the Topic Graph breadth-first, starting at the given reference.
*/
func traverseBreadthFirst(from reference: ResolvedTopicReference, _ observe: (TopicGraph.Node) -> TopicGraph.Traversal) {
guard let node = topicGraph.nodeWithReference(reference) else {
return
}

topicGraph.traverseBreadthFirst(from: node, observe)
/// Returns a sequence that traverses the topic graph in breadth first order from a given reference, without visiting the same node more than once.
func breadthFirstSearch(from reference: ResolvedTopicReference) -> some Sequence<TopicGraph.Node> {
topicGraph.breadthFirstSearch(from: reference)
}

/**
Expand Down Expand Up @@ -2866,55 +2878,6 @@ public class DocumentationContext: DocumentationContextDataProviderDelegate {
.map { $0.reference }
}

/// Options to consider when producing node breadcrumbs.
struct PathOptions: OptionSet {
let rawValue: Int

/// The node is a technology page; sort the path to a technology as canonical.
static let preferTechnologyRoot = PathOptions(rawValue: 1 << 0)
}

/// Finds all paths (breadcrumbs) to the given node reference.
///
/// Each path is an array of references to the symbols from the module symbol to the current one.
/// The first path in the array is always the canonical path to the symbol.
///
/// - Parameters:
/// - reference: The reference to build that paths to.
/// - currentPathToNode: Used for recursion - an accumulated path to "continue" working on.
/// - Returns: A list of paths to the current reference in the topic graph.
func pathsTo(_ reference: ResolvedTopicReference, currentPathToNode: [ResolvedTopicReference] = [], options: PathOptions = []) -> [[ResolvedTopicReference]] {
let nodeParents = parents(of: reference)
guard !nodeParents.isEmpty else {
// The path ends with this node
return [currentPathToNode]
}
var results = [[ResolvedTopicReference]]()
for parentReference in nodeParents {
let parentPaths = pathsTo(parentReference, currentPathToNode: [parentReference] + currentPathToNode)
results.append(contentsOf: parentPaths)
}

// We are sorting the breadcrumbs by the path distance to the documentation root
// so that the first element is the shortest path that we are using as canonical.
results.sort { (lhs, rhs) -> Bool in
// Order a path rooted in a technology as the canonical one.
if options.contains(.preferTechnologyRoot), let first = lhs.first {
return try! entity(with: first).semantic is Technology
}

// If the breadcrumbs have equal amount of components
// sort alphabetically to produce stable paths order.
guard lhs.count != rhs.count else {
return lhs.map({ $0.path }).joined(separator: ",") < rhs.map({ $0.path }).joined(separator: ",")
}
// Order by the length of the breadcrumb.
return lhs.count < rhs.count
}

return results
}

func dumpGraph() -> String {
return topicGraph.nodes.values
.filter { parents(of: $0.reference).isEmpty }
Expand Down
Loading