Skip to content

Commit

Permalink
Merge pull request #1134 from lahma/issue/#584
Browse files Browse the repository at this point in the history
Improve QuerySelectorAll performance
  • Loading branch information
FlorianRappl committed Aug 20, 2023
2 parents 108e60d + 08c2f22 commit c80b834
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 42 deletions.
10 changes: 4 additions & 6 deletions src/AngleSharp.Benchmarks/AngleSharp.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net472</TargetFramework>
<TargetFrameworks>net472;net6.0</TargetFrameworks>
<SignAssembly>false</SignAssembly>
</PropertyGroup>

Expand All @@ -10,16 +10,14 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.13.5" />
<PackageReference Include="CsQuery" Version="1.3.5-beta5" />
<PackageReference Include="BenchmarkDotNet" Version="0.13.7" />
<PackageReference Include="CsQuery" Version="1.3.5-beta5" Condition=" '$(TargetFramework)' == 'net472' " />
<PackageReference Include="HtmlAgilityPack" Version="1.11.46" />
<PackageReference Include="System.Net.Http" Version="4.3.4" />
</ItemGroup>

<ItemGroup>
<None Update="page.html">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="page.html" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>

</Project>
11 changes: 8 additions & 3 deletions src/AngleSharp.Benchmarks/ParserBenchmark.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
using System.Collections.Generic;
using System.Text;
using AngleSharp.Html.Parser;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;

#if NETFRAMEWORK
using CsQuery;
using CsQuery.ExtensionMethods.Internal;
using CsQuery.HtmlParser;
#endif

using HtmlAgilityPack;

namespace AngleSharp.Benchmarks
Expand All @@ -20,7 +23,7 @@ public IEnumerable<UrlTest> GetSources()
var websites = new UrlTests(
".html",
true);

websites.Include(
"http://www.amazon.com",
"http://www.blogspot.com",
Expand Down Expand Up @@ -78,14 +81,16 @@ public IEnumerable<UrlTest> GetSources()

[ParamsSource(nameof(GetSources))] public UrlTest UrlTest { get; set; }

#if NETFRAMEWORK
[Benchmark]
public void CsQuery()
{
var factory = new ElementFactory(DomIndexProviders.Simple);

using var stream = UrlTest.Source.ToStream();
factory.Parse(stream, Encoding.UTF8);
factory.Parse(stream, System.Text.Encoding.UTF8);
}
#endif

[Benchmark]
public void HTMLAgilityPack()
Expand Down
10 changes: 10 additions & 0 deletions src/AngleSharp.Benchmarks/SelectorBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
using AngleSharp.Html.Parser;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;

#if NETFRAMEWORK
using CsQuery;
#endif

namespace AngleSharp.Benchmarks
{
Expand All @@ -14,14 +17,19 @@ public class SelectorBenchmark
{
private static readonly HtmlParser angleSharpParser = new HtmlParser();
private IDocument angleSharpDocument;

#if NETFRAMEWORK
private CQ cqDocument;
#endif

/// <summary>
/// Reads "page.html" and parses it into the documents that the selector benchmarks query.
/// </summary>
[GlobalSetup]
public void GlobalSetup()
{
    // Read the markup a single time so every library parses the same input.
    var html = File.ReadAllText("page.html");

    angleSharpDocument = angleSharpParser.ParseDocument(html);
#if NETFRAMEWORK
    // The CsQuery document is only compiled in when NETFRAMEWORK is defined.
    cqDocument = CQ.CreateDocument(html);
#endif
}

[ParamsSource(nameof(GetSelectors))]
Expand Down Expand Up @@ -73,11 +81,13 @@ public void GlobalSetup()
"div > p > a"
};

#if NETFRAMEWORK
/// <summary>
/// Benchmark: evaluates the current selector against the pre-parsed CsQuery document.
/// </summary>
[Benchmark]
public void CsQuery() => cqDocument.Select(Selector);
#endif

[Benchmark]
public void AngleSharp()
Expand Down
5 changes: 3 additions & 2 deletions src/AngleSharp/Css/Dom/Internal/ComplexSelector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,12 @@ public void AppendSelector(ISelector selector, CssCombinator combinator)

private Boolean MatchCascade(Int32 pos, IElement element, IElement? scope)
{
var newElements = _combinators[pos].Transform!(element);
var combinatorSelector = _combinators[pos];
var newElements = combinatorSelector.Transform!(element);

foreach (var newElement in newElements)
{
if (_combinators[pos].Selector.Match(newElement, scope) && (pos == 0 || MatchCascade(pos - 1, newElement, scope)))
if (combinatorSelector.Selector.Match(newElement, scope) && (pos == 0 || MatchCascade(pos - 1, newElement, scope)))
{
return true;
}
Expand Down
41 changes: 31 additions & 10 deletions src/AngleSharp/Css/Dom/SelectorExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,19 @@ public static class SelectorExtensions
/// <returns>The resulting element or null.</returns>
public static IElement? MatchAny(this ISelector selector, IEnumerable<IElement> elements, IElement? scope)
{
var stack = new Stack<INode>();
foreach (var element in elements)
{
foreach (var descendentAndSelf in element.DescendentsAndSelf<IElement>())
stack.Clear();
var nodes = element.GetDescendantsAndSelf(
stack,
filter: static (node, state) => node is IElement e && state.Selector.Match(e, state.Scope),
state: new SelectorState(selector, scope));

var enumerator = nodes.GetEnumerator();
if (enumerator.MoveNext())
{
if (selector.Match(descendentAndSelf, scope))
{
return descendentAndSelf;
}
return (IElement?) enumerator.Current;
}
}

Expand Down Expand Up @@ -59,16 +64,32 @@ public static IHtmlCollection<IElement> MatchAll(this ISelector selector, IEnume

private static void MatchAll(this ISelector selector, IEnumerable<IElement> elements, IElement? scope, List<IElement> result)
{
var stack = new Stack<INode>();
foreach (var element in elements)
{
foreach (var descendentAndSelf in element.DescendentsAndSelf<IElement>())
stack.Clear();
var nodes = element.GetDescendantsAndSelf(
stack,
filter: static (node, state) => node is IElement e && state.Selector.Match(e, state.Scope),
state: new SelectorState(selector, scope));

foreach (var descendentAndSelf in nodes)
{
if (selector.Match(descendentAndSelf, scope))
{
result.Add(descendentAndSelf);
}
result.Add((IElement) descendentAndSelf);
}
}
}

/// <summary>
/// Immutable pair of a selector and an optional scope, bundled into a single value.
/// </summary>
private readonly struct SelectorState
{
    /// <summary>The selector carried by this state.</summary>
    public readonly ISelector Selector;

    /// <summary>The scope element carried by this state; may be null.</summary>
    public readonly IElement? Scope;

    /// <summary>
    /// Creates the state from the given selector and scope.
    /// </summary>
    /// <param name="selector">The selector to store.</param>
    /// <param name="scope">The scope element to store, if any.</param>
    public SelectorState(ISelector selector, IElement? scope)
    {
        this.Scope = scope;
        this.Selector = selector;
    }
}
}
}
15 changes: 13 additions & 2 deletions src/AngleSharp/Dom/Internal/NodeList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ namespace AngleSharp.Dom
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;

/// <summary>
/// Represents a list of Node instances or nodes.
Expand Down Expand Up @@ -34,17 +35,27 @@ internal NodeList()

/// <summary>
/// Gets or sets the node stored at the given index of the underlying entries.
/// </summary>
/// <param name="index">The 0-based index of the node.</param>
public Node this[Int32 index]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return _entries[index];
    }
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    set
    {
        _entries[index] = value;
    }
}

INode INodeList.this[Int32 index] => this[index];
INode INodeList.this[Int32 index]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => this[index];
}

#endregion

#region Properties

public Int32 Length => _entries.Count;
public Int32 Length
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get => _entries.Count;
}

#endregion

Expand Down
40 changes: 35 additions & 5 deletions src/AngleSharp/Dom/NodeExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,49 @@ public static Boolean IsDescendantOf(this INode node, INode parent)
/// <returns>An iterator over all descendants and itself.</returns>
public static IEnumerable<INode> GetDescendantsAndSelf(this INode parent)
{
var stack = new Stack<INode>();
return GetDescendantsAndSelf<Object?>(parent, new Stack<INode>(), null, null);
}

/// <summary>
/// Gets the descendant nodes and itself of the provided parent, in tree order.
/// </summary>
/// <param name="parent">The parent of the descendants.</param>
/// <param name="stack">Stack instance to be used (allows reuse).</param>
/// <param name="filter">Optional filter to run against items.</param>
/// <param name="state">Optional state to help with filtering.</param>
/// <returns>An iterator over all descendants and itself.</returns>
internal static IEnumerable<INode> GetDescendantsAndSelf<TState>(this INode parent, Stack<INode> stack, Func<INode, TState?, Boolean>? filter = null, TState? state = default)
{
stack.Push(parent);

while (stack.Count > 0)
{
var next = stack.Pop();
yield return next;

var length = next.ChildNodes.Length;
if (filter == null || filter(next, state))
{
yield return next;
}

var childNodes = next.ChildNodes;

while (length > 0)
// we only have one implementation
if (childNodes is NodeList nodeList)
{
var length = nodeList.Length;
while (length > 0)
{
stack.Push(nodeList[--length]);
}
}
else
{
stack.Push(next.ChildNodes[--length]);
// unlikely virtual dispatch
var length = childNodes.Length;
while (length > 0)
{
stack.Push(childNodes[--length]);
}
}
}
}
Expand Down
Loading

0 comments on commit c80b834

Please sign in to comment.