Skip to content

Commit

Permalink
Update HtmlUtils.SanitizeHtml()
Browse files Browse the repository at this point in the history
  • Loading branch information
RickStrahl committed Sep 6, 2018
1 parent 9acb2fa commit 746f744
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 12 deletions.
4 changes: 2 additions & 2 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Westwind.Utilities Changelog

### 3.0.19
*September 5th, 2018**
### 3.0.20
*September 6th, 2018*

* **HtmlUtils.SanitizeHtml()**
RegEx-based HTML sanitization that handles the most common script injection scenarios for `<script>`, `<iframe>`, `<form>` etc. tags, `javascript:` script embeds and `onXXX` DOM element event handlers.
Expand Down
69 changes: 69 additions & 0 deletions Westwind.Utilities.Test/SanitizeHtmlTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
using System;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Westwind.Utilities;

namespace Westwind.Utilities.Tests
{
    /// <summary>
    /// Tests for <c>HtmlUtils.SanitizeHtml()</c> covering script-style tag blocks,
    /// <c>javascript:</c> URLs (plain and numeric-entity encoded) and inline
    /// <c>onXXX</c> event handler attributes.
    /// </summary>
    [TestClass]
    public class HtmlSanitizeTests
    {
        /// <summary>
        /// A mixed-case script block must not survive sanitizing.
        /// </summary>
        [TestMethod]
        public void HtmlSanitizeScriptTags()
        {
            string html = "<div>User input with <ScRipt>alert('Gotcha');</ScRipt></div>";

            var result = HtmlUtils.SanitizeHtml(html);

            Console.WriteLine(result);

            // Compare case-insensitively: checking only for the exact "<ScRipt>" casing
            // would let a sanitizer that merely re-cases the tag slip through.
            Assert.IsFalse(
                result.IndexOf("<script", StringComparison.OrdinalIgnoreCase) >= 0,
                "Script tag was not removed.");
        }


        /// <summary>
        /// A double-quoted href with a javascript: URL must be neutralized.
        /// </summary>
        [TestMethod]
        public void HtmlSanitizeJavaScriptTags()
        {
            string html = "<div>User input with <a href=\"javascript: alert('Gotcha')\">Don't hurt me!<a/></div>";

            var result = HtmlUtils.SanitizeHtml(html);

            Console.WriteLine(result);
            Assert.IsFalse(result.Contains("javascript:"), "javascript: URL was not removed.");
        }

        /// <summary>
        /// A single-quoted href with a javascript: URL must be neutralized.
        /// </summary>
        [TestMethod]
        public void HtmlSanitizeJavaScriptTagsSingleQuotes()
        {
            string html = "<div>User input with <a href='javascript: alert(\"Gotcha\");'>Don't hurt me!<a/></div>";

            var result = HtmlUtils.SanitizeHtml(html);

            Console.WriteLine(result);
            Assert.IsFalse(result.Contains("javascript:"), "javascript: URL was not removed.");
        }

        /// <summary>
        /// An href whose "javascript" scheme is spelled with numeric character
        /// references (&amp;#106;&amp;#97;...) must be neutralized as well.
        /// </summary>
        [TestMethod]
        public void HtmlSanitizeJavaScriptTagsWithUnicodeQuotes()
        {
            string html = "<div>User input with <a href='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;:alert(\"javascript active\");'>Don't hurt me!<a/></div>";

            var result = HtmlUtils.SanitizeHtml(html);

            Console.WriteLine(result);
            Assert.IsFalse(result.Contains("&#106;&#97;&#118"), "Entity-encoded javascript: URL was not removed.");
        }


        /// <summary>
        /// Inline event handler attributes must be stripped, both double- and single-quoted.
        /// </summary>
        [TestMethod]
        public void HtmlSanitizeEventAttributes()
        {
            string html = "<div onmouseover=\"alert('Gotcha!')\">User input with " +
                          "<div onclick='alert(\"Gotcha!\");'>Don't hurt me!<div/>" +
                          "</div>";

            var result = HtmlUtils.SanitizeHtml(html);

            Console.WriteLine(result);

            // Attribute names are followed by '=', never ':' - look for the bare name.
            // Separate assertions so a failure identifies which handler survived.
            Assert.IsFalse(result.Contains("onmouseover"), "onmouseover handler was not removed.");
            Assert.IsFalse(result.Contains("onclick"), "onclick handler was not removed.");
        }
    }
}
13 changes: 6 additions & 7 deletions Westwind.Utilities/Utilities/HtmlUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,11 @@ internal static string ResolveUrl(string originalUrl)
// Just to be sure fix up any double slashes
return newUrl;
#else
throw new ArgumentException("Invalid URL: Relative URL not allowed.");
throw new ArgumentException("Invalid URL: Relative URL not allowed.");
#endif
}
}

return originalUrl;
return originalUrl;
}


Expand All @@ -254,15 +254,14 @@ public static string HtmlAbstract(string html, int length)

// Pipe-delimited list of blacklisted tag names. The value is interpolated
// into the _RegExScript pattern as a regex alternation, so entries must be
// plain tag names separated by '|'.
static string HtmlSanitizeTagBlackList { get; } = "script|iframe|object|embed|form";

static Regex _RegExScript = new Regex(
$@"(<({HtmlSanitizeTagBlackList})\b[^<]*(?:(?!<\/({HtmlSanitizeTagBlackList}))<[^<]*)*<\/({HtmlSanitizeTagBlackList})>)",
RegexOptions.IgnoreCase | RegexOptions.Multiline);
static Regex _RegExScript = new Regex($@"(<({HtmlSanitizeTagBlackList})\b[^<]*(?:(?!<\/({HtmlSanitizeTagBlackList}))<[^<]*)*<\/({HtmlSanitizeTagBlackList})>)",
RegexOptions.IgnoreCase | RegexOptions.Multiline);

// strip javascript: and unicode representation of javascript:
// href='javascript:alert(\"gotcha\")'
// href='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;:alert(\"gotcha\");'
static Regex _RegExJavaScriptHref = new Regex(
@"<.*?(href|src|dynsrc|lowsrc)=.{0,20}((javascript:)|(&#106;&#97)).*?>",
@"<.*?(href|src|dynsrc|lowsrc)=.{0,20}((javascript:)|(&#)).*?>",
RegexOptions.IgnoreCase | RegexOptions.Multiline);

static Regex _RegExOnEventAttributes = new Regex(
Expand Down
5 changes: 2 additions & 3 deletions Westwind.Utilities/Westwind.Utilities.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net45;net40</TargetFrameworks>
<Version>3.0.20</Version>
<RuntimeIdentifiers>win7-x86;win7-x64</RuntimeIdentifiers>
<Authors>Rick Strahl</Authors>
<RequireLicenseAcceptance>false</RequireLicenseAcceptance>
Expand All @@ -25,9 +26,7 @@
<RepositoryType>Github</RepositoryType>
<Company>West Wind Technologies</Company>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<Version>3.0.19</Version>
<AssemblyVersion>3.0.19</AssemblyVersion>
<FileVersion>3.0.19</FileVersion>

<RepositoryUrl>https://github.com/RickStrahl/Westwind.Utilities</RepositoryUrl>
</PropertyGroup>

Expand Down

0 comments on commit 746f744

Please sign in to comment.