From eb1a5d6ae722214f2f47f85635e837a812b13e7b Mon Sep 17 00:00:00 2001 From: Andrey Taritsyn Date: Sat, 6 May 2017 21:44:25 +0300 Subject: [PATCH] HTML and XHTML minifiers now support processing of CDATA sections outside the `script` and `style` tags --- README.md | 5 +- logo.png => images/WebMarkupMin_Logo.png | Bin .../WebMarkupMin.Core.nuspec | 3 +- nuget/WebMarkupMin.Core/readme.txt | 4 +- src/WebMarkupMin.Core/GenericHtmlMinifier.cs | 26 +++- src/WebMarkupMin.Core/Parsers/HtmlNodeType.cs | 1 + src/WebMarkupMin.Core/Parsers/HtmlParser.cs | 47 +++--- .../Parsers/MarkupParserBase.cs | 34 +++++ .../Parsers/MarkupParsingHandlersBase.cs | 16 ++ src/WebMarkupMin.Core/Parsers/XmlParser.cs | 32 ---- .../Parsers/XmlParsingHandlers.cs | 16 -- .../Html/MinificationTests.cs | 143 +++++++++++------- .../Html/ParsingTests.cs | 10 ++ 13 files changed, 208 insertions(+), 129 deletions(-) rename logo.png => images/WebMarkupMin_Logo.png (100%) diff --git a/README.md b/README.md index d6b7addb..bc0e1e69 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ -WebMarkupMin logo +Web Markup Minifier +=================== + +WebMarkupMin logo The **Web Markup Minifier** (abbreviated WebMarkupMin) - a .NET library that contains a set of markup minifiers. The objective of this project is to improve the performance of web applications by reducing the size of HTML, XHTML and XML code. diff --git a/logo.png b/images/WebMarkupMin_Logo.png similarity index 100% rename from logo.png rename to images/WebMarkupMin_Logo.png diff --git a/nuget/WebMarkupMin.Core/WebMarkupMin.Core.nuspec b/nuget/WebMarkupMin.Core/WebMarkupMin.Core.nuspec index 246b4f50..63fefbb5 100644 --- a/nuget/WebMarkupMin.Core/WebMarkupMin.Core.nuspec +++ b/nuget/WebMarkupMin.Core/WebMarkupMin.Core.nuspec @@ -18,7 +18,8 @@ Minification of markup produces by removing extra whitespaces, comments and redu Also supports minification of views of popular JavaScript template engines: KnockoutJS, Kendo UI MVVM and AngularJS 1.X. The Web Markup Minifier (abbreviated WebMarkupMin) is a .NET library that contains a set of markup minifiers. The objective of this project is to improve the performance of web applications by reducing the size of HTML, XHTML and XML code. - Added support of .NET Core 1.0.4. + 1. Added support of .NET Core 1.0.4; +2. HTML and XHTML minifiers now support processing of CDATA sections outside the `script` and `style` tags. Copyright (c) 2013-2017 Andrey Taritsyn - http://www.taritsyn.ru en-US WebMarkupMin Markup HTML XHTML XML Minification Minifier Minify Performance Optimization Compression diff --git a/nuget/WebMarkupMin.Core/readme.txt b/nuget/WebMarkupMin.Core/readme.txt index c1a325f2..eea99efd 100644 --- a/nuget/WebMarkupMin.Core/readme.txt +++ b/nuget/WebMarkupMin.Core/readme.txt @@ -46,7 +46,9 @@ ============= RELEASE NOTES ============= - Added support of .NET Core 1.0.4. + 1. Added support of .NET Core 1.0.4; + 2. HTML and XHTML minifiers now support processing of CDATA sections outside the + `script` and `style` tags. ============= DOCUMENTATION diff --git a/src/WebMarkupMin.Core/GenericHtmlMinifier.cs b/src/WebMarkupMin.Core/GenericHtmlMinifier.cs index 76abb662..53bc705c 100644 --- a/src/WebMarkupMin.Core/GenericHtmlMinifier.cs +++ b/src/WebMarkupMin.Core/GenericHtmlMinifier.cs @@ -84,17 +84,17 @@ internal sealed class GenericHtmlMinifier : IMarkupMinifier RegexOptions.RightToLeft); private static readonly Regex _beginCdataSectionRegex = new Regex( - @"^\s*\s*$", RegexOptions.RightToLeft); private static readonly Regex _styleBeginCdataSectionRegex = new Regex( - @"^\s*/\*\s*\s*\*/\s*$", RegexOptions.RightToLeft); private static readonly Regex _styleBeginMaxCompatibleCdataSectionRegex = new Regex( - @"^\s*\s*$", RegexOptions.RightToLeft); @@ -104,14 +104,13 @@ internal sealed class GenericHtmlMinifier : IMarkupMinifier RegexOptions.RightToLeft); private static readonly Regex _scriptBeginCdataSectionRegex = new Regex( - @"^\s*(?://[ \t\v]*|(?:\r?\n[ \t\v]*)?/\*\s*\]\]>\s*\*/)\s*$", RegexOptions.RightToLeft); private static readonly Regex _scriptBeginMaxCompatibleCdataSectionRegex = new Regex( @"^\s*(?:" + @"|(?:\r?\n[ \t\v]*)?/\*\s*\]\]>\s*\*/\s*-->)\s*$", RegexOptions.RightToLeft); @@ -324,6 +323,7 @@ internal sealed class GenericHtmlMinifier : IMarkupMinifier Comment = CommentHandler, IfConditionalComment = IfConditionalCommentHandler, EndIfConditionalComment = EndIfConditionalCommentHandler, + CdataSection = CdataSectionHandler, StartTag = StartTagHandler, EndTag = EndTagHandler, Text = TextHandler, @@ -688,6 +688,20 @@ private void CommentHandler(MarkupParsingContext context, string commentText) } } + /// + /// CDATA sections handler + /// + /// Markup parsing context + /// CDATA text + private void CdataSectionHandler(MarkupParsingContext context, string cdataText) + { + _currentNodeType = HtmlNodeType.CdataSection; + + _buffer.Add(""); + } + /// /// If conditional comments handler /// diff --git a/src/WebMarkupMin.Core/Parsers/HtmlNodeType.cs b/src/WebMarkupMin.Core/Parsers/HtmlNodeType.cs index 1088b13a..d2b0b294 100644 --- a/src/WebMarkupMin.Core/Parsers/HtmlNodeType.cs +++ b/src/WebMarkupMin.Core/Parsers/HtmlNodeType.cs @@ -11,6 +11,7 @@ internal enum HtmlNodeType : byte Comment, IfConditionalComment, EndIfConditionalComment, + CdataSection, StartTag, EndTag, Text, diff --git a/src/WebMarkupMin.Core/Parsers/HtmlParser.cs b/src/WebMarkupMin.Core/Parsers/HtmlParser.cs index 77846044..4aff84d0 100644 --- a/src/WebMarkupMin.Core/Parsers/HtmlParser.cs +++ b/src/WebMarkupMin.Core/Parsers/HtmlParser.cs @@ -202,14 +202,19 @@ public void Parse(string content) break; case '!': + int fourthCharPosition = thirdCharPosition + 1; + char fourthCharValue; + bool fourthCharExist = content.TryGetChar(fourthCharPosition, out fourthCharValue); + + if (!fourthCharExist) + { + break; + } + switch (thirdCharValue) { case '-': - int fourthCharPosition = thirdCharPosition + 1; - char fourthCharValue; - bool fourthCharExist = content.TryGetChar(fourthCharPosition, out fourthCharValue); - - if (fourthCharExist && fourthCharValue == '-') + if (fourthCharValue == '-') { // Comments int fifthCharPosition = fourthCharPosition + 1; @@ -247,21 +252,29 @@ public void Parse(string content) break; case '[': - // Remaining conditional comments - - // Hidden End If conditional comment (e.g. ) - isProcessed = ProcessHiddenEndIfComment(); - - if (!isProcessed) + if (fourthCharValue == 'C') { - // Revealed If conditional comment (e.g. ) - isProcessed = ProcessRevealedIfComment(); + // CDATA sections + isProcessed = ProcessCdataSection(); } - - if (!isProcessed) + else { - // Revealed End If conditional comment (e.g. ) - isProcessed = ProcessRevealedEndIfComment(); + // Remaining conditional comments + + // Hidden End If conditional comment (e.g. ) + isProcessed = ProcessHiddenEndIfComment(); + + if (!isProcessed) + { + // Revealed If conditional comment (e.g. ) + isProcessed = ProcessRevealedIfComment(); + } + + if (!isProcessed) + { + // Revealed End If conditional comment (e.g. ) + isProcessed = ProcessRevealedEndIfComment(); + } } break; diff --git a/src/WebMarkupMin.Core/Parsers/MarkupParserBase.cs b/src/WebMarkupMin.Core/Parsers/MarkupParserBase.cs index 268f94cf..42cab292 100644 --- a/src/WebMarkupMin.Core/Parsers/MarkupParserBase.cs +++ b/src/WebMarkupMin.Core/Parsers/MarkupParserBase.cs @@ -2,6 +2,7 @@ using System.Text.RegularExpressions; using WebMarkupMin.Core.Resources; +using WebMarkupMin.Core.Utilities; namespace WebMarkupMin.Core.Parsers { @@ -170,6 +171,39 @@ protected bool ProcessEndIgnoringCommentTag() _innerContext.NodeCoordinates, _innerContext.GetSourceFragment()); } + /// + /// Process a CDATA sections + /// + /// Result of processing (true - is processed; false - is not processed) + protected bool ProcessCdataSection() + { + bool isProcessed = false; + string content = _innerContext.SourceCode; + + if (content.CustomStartsWith("", cdataStartPosition, StringComparison.Ordinal); + + if (cdataEndPosition > cdataStartPosition) + { + string cdataText = content.Substring(cdataStartPosition + 9, + cdataEndPosition - cdataStartPosition - 9); + + var cdataSectionHandler = CommonHandlers.CdataSection; + if (cdataSectionHandler != null) + { + cdataSectionHandler(_context, cdataText); + } + + _innerContext.IncreasePosition(cdataEndPosition + 3 - cdataStartPosition); + isProcessed = true; + } + } + + return isProcessed; + } + #endregion } } \ No newline at end of file diff --git a/src/WebMarkupMin.Core/Parsers/MarkupParsingHandlersBase.cs b/src/WebMarkupMin.Core/Parsers/MarkupParsingHandlersBase.cs index d0ca4d6d..5a1d70c8 100644 --- a/src/WebMarkupMin.Core/Parsers/MarkupParsingHandlersBase.cs +++ b/src/WebMarkupMin.Core/Parsers/MarkupParsingHandlersBase.cs @@ -23,6 +23,15 @@ public CommentDelegate Comment set; } + /// + /// CDATA sections handler + /// + public CdataSectionDelegate CdataSection + { + get; + set; + } + /// /// Text handler /// @@ -56,6 +65,13 @@ public IgnoredFragmentDelegate IgnoredFragment /// Comment text public delegate void CommentDelegate(MarkupParsingContext context, string comment); + /// + /// CDATA sections delegate + /// + /// Markup parsing context + /// CDATA text + public delegate void CdataSectionDelegate(MarkupParsingContext context, string cdataText); + /// /// Text delegate /// diff --git a/src/WebMarkupMin.Core/Parsers/XmlParser.cs b/src/WebMarkupMin.Core/Parsers/XmlParser.cs index ba894734..76382863 100644 --- a/src/WebMarkupMin.Core/Parsers/XmlParser.cs +++ b/src/WebMarkupMin.Core/Parsers/XmlParser.cs @@ -246,38 +246,6 @@ private bool ProcessProcessingInstruction() return isProcessed; } - /// - /// Process a CDATA sections - /// - /// Result of processing (true - is processed; false - is not processed) - private bool ProcessCdataSection() - { - bool isProcessed = false; - string content = _innerContext.SourceCode; - - if (content.CustomStartsWith("", cdataStartPosition, StringComparison.Ordinal); - - if (cdataEndPosition > cdataStartPosition) - { - string cdataText = content.Substring(cdataStartPosition + 9, - cdataEndPosition - cdataStartPosition - 9); - - if (_handlers.CdataSection != null) - { - _handlers.CdataSection(_context, cdataText); - } - - _innerContext.IncreasePosition(cdataEndPosition + 3 - cdataStartPosition); - isProcessed = true; - } - } - - return isProcessed; - } - /// /// Process a start tag /// diff --git a/src/WebMarkupMin.Core/Parsers/XmlParsingHandlers.cs b/src/WebMarkupMin.Core/Parsers/XmlParsingHandlers.cs index bbb90577..ea7bb3e1 100644 --- a/src/WebMarkupMin.Core/Parsers/XmlParsingHandlers.cs +++ b/src/WebMarkupMin.Core/Parsers/XmlParsingHandlers.cs @@ -25,15 +25,6 @@ public ProcessingInstructionDelegate ProcessingInstruction set; } - /// - /// CDATA sections handler - /// - public CdataSectionDelegate CdataSection - { - get; - set; - } - /// /// Start tags handler /// @@ -78,13 +69,6 @@ public EmptyTagDelegate EmptyTag public delegate void ProcessingInstructionDelegate(MarkupParsingContext context, string instructionName, IList attributes); - /// - /// CDATA sections delegate - /// - /// Markup parsing context - /// CDATA text - public delegate void CdataSectionDelegate(MarkupParsingContext context, string cdataText); - /// /// Start tags delegate /// diff --git a/test/WebMarkupMin.Core.Test/Html/MinificationTests.cs b/test/WebMarkupMin.Core.Test/Html/MinificationTests.cs index 7f204ecf..965d2121 100644 --- a/test/WebMarkupMin.Core.Test/Html/MinificationTests.cs +++ b/test/WebMarkupMin.Core.Test/Html/MinificationTests.cs @@ -1374,7 +1374,30 @@ public void WhitespaceMinificationIsCorrect() const string targetOutput16D = targetOutput16C; - const string input17 = "
\n" + + const string input17 = "\n" + + " \n" + + " \n" + + " \n" + + " Vasya Pupkin <vasya-pupkin@mail.ru>\n" + + " \n" + + " \n" + + "" + ; + const string targetOutput17A = input17; + const string targetOutput17B = "" + + "" + + "\n" + + " \n" + + " Vasya Pupkin <vasya-pupkin@mail.ru>\n" + + " \n" + + " " + + "" + ; + const string targetOutput17C = targetOutput17B; + const string targetOutput17D = targetOutput17C; + + + const string input18 = "
\n" + " \n" + " \n" + " \n" + @@ -1397,8 +1420,8 @@ public void WhitespaceMinificationIsCorrect() " \n" + "
" ; - const string targetOutput17A = input17; - const string targetOutput17B = "
" + + const string targetOutput18A = input18; + const string targetOutput18B = "
" + "" + "" + "" + @@ -1421,7 +1444,7 @@ public void WhitespaceMinificationIsCorrect() " " + "
" ; - const string targetOutput17C = "
" + + const string targetOutput18C = "
" + "" + "" + "" + @@ -1444,10 +1467,10 @@ public void WhitespaceMinificationIsCorrect() "" + "
" ; - const string targetOutput17D = targetOutput17C; + const string targetOutput18D = targetOutput18C; - const string input18 = "
\n" + + const string input19 = "
\n" + "
Name:
\n" + "
John Doe \n" + "
\n\n" + @@ -1459,8 +1482,8 @@ public void WhitespaceMinificationIsCorrect() "\n" + "
" ; - const string targetOutput18A = input18; - const string targetOutput18B = "
" + + const string targetOutput19A = input19; + const string targetOutput19B = "
" + "
Name:
" + "
John Doe
" + "
Gender:
" + @@ -1469,7 +1492,7 @@ public void WhitespaceMinificationIsCorrect() "
Unknown
" + "
" ; - const string targetOutput18C = "
" + + const string targetOutput19C = "
" + "
Name:
" + "
John Doe
" + "
Gender:
" + @@ -1478,10 +1501,10 @@ public void WhitespaceMinificationIsCorrect() "
Unknown
" + "
" ; - const string targetOutput18D = targetOutput18C; + const string targetOutput19D = targetOutput19C; - const string input19 = "\n" + + const string input20 = "\n" + " \n" + " \n" + " \n" + @@ -1490,23 +1513,23 @@ public void WhitespaceMinificationIsCorrect() " \n" + "" ; - const string targetOutput19A = input19; - const string targetOutput19B = "" + + const string targetOutput20A = input20; + const string targetOutput20B = "" + " " + " " + " " + "" ; - const string targetOutput19C = "" + + const string targetOutput20C = "" + "" + "" + "" + "" ; - const string targetOutput19D = targetOutput19C; + const string targetOutput20D = targetOutput20C; - const string input20 = "\n" + + const string input21 = "\n" + " \n" + " \n" + " \n" + @@ -1515,43 +1538,43 @@ public void WhitespaceMinificationIsCorrect() " \n" + "" ; - const string targetOutput20A = input20; - const string targetOutput20B = "" + + const string targetOutput21A = input21; + const string targetOutput21B = "" + " " + " " + " " + "" ; - const string targetOutput20C = "" + + const string targetOutput21C = "" + "" + "" + "" + "" ; - const string targetOutput20D = targetOutput20C; + const string targetOutput21D = targetOutput21C; - const string input21 = "
\n" + + const string input22 = "
\n" + " \"LibSass\n" + "
Fig 1. - LibSass logo. \n" + "
\n" + "
" ; - const string targetOutput21A = input21; - const string targetOutput21B = "
" + + const string targetOutput22A = input22; + const string targetOutput22B = "
" + " \"LibSass " + "
Fig 1. - LibSass logo.
" + "
" ; - const string targetOutput21C = "
" + + const string targetOutput22C = "
" + "\"LibSass" + "
Fig 1. - LibSass logo.
" + "
" ; - const string targetOutput21D = targetOutput21C; + const string targetOutput22D = targetOutput22C; - const string input22 = "
\n" + + const string input23 = "\n" + "
\n" + " Personal data \n" + "\n" + @@ -1561,8 +1584,8 @@ public void WhitespaceMinificationIsCorrect() "
\n" + "
" ; - const string targetOutput22A = input22; - const string targetOutput22B = "
" + + const string targetOutput23A = input23; + const string targetOutput23B = " " + "
" + " Personal data " + "Name:
" + @@ -1571,7 +1594,7 @@ public void WhitespaceMinificationIsCorrect() "
" + "
" ; - const string targetOutput22C = "
" + + const string targetOutput23C = "" + "
" + "Personal data" + "Name:
" + @@ -1580,63 +1603,63 @@ public void WhitespaceMinificationIsCorrect() "
" + "
" ; - const string targetOutput22D = targetOutput22C; + const string targetOutput23D = targetOutput23C; - const string input23 = "\n" + + const string input24 = "\n" + " 漢 Kan \n" + " 字 ji \n" + "" ; - const string targetOutput23A = input23; - const string targetOutput23B = " " + + const string targetOutput24A = input24; + const string targetOutput24B = " " + "漢 Kan " + "字 ji " + "" ; - const string targetOutput23C = targetOutput23B; - const string targetOutput23D = "" + + const string targetOutput24C = targetOutput24B; + const string targetOutput24D = "" + "漢 Kan " + "字 ji" + "" ; - const string input24 = "\n" + + const string input25 = "\n" + " 漢 ( Kan ) \n" + " 字 ( ji ) \n" + "" ; - const string targetOutput24A = input24; - const string targetOutput24B = " " + + const string targetOutput25A = input25; + const string targetOutput25B = " " + "漢 ( Kan ) " + "字 ( ji ) " + "" ; - const string targetOutput24C = targetOutput24B; - const string targetOutput24D = "" + + const string targetOutput25C = targetOutput25B; + const string targetOutput25D = "" + "漢 ( Kan ) " + "字 ( ji )" + "" ; - const string input25 = "\n" + + const string input26 = "\n" + " 東\n" + " \n" + " とう \n" + " きょう \n" + "" ; - const string targetOutput25A = input25; - const string targetOutput25B = " " + + const string targetOutput26A = input26; + const string targetOutput26B = " " + "東 " + " " + " とう " + " きょう " + ""; - const string targetOutput25C = targetOutput25B; - const string targetOutput25D = "" + + const string targetOutput26C = targetOutput26B; + const string targetOutput26D = "" + "東 " + " " + "とう " + @@ -1645,7 +1668,7 @@ public void WhitespaceMinificationIsCorrect() ; - const string input26 = "\n" + + const string input27 = "\n" + " ♥\n" + " : \n" + " Heart \n" + @@ -1672,8 +1695,8 @@ public void WhitespaceMinificationIsCorrect() " . \n" + "" ; - const string targetOutput26A = input26; - const string targetOutput26B = " " + + const string targetOutput27A = input27; + const string targetOutput27B = " " + "♥ " + ": " + " Heart " + @@ -1700,8 +1723,8 @@ public void WhitespaceMinificationIsCorrect() ". " + "" ; - const string targetOutput26C = targetOutput26B; - const string targetOutput26D = "" + + const string targetOutput27C = targetOutput27B; + const string targetOutput27D = "" + "♥ " + ": " + "Heart " + @@ -1730,21 +1753,21 @@ public void WhitespaceMinificationIsCorrect() ; - const string input27 = "\n" + + const string input28 = "\n" + " \n" + " jiù jīn shān \n" + " Сан-Франциско \n" + "" ; - const string targetOutput27A = input27; - const string targetOutput27B = " " + + const string targetOutput28A = input28; + const string targetOutput28B = " " + " " + " jiù jīn shān " + " Сан-Франциско " + "" ; - const string targetOutput27C = targetOutput27B; - const string targetOutput27D = "" + + const string targetOutput28C = targetOutput28B; + const string targetOutput28D = "" + " " + "jiù jīn shān " + "Сан-Франциско" + @@ -1887,6 +1910,11 @@ public void WhitespaceMinificationIsCorrect() string output27C = mediumRemovingWhitespaceMinifier.Minify(input27).MinifiedContent; string output27D = aggressiveRemovingWhitespaceMinifier.Minify(input27).MinifiedContent; + string output28A = keepingWhitespaceMinifier.Minify(input28).MinifiedContent; + string output28B = safeRemovingWhitespaceMinifier.Minify(input28).MinifiedContent; + string output28C = mediumRemovingWhitespaceMinifier.Minify(input28).MinifiedContent; + string output28D = aggressiveRemovingWhitespaceMinifier.Minify(input28).MinifiedContent; + // Assert Assert.Equal(targetOutput1A, output1A); Assert.Equal(targetOutput1B, output1B); @@ -2022,6 +2050,11 @@ public void WhitespaceMinificationIsCorrect() Assert.Equal(targetOutput27B, output27B); Assert.Equal(targetOutput27C, output27C); Assert.Equal(targetOutput27D, output27D); + + Assert.Equal(targetOutput28A, output28A); + Assert.Equal(targetOutput28B, output28B); + Assert.Equal(targetOutput28C, output28C); + Assert.Equal(targetOutput28D, output28D); } #endregion diff --git a/test/WebMarkupMin.Core.Test/Html/ParsingTests.cs b/test/WebMarkupMin.Core.Test/Html/ParsingTests.cs index 2446e158..399ea245 100644 --- a/test/WebMarkupMin.Core.Test/Html/ParsingTests.cs +++ b/test/WebMarkupMin.Core.Test/Html/ParsingTests.cs @@ -186,14 +186,24 @@ public void ParsingXmlBasedTagsIsCorrect() "" + "" ; + const string input3 = "" + + "x<y" + + "+" + + "3" + + "=" + + "x<y3" + + "" + ; // Act string output1 = minifier.Minify(input1).MinifiedContent; string output2 = minifier.Minify(input2).MinifiedContent; + string output3 = minifier.Minify(input3).MinifiedContent; // Assert Assert.Equal(input1, output1); Assert.Equal(input2, output2); + Assert.Equal(input3, output3); } [Fact]