Commit cfa5746
Added a safeUrl list that is used to add temporary URLs that are generated by the code so that they are automatically whitelisted
Sicos2002 committed Mar 10, 2021
1 parent c536733 commit cfa5746
Showing 4 changed files with 44 additions and 18 deletions.
11 changes: 6 additions & 5 deletions ChromeHtmlToPdfLib/Browser.cs
@@ -119,18 +119,19 @@ internal Browser(Uri browser, Stream logStream)
/// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
/// event has fired. After a timeout the NavigateTo method will exit as if the page
/// has been completely loaded</param>
/// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
/// <param name="urlBlacklist">A list with URL's that need to be blocked (use * as a wildcard)</param>
/// <param name="safeUrls">A list with URL's that are safe to load</param>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
/// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
public void NavigateTo(
Uri uri,
List<string> safeUrls,
CountdownTimer countdownTimer = null,
int? mediaLoadTimeout = null,
List<string> urlBlacklist = null)
{
var waitEvent = new ManualResetEvent(false);
var mediaLoadTimeoutCancellationTokenSource = new CancellationTokenSource();
var absoluteUri = uri.AbsoluteUri.Substring(0, uri.AbsoluteUri.LastIndexOf('/') + 1);
var navigationError = string.Empty;
var waitforNetworkIdle = false;
var mediaTimeoutTaskSet = false;
@@ -147,11 +148,11 @@ internal Browser(Uri browser, Stream logStream)
var fetch = Fetch.FromJson(data);
var requestId = fetch.Params.RequestId;
var url = fetch.Params.Request.Url;
var isSafeUrl = safeUrls.Contains(url);

if (!RegularExpression.IsRegExMatch(urlBlacklist, url, out var matchedPattern) ||
url.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase))
if (!RegularExpression.IsRegExMatch(urlBlacklist, url, out var matchedPattern) || isSafeUrl)
{
WriteToLog($"The url '{url}' has been allowed");
WriteToLog($"The url '{url}' has been allowed{(isSafeUrl ? " because it is on the safe url list" : string.Empty)}");
var fetchContinue = new Message {Method = "Fetch.continueRequest"};
fetchContinue.Parameters.Add("requestId", requestId);
_pageConnection.SendAsync(fetchContinue).GetAwaiter();
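The hunk above replaces the old prefix check against absoluteUri with a lookup in the new safeUrls parameter: a request is continued when it does not match the blacklist, or when it is on the safe list. The sketch below illustrates just that decision in isolation; it is not the library's code, and WildcardMatch is a hypothetical stand-in for RegularExpression.IsRegExMatch, which is not shown in this diff.

// Standalone sketch of the allow/block decision in Browser.NavigateTo after this commit.
// WildcardMatch is a placeholder for the library's RegularExpression.IsRegExMatch helper.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

internal static class RequestFilterSketch
{
    private static bool WildcardMatch(IEnumerable<string> patterns, string url) =>
        patterns != null && patterns.Any(p =>
            Regex.IsMatch(url, "^" + Regex.Escape(p).Replace("\\*", ".*") + "$", RegexOptions.IgnoreCase));

    // True = send Fetch.continueRequest, false = fail the request.
    public static bool ShouldAllow(string url, List<string> safeUrls, List<string> urlBlacklist)
    {
        var isSafeUrl = safeUrls.Contains(url);        // temporary files the library generated itself
        var isBlacklisted = WildcardMatch(urlBlacklist, url);
        return !isBlacklisted || isSafeUrl;            // safe URLs always win over the blacklist
    }

    public static void Main()
    {
        var safeUrls = new List<string> { "file:///tmp/sanitized.htm" };   // hypothetical temp file
        var urlBlacklist = new List<string> { "file://*" };                // block all other local files

        Console.WriteLine(ShouldAllow("file:///tmp/sanitized.htm", safeUrls, urlBlacklist)); // True
        Console.WriteLine(ShouldAllow("file:///etc/passwd", safeUrls, urlBlacklist));        // False
    }
}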
8 changes: 4 additions & 4 deletions ChromeHtmlToPdfLib/ChromeHtmlToPdfLib.csproj
@@ -2,11 +2,11 @@

<PropertyGroup>
<TargetFrameworks>netcoreapp3.0;netstandard2.0</TargetFrameworks>
<Version>2.1.11</Version>
<Version>2.1.12</Version>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<Description>ChromeHtmlToPdf is a 100% managed C# .NETStandard 2.0 library that can be used to convert HTML to PDF format with the use of Google Chrome</Description>
<Copyright>(C)2017-2021 Kees van Spelde</Copyright>
<PackageReleaseNotes>- Make pagesettings object clonable</PackageReleaseNotes>
<PackageReleaseNotes>- Added a safeUrl list that is used to add temporary URLs that are generated by the code so that they are automatically whitelisted</PackageReleaseNotes>
<PackageProjectUrl>https://github.com/Sicos1977/ChromeHtmlToPdf</PackageProjectUrl>
<RepositoryUrl>https://github.com/Sicos1977/ChromeHtmlToPdf</RepositoryUrl>
<RepositoryType>GitHub</RepositoryType>
@@ -19,8 +19,8 @@
<Company>Magic-Sessions</Company>
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>ChromeHtmlToPdf.snk</AssemblyOriginatorKeyFile>
<AssemblyVersion>2.1.11.0</AssemblyVersion>
<FileVersion>2.1.11.0</FileVersion>
<AssemblyVersion>2.1.12.0</AssemblyVersion>
<FileVersion>2.1.12.0</FileVersion>
</PropertyGroup>

<ItemGroup>
24 changes: 18 additions & 6 deletions ChromeHtmlToPdfLib/Converter.cs
@@ -136,7 +136,7 @@ public class Converter : IDisposable
private Exception _chromeEventException;

/// <summary>
/// A list with urls to blacklist
/// A list with URL's to blacklist
/// </summary>
private List<string> _urlBlacklist;

@@ -995,6 +995,8 @@ private void ClearUrlBlackList()

_conversionTimeout = conversionTimeout;

var safeUrls = new List<string>();

if (inputUri.IsFile)
{
if (!File.Exists(inputUri.OriginalString))
@@ -1024,18 +1026,19 @@ private void ClearUrlBlackList()
try
{
if (inputUri.IsFile && CheckForPreWrap(inputUri, out var preWrapFile))
{
inputUri = new ConvertUri(preWrapFile);
}

if (ImageResize || ImageRotate || SanitizeHtml || pageSettings.PaperFormat == PaperFormat.FitPageToContent)
{
var documentHelper = new DocumentHelper(GetTempDirectory, WebProxy, ImageDownloadTimeout, _logStream) { InstanceId = InstanceId };

if (SanitizeHtml)
{
if (documentHelper.SanitizeHtml(inputUri, Sanitizer, out var outputUri))
if (documentHelper.SanitizeHtml(inputUri, Sanitizer, out var outputUri, out var sanitizeHtmlSafeUrls))
{
inputUri = outputUri;
safeUrls.AddRange(sanitizeHtmlSafeUrls);
}
}

if (pageSettings.PaperFormat == PaperFormat.FitPageToContent)
@@ -1047,8 +1050,17 @@ private void ClearUrlBlackList()

if (ImageResize || ImageRotate)
{
if (documentHelper.ValidateImages(inputUri, ImageResize, ImageRotate, pageSettings, out var outputUri, _urlBlacklist))
if (documentHelper.ValidateImages(
inputUri,
ImageResize,
ImageRotate,
pageSettings,
out var outputUri,
out var validateImagesSafeUrls))
{
safeUrls.AddRange(validateImagesSafeUrls);
inputUri = outputUri;
}
}
}

@@ -1070,7 +1082,7 @@ private void ClearUrlBlackList()

WriteToLog("Loading " + (inputUri.IsFile ? "file " + inputUri.OriginalString : "url " + inputUri));

_browser.NavigateTo(inputUri, countdownTimer, mediaLoadTimeout, _urlBlacklist);
_browser.NavigateTo(inputUri, safeUrls, countdownTimer, mediaLoadTimeout, _urlBlacklist);

if (!string.IsNullOrWhiteSpace(waitForWindowStatus))
{
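Taken together, these hunks give the conversion a per-run safeUrls list: each pre-processing helper reports the temporary files it wrote through a new out parameter, the results are accumulated, and the combined list is handed to Browser.NavigateTo alongside the blacklist. The sketch below shows only that accumulation pattern; the two stage methods are hypothetical stand-ins for DocumentHelper.SanitizeHtml and DocumentHelper.ValidateImages, not the library's implementations.

// Sketch of the safe-URL accumulation pattern introduced in Converter (stand-in stages, not library code).
using System;
using System.Collections.Generic;

internal static class SafeUrlAccumulationSketch
{
    // Stand-in for DocumentHelper.SanitizeHtml: returns a rewritten document plus its safe URLs.
    private static bool SanitizeStage(Uri input, out Uri output, out List<string> safeUrls)
    {
        output = new Uri("file:///tmp/sanitized.htm");                  // hypothetical temp copy
        safeUrls = new List<string> { output.ToString() };
        return true;                                                    // true = document was rewritten
    }

    // Stand-in for DocumentHelper.ValidateImages: returns a rewritten document plus its safe URLs.
    private static bool ValidateImagesStage(Uri input, out Uri output, out List<string> safeUrls)
    {
        output = new Uri("file:///tmp/validated.htm");                  // hypothetical temp copy
        safeUrls = new List<string> { output.ToString(), "file:///tmp/image-1.png" };
        return true;
    }

    public static void Main()
    {
        var inputUri = new Uri("file:///tmp/input.html");
        var safeUrls = new List<string>();                              // collected once per conversion

        if (SanitizeStage(inputUri, out var sanitized, out var sanitizeSafeUrls))
        {
            inputUri = sanitized;
            safeUrls.AddRange(sanitizeSafeUrls);
        }

        if (ValidateImagesStage(inputUri, out var validated, out var validateSafeUrls))
        {
            inputUri = validated;
            safeUrls.AddRange(validateSafeUrls);
        }

        // In the library the combined list is handed to:
        // _browser.NavigateTo(inputUri, safeUrls, countdownTimer, mediaLoadTimeout, _urlBlacklist);
        Console.WriteLine($"Navigating to {inputUri} with {safeUrls.Count} whitelisted temporary URLs");
    }
}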
19 changes: 16 additions & 3 deletions ChromeHtmlToPdfLib/Helpers/DocumentHelper.cs
@@ -150,13 +150,16 @@ private int ParseValue(string value)
/// <param name="sanitizer"><see cref="HtmlSanitizer"/></param>
/// <param name="outputUri">The outputUri when this method returns <c>false</c> otherwise
/// <c>null</c> is returned</param>
/// <param name="safeUrls">A list of safe URL's</param>
/// <returns></returns>
public bool SanitizeHtml(
ConvertUri inputUri,
HtmlSanitizer sanitizer,
out ConvertUri outputUri)
out ConvertUri outputUri,
out List<string> safeUrls)
{
outputUri = null;
safeUrls = new List<string>();

using (var webpage = inputUri.IsFile ? File.OpenRead(inputUri.OriginalString) : DownloadStream(inputUri))
{
@@ -242,6 +245,7 @@ private int ParseValue(string value)

var sanitizedOutputFile = GetTempFile(".htm");
outputUri = new ConvertUri(sanitizedOutputFile, inputUri.Encoding);
safeUrls.Add(outputUri.ToString());

try
{
@@ -255,6 +259,7 @@ private int ParseValue(string value)
{
var src = image.Source;
WriteToLog($"Updating image source to '{src}'");
safeUrls.Add(src);
image.Source = src;
}
}
@@ -409,6 +414,7 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
/// <param name="outputUri">The outputUri when this method returns <c>true</c> otherwise
/// <c>null</c> is returned</param>
/// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
/// <param name="safeUrls">A list with URL's that are safe to load</param>
/// <returns>Returns <c>false</c> when the images did not fit the page, otherwise <c>true</c></returns>
/// <exception cref="WebException">Raised when the webpage from <paramref name="inputUri"/> could not be downloaded</exception>
public bool ValidateImages(
@@ -417,9 +423,11 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
bool rotate,
PageSettings pageSettings,
out ConvertUri outputUri,
out List<string> safeUrls,
List<string> urlBlacklist = null)
{
outputUri = null;
safeUrls = new List<string>();

using (var webpage = inputUri.IsFile
? File.OpenRead(inputUri.OriginalString)
@@ -526,7 +534,9 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
htmlImage.DisplayWidth = image.Width;
htmlImage.DisplayHeight = image.Height;
htmlImage.SetStyle(string.Empty);
htmlImage.Source = new Uri(fileName).ToString();
var newSrc = new Uri(fileName).ToString();
safeUrls.Add(newSrc);
htmlImage.Source = newSrc;
htmlChanged = true;
imageChanged = true;
}
@@ -605,12 +615,15 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)

WriteToLog($"Unchanged image saved to location '{fileName}'");
image.Save(fileName);
unchangedImage.Source = new Uri(fileName).ToString();
var newSrc = new Uri(fileName).ToString();
safeUrls.Add(newSrc);
unchangedImage.Source = newSrc;
}
}

var outputFile = GetTempFile(".htm");
outputUri = new ConvertUri(outputFile, inputUri.Encoding);
safeUrls.Add(outputUri.ToString());

try
{
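Both SanitizeHtml and ValidateImages now return, through the new out safeUrls parameter, the file:/// URL of every temporary document and image they write, using new Uri(fileName).ToString(). That exact form matters because Browser.NavigateTo checks the list with safeUrls.Contains(url), an ordinal string comparison against the URL Chrome reports for the request. The snippet below is a minimal illustration of that detail, with a hypothetical temp file name; it is not taken from the library.

// Minimal sketch: why the helpers store new Uri(fileName).ToString() rather than the raw path.
using System;
using System.Collections.Generic;
using System.IO;

internal static class SafeUrlFormSketch
{
    public static void Main()
    {
        var safeUrls = new List<string>();

        // Hypothetical temporary image written by DocumentHelper
        var fileName = Path.Combine(Path.GetTempPath(), "image-1.png");
        var asUrl = new Uri(fileName).ToString();   // e.g. "file:///C:/Users/.../Temp/image-1.png"
        safeUrls.Add(asUrl);

        // A request for the raw path would not match; the file:/// URL form does.
        Console.WriteLine(safeUrls.Contains(fileName));  // False
        Console.WriteLine(safeUrls.Contains(asUrl));     // True
    }
}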
