Commit cfa5746
Added a safeUrl list that is used to add temporary URLs that are generated by the code so that they are automatically whitelisted
Sicos2002 committed Mar 10, 2021
1 parent c536733 commit cfa5746
Showing 4 changed files with 44 additions and 18 deletions.
11 changes: 6 additions & 5 deletions ChromeHtmlToPdfLib/Browser.cs
@@ -119,18 +119,19 @@ internal Browser(Uri browser, Stream logStream)
/// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
/// event has fired. After a timeout the NavigateTo method will exit as if the page
/// has been completely loaded</param>
/// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
/// <param name="urlBlacklist">A list with URL's that need to be blocked (use * as a wildcard)</param>
/// <param name="safeUrls">A list with URL's that are safe to load</param>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
/// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
public void NavigateTo(
Uri uri,
List<string> safeUrls,
CountdownTimer countdownTimer = null,
int? mediaLoadTimeout = null,
List<string> urlBlacklist = null)
{
var waitEvent = new ManualResetEvent(false);
var mediaLoadTimeoutCancellationTokenSource = new CancellationTokenSource();
var absoluteUri = uri.AbsoluteUri.Substring(0, uri.AbsoluteUri.LastIndexOf('/') + 1);
var navigationError = string.Empty;
var waitforNetworkIdle = false;
var mediaTimeoutTaskSet = false;
@@ -147,11 +148,11 @@ internal Browser(Uri browser, Stream logStream)
var fetch = Fetch.FromJson(data);
var requestId = fetch.Params.RequestId;
var url = fetch.Params.Request.Url;
var isSafeUrl = safeUrls.Contains(url);

if (!RegularExpression.IsRegExMatch(urlBlacklist, url, out var matchedPattern) ||
url.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase))
if (!RegularExpression.IsRegExMatch(urlBlacklist, url, out var matchedPattern) || isSafeUrl)
{
WriteToLog($"The url '{url}' has been allowed");
WriteToLog($"The url '{url}' has been allowed{(isSafeUrl ? " because it is on the safe url list" : string.Empty)}");
var fetchContinue = new Message {Method = "Fetch.continueRequest"};
fetchContinue.Parameters.Add("requestId", requestId);
_pageConnection.SendAsync(fetchContinue).GetAwaiter();
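The hunk above replaces the old prefix check against absoluteUri with a lookup in the new safeUrls parameter: a request is continued when it does not match the blacklist, or when it is on the safe list. The sketch below illustrates just that decision in isolation; it is not the library's code, and WildcardMatch is a hypothetical stand-in for RegularExpression.IsRegExMatch, which is not shown in this diff.

// Standalone sketch of the allow/block decision in Browser.NavigateTo after this commit.
// WildcardMatch is a placeholder for the library's RegularExpression.IsRegExMatch helper.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;

internal static class RequestFilterSketch
{
    private static bool WildcardMatch(IEnumerable<string> patterns, string url) =>
        patterns != null && patterns.Any(p =>
            Regex.IsMatch(url, "^" + Regex.Escape(p).Replace("\\*", ".*") + "$", RegexOptions.IgnoreCase));

    // True = send Fetch.continueRequest, false = fail the request.
    public static bool ShouldAllow(string url, List<string> safeUrls, List<string> urlBlacklist)
    {
        var isSafeUrl = safeUrls.Contains(url);        // temporary files the library generated itself
        var isBlacklisted = WildcardMatch(urlBlacklist, url);
        return !isBlacklisted || isSafeUrl;            // safe URLs always win over the blacklist
    }

    public static void Main()
    {
        var safeUrls = new List<string> { "file:///tmp/sanitized.htm" };   // hypothetical temp file
        var urlBlacklist = new List<string> { "file://*" };                // block all other local files

        Console.WriteLine(ShouldAllow("file:///tmp/sanitized.htm", safeUrls, urlBlacklist)); // True
        Console.WriteLine(ShouldAllow("file:///etc/passwd", safeUrls, urlBlacklist));        // False
    }
}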
8 changes: 4 additions & 4 deletions ChromeHtmlToPdfLib/ChromeHtmlToPdfLib.csproj
@@ -2,11 +2,11 @@

<PropertyGroup>
<TargetFrameworks>netcoreapp3.0;netstandard2.0</TargetFrameworks>
<Version>2.1.11</Version>
<Version>2.1.12</Version>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<Description>ChromeHtmlToPdf is a 100% managed C# .NETStandard 2.0 library that can be used to convert HTML to PDF format with the use of Google Chrome</Description>
<Copyright>(C)2017-2021 Kees van Spelde</Copyright>
<PackageReleaseNotes>- Make pagesettings object clonable</PackageReleaseNotes>
<PackageReleaseNotes>- Added a safeUrl list that is used to add temporary URLs that are generated by the code so that they are automatically whitelisted</PackageReleaseNotes>
<PackageProjectUrl>https://github.com/Sicos1977/ChromeHtmlToPdf</PackageProjectUrl>
<RepositoryUrl>https://github.com/Sicos1977/ChromeHtmlToPdf</RepositoryUrl>
<RepositoryType>GitHub</RepositoryType>
@@ -19,8 +19,8 @@
<Company>Magic-Sessions</Company>
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>ChromeHtmlToPdf.snk</AssemblyOriginatorKeyFile>
<AssemblyVersion>2.1.11.0</AssemblyVersion>
<FileVersion>2.1.11.0</FileVersion>
<AssemblyVersion>2.1.12.0</AssemblyVersion>
<FileVersion>2.1.12.0</FileVersion>
</PropertyGroup>

<ItemGroup>
24 changes: 18 additions & 6 deletions ChromeHtmlToPdfLib/Converter.cs
@@ -136,7 +136,7 @@ public class Converter : IDisposable
private Exception _chromeEventException;

/// <summary>
/// A list with urls to blacklist
/// A list with URL's to blacklist
/// </summary>
private List<string> _urlBlacklist;

@@ -995,6 +995,8 @@ private void ClearUrlBlackList()

_conversionTimeout = conversionTimeout;

var safeUrls = new List<string>();

if (inputUri.IsFile)
{
if (!File.Exists(inputUri.OriginalString))
@@ -1024,18 +1026,19 @@ private void ClearUrlBlackList()
try
{
if (inputUri.IsFile && CheckForPreWrap(inputUri, out var preWrapFile))
{
inputUri = new ConvertUri(preWrapFile);
}

if (ImageResize || ImageRotate || SanitizeHtml || pageSettings.PaperFormat == PaperFormat.FitPageToContent)
{
var documentHelper = new DocumentHelper(GetTempDirectory, WebProxy, ImageDownloadTimeout, _logStream) { InstanceId = InstanceId };

if (SanitizeHtml)
{
if (documentHelper.SanitizeHtml(inputUri, Sanitizer, out var outputUri))
if (documentHelper.SanitizeHtml(inputUri, Sanitizer, out var outputUri, out var sanitizeHtmlSafeUrls))
{
inputUri = outputUri;
safeUrls.AddRange(sanitizeHtmlSafeUrls);
}
}

if (pageSettings.PaperFormat == PaperFormat.FitPageToContent)
@@ -1047,8 +1050,17 @@ private void ClearUrlBlackList()

if (ImageResize || ImageRotate)
{
if (documentHelper.ValidateImages(inputUri, ImageResize, ImageRotate, pageSettings, out var outputUri, _urlBlacklist))
if (documentHelper.ValidateImages(
inputUri,
ImageResize,
ImageRotate,
pageSettings,
out var outputUri,
out var validateImagesSafeUrls))
{
safeUrls.AddRange(validateImagesSafeUrls);
inputUri = outputUri;
}
}
}

@@ -1070,7 +1082,7 @@ private void ClearUrlBlackList()

WriteToLog("Loading " + (inputUri.IsFile ? "file " + inputUri.OriginalString : "url " + inputUri));

_browser.NavigateTo(inputUri, countdownTimer, mediaLoadTimeout, _urlBlacklist);
_browser.NavigateTo(inputUri, safeUrls, countdownTimer, mediaLoadTimeout, _urlBlacklist);

if (!string.IsNullOrWhiteSpace(waitForWindowStatus))
{
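Taken together, these hunks give the conversion a per-run safeUrls list: each pre-processing helper reports the temporary files it wrote through a new out parameter, the results are accumulated, and the combined list is handed to Browser.NavigateTo alongside the blacklist. The sketch below shows only that accumulation pattern; the two stage methods are hypothetical stand-ins for DocumentHelper.SanitizeHtml and DocumentHelper.ValidateImages, not the library's implementations.

// Sketch of the safe-URL accumulation pattern introduced in Converter (stand-in stages, not library code).
using System;
using System.Collections.Generic;

internal static class SafeUrlAccumulationSketch
{
    // Stand-in for DocumentHelper.SanitizeHtml: returns a rewritten document plus its safe URLs.
    private static bool SanitizeStage(Uri input, out Uri output, out List<string> safeUrls)
    {
        output = new Uri("file:///tmp/sanitized.htm");                  // hypothetical temp copy
        safeUrls = new List<string> { output.ToString() };
        return true;                                                    // true = document was rewritten
    }

    // Stand-in for DocumentHelper.ValidateImages: returns a rewritten document plus its safe URLs.
    private static bool ValidateImagesStage(Uri input, out Uri output, out List<string> safeUrls)
    {
        output = new Uri("file:///tmp/validated.htm");                  // hypothetical temp copy
        safeUrls = new List<string> { output.ToString(), "file:///tmp/image-1.png" };
        return true;
    }

    public static void Main()
    {
        var inputUri = new Uri("file:///tmp/input.html");
        var safeUrls = new List<string>();                              // collected once per conversion

        if (SanitizeStage(inputUri, out var sanitized, out var sanitizeSafeUrls))
        {
            inputUri = sanitized;
            safeUrls.AddRange(sanitizeSafeUrls);
        }

        if (ValidateImagesStage(inputUri, out var validated, out var validateSafeUrls))
        {
            inputUri = validated;
            safeUrls.AddRange(validateSafeUrls);
        }

        // In the library the combined list is handed to:
        // _browser.NavigateTo(inputUri, safeUrls, countdownTimer, mediaLoadTimeout, _urlBlacklist);
        Console.WriteLine($"Navigating to {inputUri} with {safeUrls.Count} whitelisted temporary URLs");
    }
}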
19 changes: 16 additions & 3 deletions ChromeHtmlToPdfLib/Helpers/DocumentHelper.cs
@@ -150,13 +150,16 @@ private int ParseValue(string value)
/// <param name="sanitizer"><see cref="HtmlSanitizer"/></param>
/// <param name="outputUri">The outputUri when this method returns <c>false</c> otherwise
/// <c>null</c> is returned</param>
/// <param name="safeUrls">A list of safe URL's</param>
/// <returns></returns>
public bool SanitizeHtml(
ConvertUri inputUri,
HtmlSanitizer sanitizer,
out ConvertUri outputUri)
out ConvertUri outputUri,
out List<string> safeUrls)
{
outputUri = null;
safeUrls = new List<string>();

using (var webpage = inputUri.IsFile ? File.OpenRead(inputUri.OriginalString) : DownloadStream(inputUri))
{
@@ -242,6 +245,7 @@ private int ParseValue(string value)

var sanitizedOutputFile = GetTempFile(".htm");
outputUri = new ConvertUri(sanitizedOutputFile, inputUri.Encoding);
safeUrls.Add(outputUri.ToString());

try
{
@@ -255,6 +259,7 @@ private int ParseValue(string value)
{
var src = image.Source;
WriteToLog($"Updating image source to '{src}'");
safeUrls.Add(src);
image.Source = src;
}
}
@@ -409,6 +414,7 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
/// <param name="outputUri">The outputUri when this method returns <c>true</c> otherwise
/// <c>null</c> is returned</param>
/// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
/// <param name="safeUrls">A list with URL's that are safe to load</param>
/// <returns>Returns <c>false</c> when the images did not fit the page, otherwise <c>true</c></returns>
/// <exception cref="WebException">Raised when the webpage from <paramref name="inputUri"/> could not be downloaded</exception>
public bool ValidateImages(
@@ -417,9 +423,11 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
bool rotate,
PageSettings pageSettings,
out ConvertUri outputUri,
out List<string> safeUrls,
List<string> urlBlacklist = null)
{
outputUri = null;
safeUrls = new List<string>();

using (var webpage = inputUri.IsFile
? File.OpenRead(inputUri.OriginalString)
@@ -526,7 +534,9 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)
htmlImage.DisplayWidth = image.Width;
htmlImage.DisplayHeight = image.Height;
htmlImage.SetStyle(string.Empty);
htmlImage.Source = new Uri(fileName).ToString();
var newSrc = new Uri(fileName).ToString();
safeUrls.Add(newSrc);
htmlImage.Source = newSrc;
htmlChanged = true;
imageChanged = true;
}
@@ -605,12 +615,15 @@ public bool FitPageToContent(ConvertUri inputUri, out ConvertUri outputUri)

WriteToLog($"Unchanged image saved to location '{fileName}'");
image.Save(fileName);
unchangedImage.Source = new Uri(fileName).ToString();
var newSrc = new Uri(fileName).ToString();
safeUrls.Add(newSrc);
unchangedImage.Source = newSrc;
}
}

var outputFile = GetTempFile(".htm");
outputUri = new ConvertUri(outputFile, inputUri.Encoding);
safeUrls.Add(outputUri.ToString());

try
{
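Both SanitizeHtml and ValidateImages now return, through the new out safeUrls parameter, the file:/// URL of every temporary document and image they write, using new Uri(fileName).ToString(). That exact form matters because Browser.NavigateTo checks the list with safeUrls.Contains(url), an ordinal string comparison against the URL Chrome reports for the request. The snippet below is a minimal illustration of that detail, with a hypothetical temp file name; it is not taken from the library.

// Minimal sketch: why the helpers store new Uri(fileName).ToString() rather than the raw path.
using System;
using System.Collections.Generic;
using System.IO;

internal static class SafeUrlFormSketch
{
    public static void Main()
    {
        var safeUrls = new List<string>();

        // Hypothetical temporary image written by DocumentHelper
        var fileName = Path.Combine(Path.GetTempPath(), "image-1.png");
        var asUrl = new Uri(fileName).ToString();   // e.g. "file:///C:/Users/.../Temp/image-1.png"
        safeUrls.Add(asUrl);

        // A request for the raw path would not match; the file:/// URL form does.
        Console.WriteLine(safeUrls.Contains(fileName));  // False
        Console.WriteLine(safeUrls.Contains(asUrl));     // True
    }
}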
