Skip to content

Commit

Permalink
Reload webpage when sanitizer has sanitized the webpage
Browse files: browse the repository at this point in the history
  • Loading branch information
Sicos2002 committed Dec 8, 2020
1 parent e9bb261 commit f07e75a
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 5 deletions.
8 changes: 4 additions & 4 deletions ChromeHtmlToPdfLib/ChromeHtmlToPdfLib.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

<PropertyGroup>
<TargetFrameworks>netcoreapp3.0;netstandard2.0</TargetFrameworks>
<Version>2.1.4</Version>
<Version>2.1.5</Version>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<Description>ChromeHtmlToPdf is a 100% managed C# .NETStandard 2.0 library that can be used to convert HTML to PDF format with the use of Google Chrome</Description>
<Copyright>(C)2017-2020 Kees van Spelde</Copyright>
<PackageReleaseNotes>- Fix issue when handling inline images</PackageReleaseNotes>
<PackageReleaseNotes>- Reload webpage when sanitizer has sanitized the webpage</PackageReleaseNotes>
<PackageProjectUrl>https://github.com/Sicos1977/ChromeHtmlToPdf</PackageProjectUrl>
<RepositoryUrl>https://github.com/Sicos1977/ChromeHtmlToPdf</RepositoryUrl>
<RepositoryType>GitHub</RepositoryType>
Expand All @@ -19,8 +19,8 @@
<Company>Magic-Sessions</Company>
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>ChromeHtmlToPdf.snk</AssemblyOriginatorKeyFile>
<AssemblyVersion>2.1.4.0</AssemblyVersion>
<FileVersion>2.1.4.0</FileVersion>
<AssemblyVersion>2.1.5.0</AssemblyVersion>
<FileVersion>2.1.5.0</FileVersion>
</PropertyGroup>

<ItemGroup>
Expand Down
66 changes: 65 additions & 1 deletion ChromeHtmlToPdfLib/Helpers/DocumentHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -210,43 +210,103 @@ private int ParseValue(string value)
sanitizer.FilterUrl += delegate(object sender, FilterUrlEventArgs args)
{
if (args.OriginalUrl != args.SanitizedUrl)
{
WriteToLog($"URL sanitized from '{args.OriginalUrl}' to '{args.SanitizedUrl}'");
htmlChanged = true;
}
};

sanitizer.RemovingAtRule += delegate(object sender, RemovingAtRuleEventArgs args)
{
WriteToLog($"Removing CSS at-rule '{args.Rule.CssText}' from tag '{args.Tag.TagName}'");
htmlChanged = true;
};

sanitizer.RemovingAttribute += delegate(object sender, RemovingAttributeEventArgs args)
{
WriteToLog($"Removing attribute '{args.Attribute.Name}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
htmlChanged = true;
};

sanitizer.RemovingComment += delegate(object sender, RemovingCommentEventArgs args)
{
WriteToLog($"Removing comment '{args.Comment.TextContent}'");
htmlChanged = true;
};

sanitizer.RemovingCssClass += delegate(object sender, RemovingCssClassEventArgs args)
{
WriteToLog($"Removing CSS class '{args.CssClass}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
htmlChanged = true;
};

sanitizer.RemovingStyle += delegate(object sender, RemovingStyleEventArgs args)
{
WriteToLog($"Removing style '{args.Style.Name}' from tag '{args.Tag.TagName}', reason '{args.Reason}'");
htmlChanged = true;
};

sanitizer.RemovingTag += delegate(object sender, RemovingTagEventArgs args)
{
WriteToLog($"Removing tag '{args.Tag.TagName}', reason '{args.Reason}'");
htmlChanged = true;
};

sanitizer.SanitizeDom(document as IHtmlDocument);

htmlChanged = true;
WriteToLog("HTML sanitized");

if (htmlChanged)
{
var sanitizedOutputFile = GetTempFile(".htm");

try
{
WriteToLog($"Writing sanitized webpage to '{sanitizedOutputFile}'");

using (var fileStream = new FileStream(sanitizedOutputFile, FileMode.CreateNew, FileAccess.Write))
{
if (inputUri.Encoding != null)
{
using (var textWriter = new StreamWriter(fileStream, inputUri.Encoding))
document.ToHtml(textWriter, new HtmlMarkupFormatter());
}
else
using (var textWriter = new StreamWriter(fileStream))
document.ToHtml(textWriter, new HtmlMarkupFormatter());
}

WriteToLog("Written");
}
catch (Exception exception)
{
WriteToLog($"Could not generate new html file '{sanitizedOutputFile}', error: {ExceptionHelpers.GetInnerException(exception)}");
return true;
}

try
{
using (var sanitizedWebpage = File.OpenRead(sanitizedOutputFile))
{
WriteToLog($"Loading sanitized webpage from '{sanitizedOutputFile}'");

// ReSharper disable AccessToDisposedClosure
document = inputUri.Encoding != null
? context.OpenAsync(m => m.Content(sanitizedWebpage).Header("Content-Type", $"text/html; charset={inputUri.Encoding.WebName}").Address(inputUri.ToString())).Result
: context.OpenAsync(m => m.Content(sanitizedWebpage).Address(inputUri.ToString())).Result;
// ReSharper restore AccessToDisposedClosure

WriteToLog("Loaded");
}
}
catch (Exception exception)
{
WriteToLog($"Exception occured in AngleSharp: {ExceptionHelpers.GetInnerException(exception)}");
return true;
}

htmlChanged = false;
}
}

WriteToLog("Validating all images if they need to be rotated and if they fit the page");
Expand Down Expand Up @@ -393,6 +453,8 @@ private int ParseValue(string value)

try
{
WriteToLog($"Writing changed webpage to '{outputFile}'");

using (var fileStream = new FileStream(outputFile, FileMode.CreateNew, FileAccess.Write))
{
if (inputUri.Encoding != null)
Expand All @@ -406,6 +468,8 @@ private int ParseValue(string value)

}

WriteToLog("Written");

return false;
}
catch (Exception exception)
Expand Down

0 comments on commit f07e75a

Please sign in to comment.