From 33c559aad55e4f03af6c0558463e9aa169aea0c3 Mon Sep 17 00:00:00 2001 From: Valentin Perignon Date: Thu, 27 Apr 2023 10:46:42 +0200 Subject: [PATCH] feat: Clean head and body from html document --- .package.resolved | 6 +- MailCore/Utils/Constants.swift | 1 - MailCore/Utils/MessageBodyUtils.swift | 19 ++++- MailCore/Utils/Whitelist+Extension.swift | 93 +++++------------------- Project.swift | 2 +- 5 files changed, 41 insertions(+), 80 deletions(-) diff --git a/.package.resolved b/.package.resolved index 3a0482a0e..e28dedf7e 100644 --- a/.package.resolved +++ b/.package.resolved @@ -236,10 +236,10 @@ { "identity" : "swiftsoup", "kind" : "remoteSourceControl", - "location" : "https://github.com/scinfu/SwiftSoup", + "location" : "https://github.com/valentinperignon/SwiftSoup", "state" : { - "revision" : "f707b8680cddb96dc1855632340a572ef37bbb98", - "version" : "2.5.3" + "branch" : "try-headcleaner", + "revision" : "05c6c880a5056421de271012faa8bd0a42c2e0a8" } }, { diff --git a/MailCore/Utils/Constants.swift b/MailCore/Utils/Constants.swift index 2bfb3c84c..e68a812e0 100644 --- a/MailCore/Utils/Constants.swift +++ b/MailCore/Utils/Constants.swift @@ -65,7 +65,6 @@ public enum Constants { try! NSRegularExpression(pattern: ">\\s*<|>?\\s+ String? 
{ do { - return try SwiftSoup.clean(rawHtml, Constants.extendedWhitelist) + let dirtyDocument = try SwiftSoup.parse(rawHtml) + let cleanedDocument = try SwiftSoup.Cleaner(headWhitelist: .headWhitelist, bodyWhitelist: .extendedBodyWhitelist) + .clean(dirtyDocument) + + // We need to remove the tag <meta http-equiv="refresh"> + let metaRefreshTags = try cleanedDocument.select("meta[http-equiv='refresh']") + for metaRefreshTag in metaRefreshTags { + try metaRefreshTag.parent()?.removeChild(metaRefreshTag) + } + + // If `<body>` has a style attribute, keep it + if let bodyStyleAttribute = try dirtyDocument.body()?.attr("style") { + try cleanedDocument.body()?.attr("style", bodyStyleAttribute) + } + + return try cleanedDocument.outerHtml() } catch { DDLogError("An error occurred while parsing body \(error)") return nil diff --git a/MailCore/Utils/Whitelist+Extension.swift b/MailCore/Utils/Whitelist+Extension.swift index b919c5662..ea7d66665 100644 --- a/MailCore/Utils/Whitelist+Extension.swift +++ b/MailCore/Utils/Whitelist+Extension.swift @@ -20,87 +20,34 @@ import Foundation import SwiftSoup extension Whitelist { - static var extendedWhitelist: Whitelist { + static var headWhitelist: Whitelist { do { let customWhitelist = Whitelist.none() - let allowedTags = [ - "a", - "b", - "blockquote", - "body", - "br", - "caption", - "center", - "cite", - "code", - "col", - "colgroup", - "dd", - "div", - "dl", - "dt", - "em", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "head", - "hr", - "html", - "i", - "img", - "li", - "meta", - "ol", - "p", - "pre", - "q", - "small", - "span", - "strike", - "strong", - "style", - "sub", - "sup", - "table", - "tbody", - "td", - "tfoot", - "th", - "thead", - "title", - "tr", - "u", - "ul", - ] + try customWhitelist + .addTags("base", "meta", "style", "title") + .addAttributes("style", "media", "type") + .addAttributes("meta", "charset", "content", "http-equiv", "name") + .addAttributes("base", "href", "target") + .addProtocols("base", "href", "http", "https") - for tag in 
allowedTags { - try customWhitelist.addTags(tag) - try customWhitelist.addAttributes(tag, "style", "width", "height", "class", "align") - } + return customWhitelist + } catch { + fatalError("Couldn't init head whitelist") + } + } + static var extendedBodyWhitelist: Whitelist { + do { + let customWhitelist = try Whitelist.relaxed() try customWhitelist - .addAttributes("a", "href", "title") - .addAttributes("blockquote", "cite") - .addAttributes("col", "span") - .addAttributes("colgroup", "span") - .addAttributes("img", "align", "alt", "src", "title") - .addAttributes("ol", "start", "type") - .addAttributes("q", "cite") - .addAttributes("table", "summary") - .addAttributes("td", "abbr", "axis", "colspan", "rowspan") - .addAttributes("th", "abbr", "axis", "colspan", "rowspan", "scope") - .addAttributes("ul", "type") + .addTags("center", "style") + .addAttributes(":all", "style", "width", "height", "class", "id", "align", "bgcolor", "border") + .addAttributes("td", "valign") + .addProtocols("img", "src", "cid", "data") - .addProtocols("a", "href", "http", "https", "mailto") - .addProtocols("blockquote", "cite", "http", "https") - .addProtocols("cite", "cite", "http", "https") - .addProtocols("q", "cite", "http", "https") return customWhitelist } catch { - fatalError("Couldn't init html whitelist") + fatalError("Couldn't init body whitelist") } } } diff --git a/Project.swift b/Project.swift index 43ec46ebf..b48e01952 100644 --- a/Project.swift +++ b/Project.swift @@ -47,7 +47,7 @@ let project = Project(name: "Mail", .package(url: "https://github.com/SCENEE/FloatingPanel", .upToNextMajor(from: "2.0.0")), .package(url: "https://github.com/kean/Nuke", .upToNextMajor(from: "12.0.0")), .package(url: "https://github.com/airbnb/lottie-ios", .exact("3.5.0")), - .package(url: "https://github.com/scinfu/SwiftSoup", .upToNextMajor(from: "2.5.3")), + .package(url: "https://github.com/valentinperignon/SwiftSoup", .branch("try-headcleaner")), .package(url: 
"https://github.com/johnpatrickmorgan/NavigationBackport", .upToNextMajor(from: "0.7.2")), .package(url: "https://github.com/aheze/Popovers", .upToNextMajor(from: "1.3.2")) ],