diff --git a/.package.resolved b/.package.resolved index 75c50a8df..f7814170a 100644 --- a/.package.resolved +++ b/.package.resolved @@ -235,10 +235,10 @@ { "identity" : "swiftsoup", "kind" : "remoteSourceControl", - "location" : "https://github.com/scinfu/SwiftSoup", + "location" : "https://github.com/Ambrdctr/SwiftSoup", "state" : { - "revision" : "8b6cf29eead8841a1fa7822481cb3af4ddaadba6", - "version" : "2.6.1" + "branch" : "master", + "revision" : "56ce9d740ef9ea53bba13386ebdc460b6f4fe36c" } }, { diff --git a/Mail/Views/Thread/MessageView+Preprocessing.swift b/Mail/Views/Thread/MessageView+Preprocessing.swift index 0f2e58c19..a33d154cf 100644 --- a/Mail/Views/Thread/MessageView+Preprocessing.swift +++ b/Mail/Views/Thread/MessageView+Preprocessing.swift @@ -73,20 +73,11 @@ extension MessageView { presentableBody.body = detachedMessage let bodyValue = detachedMessage.value ?? "" - // Heuristic to give up on mail too large for "perfect" preprocessing. - guard bodyValue.lengthOfBytes(using: String.Encoding.utf8) < Self.bodySizeThreshold else { - DDLogInfo("give up on processing, file too large") - mutate(compactBody: bodyValue, quote: nil) - return - } - let task = Task.detached { - guard let messageBodyQuote = MessageBodyUtils.splitBodyAndQuote(messageBody: bodyValue) else { - return - } - + let messageBodyQuote = await MessageBodyUtils.splitBodyAndQuote(messageBody: bodyValue) await mutate(compactBody: messageBodyQuote.messageBody, quote: messageBodyQuote.quote) } + await task.finish() } diff --git a/MailCore/Utils/MessageBodyUtils.swift b/MailCore/Utils/MessageBodyUtils.swift index 156923a1f..3b3d7f07a 100644 --- a/MailCore/Utils/MessageBodyUtils.swift +++ b/MailCore/Utils/MessageBodyUtils.swift @@ -24,7 +24,9 @@ public enum MessageBodyUtils { private static let blockquote = "blockquote" private static var quoteDescriptors = [ - "#divRplyFwdMsg", // Outlook + // Do not detect this quote as long as we can't detect siblings quotes or else a single reply will be 
missing among the + // many replies of an Outlook reply "chain", which is worse than simply ignoring it +// "#divRplyFwdMsg", // Outlook "#isForwardContent", "#isReplyContent", "#mailcontent:not(table)", @@ -42,29 +44,41 @@ public enum MessageBodyUtils { "blockquote[type=\"cite\"]" ] - public static func splitBodyAndQuote(messageBody: String) -> MessageBodyQuote? { - do { - let htmlDocumentWithQuote = try SwiftSoup.parse(messageBody) - let htmlDocumentWithoutQuote = try SwiftSoup.parse(messageBody) + public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote { + let task = Task { + do { + let htmlDocumentWithQuote = try await SwiftSoup.parse(messageBody) + let htmlDocumentWithoutQuote = try await SwiftSoup.parse(messageBody) - let blockquoteElement = try findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote) - var currentQuoteDescriptor = - try findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote) + let blockquoteElement = try findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote) + var currentQuoteDescriptor = + try findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote) - if currentQuoteDescriptor.isEmpty { - currentQuoteDescriptor = blockquoteElement == nil ? "" : blockquote + if currentQuoteDescriptor.isEmpty { + currentQuoteDescriptor = blockquoteElement == nil ? "" : blockquote + } + + let (body, quote) = try await splitBodyAndQuote( + blockquoteElement: blockquoteElement, + htmlDocumentWithQuote: htmlDocumentWithQuote, + currentQuoteDescriptor: currentQuoteDescriptor + ) + return MessageBodyQuote(messageBody: quote?.isEmpty ?? true ? 
messageBody : body, quote: quote) + } catch { + DDLogError("Error splitting blockquote \(error)") } + return MessageBodyQuote(messageBody: messageBody, quote: nil) + } - let (body, quote) = try splitBodyAndQuote( - blockquoteElement: blockquoteElement, - htmlDocumentWithQuote: htmlDocumentWithQuote, - currentQuoteDescriptor: currentQuoteDescriptor - ) - return MessageBodyQuote(messageBody: quote?.isEmpty ?? true ? messageBody : body, quote: quote) - } catch { - DDLogError("Error splitting blockquote \(error)") + let timeoutTask = Task { + try await Task.sleep(nanoseconds: UInt64(1.5 * Double(NSEC_PER_SEC))) + task.cancel() } - return nil + + let result = await task.value + timeoutTask.cancel() + + return result } private static func findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: Document) throws -> Element? { @@ -76,10 +90,7 @@ public enum MessageBodyUtils { private static func findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: Document) throws -> String { var currentQuoteDescriptor = "" for quoteDescriptor in quoteDescriptors { - let quotedContentElement = try selectElementAndFollowingSiblings( - document: htmlDocumentWithoutQuote, - quoteDescriptor: quoteDescriptor - ) + let quotedContentElement = try htmlDocumentWithoutQuote.select(quoteDescriptor) if !quotedContentElement.isEmpty() { try quotedContentElement.remove() currentQuoteDescriptor = quoteDescriptor @@ -89,9 +100,9 @@ public enum MessageBodyUtils { } private static func splitBodyAndQuote(blockquoteElement: Element?, htmlDocumentWithQuote: Document, - currentQuoteDescriptor: String) throws -> (String, String?) { + currentQuoteDescriptor: String) async throws -> (String, String?) 
{ if currentQuoteDescriptor == blockquote { - for quotedContentElement in try htmlDocumentWithQuote.select(currentQuoteDescriptor) { + for quotedContentElement in try await htmlDocumentWithQuote.select(currentQuoteDescriptor) { if try quotedContentElement.outerHtml() == blockquoteElement?.outerHtml() { try quotedContentElement.remove() break @@ -99,10 +110,7 @@ public enum MessageBodyUtils { } return try (htmlDocumentWithQuote.outerHtml(), blockquoteElement?.outerHtml()) } else if !currentQuoteDescriptor.isEmpty { - let quotedContentElements = try selectElementAndFollowingSiblings( - document: htmlDocumentWithQuote, - quoteDescriptor: currentQuoteDescriptor - ) + let quotedContentElements = try await htmlDocumentWithQuote.select(currentQuoteDescriptor) try quotedContentElements.remove() return try (htmlDocumentWithQuote.outerHtml(), quotedContentElements.outerHtml()) } else { @@ -119,13 +127,6 @@ public enum MessageBodyUtils { return "[class*=\(cssClass)]" } - /// Some mail clients add the history in a new block, at the same level as the old one. - /// And so we match the current block, as well as all those that follow and that are at the same level - /// - Returns: [Elements] containing all the blocks that have been matched - private static func selectElementAndFollowingSiblings(document: Document, quoteDescriptor: String) throws -> Elements { - return try document.select("\(quoteDescriptor), \(quoteDescriptor) ~ *") - } - private static func selectLastParentBlockQuote(document: Document) throws -> Element? 
{ return try document.select("\(blockquote):not(\(blockquote) \(blockquote)):last-of-type").first() } diff --git a/MailCore/Utils/NotificationsHelper.swift b/MailCore/Utils/NotificationsHelper.swift index f382e28b2..06c914600 100644 --- a/MailCore/Utils/NotificationsHelper.swift +++ b/MailCore/Utils/NotificationsHelper.swift @@ -136,7 +136,7 @@ public enum NotificationsHelper { content.title = MailResourcesStrings.Localizable.unknownRecipientTitle } content.subtitle = message.formattedSubject - content.body = getCleanBodyFrom(message: message) + content.body = await getCleanBodyFrom(message: message) content.threadIdentifier = "\(mailboxId)_\(userId)" content.targetContentIdentifier = "\(userId)_\(mailboxId)_\(message.uid)" content.badge = await getUnreadCount() as NSNumber @@ -148,20 +148,21 @@ public enum NotificationsHelper { return content } - private static func getCleanBodyFrom(message: Message) -> String { + private static func getCleanBodyFrom(message: Message) async -> String { guard let fullBody = message.body?.value, - let bodyType = message.body?.type, - let body = MessageBodyUtils.splitBodyAndQuote(messageBody: fullBody)?.messageBody else { + let bodyType = message.body?.type else { return message.preview } + let body = await MessageBodyUtils.splitBodyAndQuote(messageBody: fullBody).messageBody + guard bodyType != "text/plain" else { return body.trimmingCharacters(in: .whitespacesAndNewlines) } do { let basicHtml = try SwiftSoup.clean(body, Whitelist.basic())! 
- let parsedBody = try SwiftSoup.parse(basicHtml) + let parsedBody = try await SwiftSoup.parse(basicHtml) let rawText = try parsedBody.text(trimAndNormaliseWhitespace: false) return rawText.trimmingCharacters(in: .whitespacesAndNewlines) diff --git a/Project.swift b/Project.swift index 9bda2222c..1f781bcf7 100644 --- a/Project.swift +++ b/Project.swift @@ -47,7 +47,7 @@ let project = Project(name: "Mail", .package(url: "https://github.com/dkk/WrappingHStack", .upToNextMajor(from: "2.0.0")), .package(url: "https://github.com/kean/Nuke", .upToNextMajor(from: "12.1.3")), .package(url: "https://github.com/airbnb/lottie-ios", .exact("3.5.0")), - .package(url: "https://github.com/scinfu/SwiftSoup", .upToNextMajor(from: "2.6.1")), + .package(url: "https://github.com/Ambrdctr/SwiftSoup", .branch("master")), .package(url: "https://github.com/johnpatrickmorgan/NavigationBackport", .upToNextMajor(from: "0.8.1")), .package(url: "https://github.com/aheze/Popovers", .upToNextMajor(from: "1.3.2")), .package(url: "https://github.com/shaps80/SwiftUIBackports", .upToNextMajor(from: "1.15.1"))