From adfcbe0afbcc364e4317bd606f0d24457f0cc826 Mon Sep 17 00:00:00 2001 From: Valentin Perignon Date: Tue, 27 Feb 2024 14:28:56 +0100 Subject: [PATCH] feat: Update blockquote detection algorithm --- .../Thread/Message/MessageBodyView.swift | 2 +- .../Message/MessageView+Preprocessing.swift | 2 +- MailCore/Models/Message.swift | 8 +- MailCore/Utils/MessageBodyUtils.swift | 76 +++++++++---------- 4 files changed, 42 insertions(+), 46 deletions(-) diff --git a/Mail/Views/Thread/Message/MessageBodyView.swift b/Mail/Views/Thread/Message/MessageBodyView.swift index a10d47a0e4..132a33aa5b 100644 --- a/Mail/Views/Thread/Message/MessageBodyView.swift +++ b/Mail/Views/Thread/Message/MessageBodyView.swift @@ -60,7 +60,7 @@ struct MessageBodyView: View { loadBody(blockRemoteContent: newValue) } - if presentableBody.quote != nil { + if !presentableBody.quotes.isEmpty { Button(model.showBlockQuote ? MailResourcesStrings.Localizable.messageHideQuotedText : MailResourcesStrings.Localizable.messageShowQuotedText) { diff --git a/Mail/Views/Thread/Message/MessageView+Preprocessing.swift b/Mail/Views/Thread/Message/MessageView+Preprocessing.swift index 6f1a7a09bb..ed3f044e96 100644 --- a/Mail/Views/Thread/Message/MessageView+Preprocessing.swift +++ b/Mail/Views/Thread/Message/MessageView+Preprocessing.swift @@ -227,7 +227,7 @@ final class InlineAttachmentWorker { let updatedPresentableBody = PresentableBody( body: detachedBody, compactBody: compactBodyCopy, - quote: presentableBody.quote + quotes: presentableBody.quotes ) // Mutate DOM if task is active diff --git a/MailCore/Models/Message.swift b/MailCore/Models/Message.swift index cb8dc82591..98653ef899 100644 --- a/MailCore/Models/Message.swift +++ b/MailCore/Models/Message.swift @@ -485,19 +485,19 @@ public struct MessageActionResult: Codable { public struct PresentableBody: Equatable { public var body: Body? public var compactBody: String? - public var quote: String? + public var quotes = [String]() public init(message: Message) { body = message.body } - public init(body: Body?, compactBody: String?, quote: String?) { + public init(body: Body?, compactBody: String?, quotes: [String]) { self.body = body self.compactBody = compactBody - self.quote = quote + self.quotes = quotes } public init(presentableBody: PresentableBody) { - self.init(body: presentableBody.body, compactBody: presentableBody.compactBody, quote: presentableBody.quote) + self.init(body: presentableBody.body, compactBody: presentableBody.compactBody, quotes: presentableBody.quotes) } } diff --git a/MailCore/Utils/MessageBodyUtils.swift b/MailCore/Utils/MessageBodyUtils.swift index bc851cab6a..626d2790ae 100644 --- a/MailCore/Utils/MessageBodyUtils.swift +++ b/MailCore/Utils/MessageBodyUtils.swift @@ -25,9 +25,9 @@ public enum MessageBodyUtils { private static let blockquote = "blockquote" private static var quoteDescriptors = [ - // Do not detect this quote as long as we can't detect siblings quotes or else a single reply will be missing among the - // many replies of an Outlook reply "chain", which is worst than simply ignoring it -// "#divRplyFwdMsg", // Outlook + // The reply and forward #divRplyFwdMsg div only contains the header. + // The previous message body is written right next to this div and can't be detected + // "#divRplyFwdMsg", // Outlook "#isForwardContent", "#isReplyContent", "#mailcontent:not(table)", @@ -42,7 +42,7 @@ public enum MessageBodyUtils { anyCssClassContaining(cssClass: "yahoo_quoted"), anyCssClassContaining(cssClass: "zmail_extra"), // Zoho "[name=\"quote\"]", // GMX - "blockquote[type=\"cite\"]" + "blockquote[type=\"cite\"]" // iOS and macOS Mail ] public static func prepareWithPrintOption(message: Message) async -> PresentableBody? { @@ -62,39 +62,38 @@ public enum MessageBodyUtils { return PresentableBody( body: messageBody, compactBody: messageBodyQuote.messageBody, - quote: messageBodyQuote.quote + quotes: messageBodyQuote.quotes ) } catch { print("error: \(error.localizedDescription)") } - let messageBodyQuote = MessageBodyQuote(messageBody: bodyValue, quote: nil) - return PresentableBody(body: messageBody, compactBody: messageBodyQuote.messageBody, quote: messageBodyQuote.quote) + let messageBodyQuote = MessageBodyQuote(messageBody: bodyValue) + return PresentableBody(body: messageBody, compactBody: messageBodyQuote.messageBody, quotes: messageBodyQuote.quotes) } - public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote { - let task = Task { - do { - let htmlDocumentWithQuote = try await SwiftSoup.parse(messageBody) - let htmlDocumentWithoutQuote = try await SwiftSoup.parse(messageBody) + public static func splitContentAndQuote(body: String) async throws -> (String, [String]) { + let parsedBody = try await SwiftSoup.parse(body) - let blockquoteElement = try findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote) - var currentQuoteDescriptor = - try findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote) + var quotes = [String]() + for quoteDescriptor in quoteDescriptors { + let foundQuotes = try await parsedBody.select(quoteDescriptor) + for foundQuote in foundQuotes { + try quotes.append(foundQuote.outerHtml()) + try foundQuote.remove() + } + } - if currentQuoteDescriptor.isEmpty { - currentQuoteDescriptor = blockquoteElement == nil ? "" : blockquote - } + return try (parsedBody.outerHtml(), quotes) + } - let (body, quote) = try await splitBodyAndQuote( - blockquoteElement: blockquoteElement, - htmlDocumentWithQuote: htmlDocumentWithQuote, - currentQuoteDescriptor: currentQuoteDescriptor - ) - return MessageBodyQuote(messageBody: quote?.isEmpty ?? true ? messageBody : body, quote: quote) + public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote { + let task = Task { + do { + return try await extractQuotesFromBody(messageBody) } catch { DDLogError("Error splitting blockquote \(error)") + return MessageBodyQuote(messageBody: messageBody) } - return MessageBodyQuote(messageBody: messageBody, quote: nil) } let timeoutTask = Task { @@ -108,22 +107,19 @@ public enum MessageBodyUtils { return result } - private static func findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: Document) throws -> Element? { - let element = try selectLastParentBlockQuote(document: htmlDocumentWithoutQuote) - try element?.remove() - return element - } + private static func extractQuotesFromBody(_ body: String) async throws -> MessageBodyQuote { + let parsedBody = try await SwiftSoup.parse(body) - private static func findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: Document) throws -> String { - var currentQuoteDescriptor = "" + var quotes = [String]() for quoteDescriptor in quoteDescriptors { - let quotedContentElement = try htmlDocumentWithoutQuote.select(quoteDescriptor) - if !quotedContentElement.isEmpty() { - try quotedContentElement.remove() - currentQuoteDescriptor = quoteDescriptor + let foundQuotes = try await parsedBody.select(quoteDescriptor) + for foundQuote in foundQuotes { + try quotes.append(foundQuote.outerHtml()) + try foundQuote.remove() } } - return currentQuoteDescriptor + + return try MessageBodyQuote(messageBody: parsedBody.outerHtml(), quotes: quotes) } private static func splitBodyAndQuote(blockquoteElement: Element?, htmlDocumentWithQuote: Document, @@ -238,10 +234,10 @@ public enum MessageBodyUtils { public struct MessageBodyQuote { public let messageBody: String - public let quote: String? + public let quotes: [String] - public init(messageBody: String, quote: String?) { + public init(messageBody: String, quotes: [String] = []) { self.messageBody = messageBody - self.quote = quote + self.quotes = quotes } }