Skip to content

Commit

Permalink
feat: Update blockquote detection algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
valentinperignon committed Feb 27, 2024
1 parent c2e690f commit f0f87e0
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 50 deletions.
2 changes: 1 addition & 1 deletion Mail/Views/Thread/Message/MessageBodyView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct MessageBodyView: View {
loadBody(blockRemoteContent: newValue)
}

if presentableBody.quote != nil {
if !presentableBody.quotes.isEmpty {
Button(model.showBlockQuote
? MailResourcesStrings.Localizable.messageHideQuotedText
: MailResourcesStrings.Localizable.messageShowQuotedText) {
Expand Down
2 changes: 1 addition & 1 deletion Mail/Views/Thread/Message/MessageView+Preprocessing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ final class InlineAttachmentWorker {
let updatedPresentableBody = PresentableBody(
body: detachedBody,
compactBody: compactBodyCopy,
quote: presentableBody.quote
quotes: presentableBody.quotes
)

// Mutate DOM if task is active
Expand Down
8 changes: 4 additions & 4 deletions MailCore/Models/Message.swift
Original file line number Diff line number Diff line change
Expand Up @@ -485,19 +485,19 @@ public struct MessageActionResult: Codable {
public struct PresentableBody: Equatable {
public var body: Body?
public var compactBody: String?
public var quote: String?
public var quotes = [String]()

public init(message: Message) {
body = message.body
}

public init(body: Body?, compactBody: String?, quote: String?) {
public init(body: Body?, compactBody: String?, quotes: [String]) {
self.body = body
self.compactBody = compactBody
self.quote = quote
self.quotes = quotes
}

public init(presentableBody: PresentableBody) {
self.init(body: presentableBody.body, compactBody: presentableBody.compactBody, quote: presentableBody.quote)
self.init(body: presentableBody.body, compactBody: presentableBody.compactBody, quotes: presentableBody.quotes)
}
}
80 changes: 36 additions & 44 deletions MailCore/Utils/MessageBodyUtils.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ public enum MessageBodyUtils {
private static let blockquote = "blockquote"

private static var quoteDescriptors = [
// Do not detect this quote as long as we can't detect sibling quotes, or else a single reply will be missing among the
// many replies of an Outlook reply "chain", which is worse than simply ignoring it
// "#divRplyFwdMsg", // Outlook
// The reply and forward #divRplyFwdMsg div only contains the header.
// The previous message body is written right next to this div and can't be detected
// "#divRplyFwdMsg", // Outlook
"#isForwardContent",
"#isReplyContent",
"#mailcontent:not(table)",
Expand All @@ -42,7 +42,7 @@ public enum MessageBodyUtils {
anyCssClassContaining(cssClass: "yahoo_quoted"),
anyCssClassContaining(cssClass: "zmail_extra"), // Zoho
"[name=\"quote\"]", // GMX
"blockquote[type=\"cite\"]"
"blockquote[type=\"cite\"]" // iOS and macOS Mail
]

public static func prepareWithPrintOption(message: Message) async -> PresentableBody? {
Expand All @@ -62,39 +62,38 @@ public enum MessageBodyUtils {
return PresentableBody(
body: messageBody,
compactBody: messageBodyQuote.messageBody,
quote: messageBodyQuote.quote
quotes: messageBodyQuote.quotes
)
} catch {
print("error: \(error.localizedDescription)")
}
let messageBodyQuote = MessageBodyQuote(messageBody: bodyValue, quote: nil)
return PresentableBody(body: messageBody, compactBody: messageBodyQuote.messageBody, quote: messageBodyQuote.quote)
let messageBodyQuote = MessageBodyQuote(messageBody: bodyValue)
return PresentableBody(body: messageBody, compactBody: messageBodyQuote.messageBody, quotes: messageBodyQuote.quotes)
}

public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote {
let task = Task {
do {
let htmlDocumentWithQuote = try await SwiftSoup.parse(messageBody)
let htmlDocumentWithoutQuote = try await SwiftSoup.parse(messageBody)
public static func splitContentAndQuote(body: String) async throws -> (String, [String]) {
let parsedBody = try await SwiftSoup.parse(body)

let blockquoteElement = try findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote)
var currentQuoteDescriptor =
try findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote)
var quotes = [String]()
for quoteDescriptor in quoteDescriptors {
let foundQuotes = try await parsedBody.select(quoteDescriptor)
for foundQuote in foundQuotes {
try quotes.append(foundQuote.outerHtml())
try foundQuote.remove()
}
}

if currentQuoteDescriptor.isEmpty {
currentQuoteDescriptor = blockquoteElement == nil ? "" : blockquote
}
return try (parsedBody.outerHtml(), quotes)
}

let (body, quote) = try await splitBodyAndQuote(
blockquoteElement: blockquoteElement,
htmlDocumentWithQuote: htmlDocumentWithQuote,
currentQuoteDescriptor: currentQuoteDescriptor
)
return MessageBodyQuote(messageBody: quote?.isEmpty ?? true ? messageBody : body, quote: quote)
public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote {
let task = Task {
do {
return try await extractQuotesFromBody(messageBody)
} catch {
DDLogError("Error splitting blockquote \(error)")
return MessageBodyQuote(messageBody: messageBody)
}
return MessageBodyQuote(messageBody: messageBody, quote: nil)
}

let timeoutTask = Task {
Expand All @@ -108,22 +107,19 @@ public enum MessageBodyUtils {
return result
}

private static func findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: Document) throws -> Element? {
let element = try selectLastParentBlockQuote(document: htmlDocumentWithoutQuote)
try element?.remove()
return element
}
private static func extractQuotesFromBody(_ body: String) async throws -> MessageBodyQuote {
let parsedBody = try await SwiftSoup.parse(body)

private static func findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: Document) throws -> String {
var currentQuoteDescriptor = ""
var quotes = [String]()
for quoteDescriptor in quoteDescriptors {
let quotedContentElement = try htmlDocumentWithoutQuote.select(quoteDescriptor)
if !quotedContentElement.isEmpty() {
try quotedContentElement.remove()
currentQuoteDescriptor = quoteDescriptor
let foundQuotes = try await parsedBody.select(quoteDescriptor)
for foundQuote in foundQuotes {
try quotes.append(foundQuote.outerHtml())
try foundQuote.remove()
}
}
return currentQuoteDescriptor

return try MessageBodyQuote(messageBody: parsedBody.outerHtml(), quotes: quotes)
}

private static func splitBodyAndQuote(blockquoteElement: Element?, htmlDocumentWithQuote: Document,
Expand Down Expand Up @@ -230,18 +226,14 @@ public enum MessageBodyUtils {
private static func anyCssClassContaining(cssClass: String) -> String {
return "[class*=\(cssClass)]"
}

private static func selectLastParentBlockQuote(document: Document) throws -> Element? {
return try document.select("\(blockquote):not(\(blockquote) \(blockquote)):last-of-type").first()
}
}

public struct MessageBodyQuote {
public let messageBody: String
public let quote: String?
public let quotes: [String]

public init(messageBody: String, quote: String?) {
public init(messageBody: String, quotes: [String] = []) {
self.messageBody = messageBody
self.quote = quote
self.quotes = quotes
}
}

0 comments on commit f0f87e0

Please sign in to comment.