Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Update blockquote detection algorithm #1307

Merged
merged 1 commit into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Mail/Views/Thread/Message/MessageBodyView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct MessageBodyView: View {
loadBody(blockRemoteContent: newValue)
}

if presentableBody.quote != nil {
if !presentableBody.quotes.isEmpty {
Button(model.showBlockQuote
? MailResourcesStrings.Localizable.messageHideQuotedText
: MailResourcesStrings.Localizable.messageShowQuotedText) {
Expand Down
2 changes: 1 addition & 1 deletion Mail/Views/Thread/Message/MessageView+Preprocessing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ final class InlineAttachmentWorker {
let updatedPresentableBody = PresentableBody(
body: detachedBody,
compactBody: compactBodyCopy,
quote: presentableBody.quote
quotes: presentableBody.quotes
)

// Mutate DOM if task is active
Expand Down
8 changes: 4 additions & 4 deletions MailCore/Models/Message.swift
Original file line number Diff line number Diff line change
Expand Up @@ -485,19 +485,19 @@ public struct MessageActionResult: Codable {
public struct PresentableBody: Equatable {
public var body: Body?
public var compactBody: String?
public var quote: String?
public var quotes = [String]()

public init(message: Message) {
body = message.body
}

public init(body: Body?, compactBody: String?, quote: String?) {
public init(body: Body?, compactBody: String?, quotes: [String]) {
self.body = body
self.compactBody = compactBody
self.quote = quote
self.quotes = quotes
}

public init(presentableBody: PresentableBody) {
self.init(body: presentableBody.body, compactBody: presentableBody.compactBody, quote: presentableBody.quote)
self.init(body: presentableBody.body, compactBody: presentableBody.compactBody, quotes: presentableBody.quotes)
}
}
80 changes: 36 additions & 44 deletions MailCore/Utils/MessageBodyUtils.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ public enum MessageBodyUtils {
private static let blockquote = "blockquote"

private static var quoteDescriptors = [
// Do not detect this quote as long as we can't detect sibling quotes, or else a single reply will be missing among the
// many replies of an Outlook reply "chain", which is worse than simply ignoring it
// "#divRplyFwdMsg", // Outlook
// The reply and forward #divRplyFwdMsg div only contains the header.
// The previous message body is written right next to this div and can't be detected
// "#divRplyFwdMsg", // Outlook
"#isForwardContent",
"#isReplyContent",
"#mailcontent:not(table)",
Expand All @@ -42,7 +42,7 @@ public enum MessageBodyUtils {
anyCssClassContaining(cssClass: "yahoo_quoted"),
anyCssClassContaining(cssClass: "zmail_extra"), // Zoho
"[name=\"quote\"]", // GMX
"blockquote[type=\"cite\"]"
"blockquote[type=\"cite\"]" // iOS and macOS Mail
]

public static func prepareWithPrintOption(message: Message) async -> PresentableBody? {
Expand All @@ -62,39 +62,38 @@ public enum MessageBodyUtils {
return PresentableBody(
body: messageBody,
compactBody: messageBodyQuote.messageBody,
quote: messageBodyQuote.quote
quotes: messageBodyQuote.quotes
)
} catch {
print("error: \(error.localizedDescription)")
}
let messageBodyQuote = MessageBodyQuote(messageBody: bodyValue, quote: nil)
return PresentableBody(body: messageBody, compactBody: messageBodyQuote.messageBody, quote: messageBodyQuote.quote)
let messageBodyQuote = MessageBodyQuote(messageBody: bodyValue)
return PresentableBody(body: messageBody, compactBody: messageBodyQuote.messageBody, quotes: messageBodyQuote.quotes)
}

public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote {
let task = Task {
do {
let htmlDocumentWithQuote = try await SwiftSoup.parse(messageBody)
let htmlDocumentWithoutQuote = try await SwiftSoup.parse(messageBody)
public static func splitContentAndQuote(body: String) async throws -> (String, [String]) {
let parsedBody = try await SwiftSoup.parse(body)

let blockquoteElement = try findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote)
var currentQuoteDescriptor =
try findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: htmlDocumentWithoutQuote)
var quotes = [String]()
for quoteDescriptor in quoteDescriptors {
let foundQuotes = try await parsedBody.select(quoteDescriptor)
for foundQuote in foundQuotes {
try quotes.append(foundQuote.outerHtml())
try foundQuote.remove()
}
}

if currentQuoteDescriptor.isEmpty {
currentQuoteDescriptor = blockquoteElement == nil ? "" : blockquote
}
return try (parsedBody.outerHtml(), quotes)
}

let (body, quote) = try await splitBodyAndQuote(
blockquoteElement: blockquoteElement,
htmlDocumentWithQuote: htmlDocumentWithQuote,
currentQuoteDescriptor: currentQuoteDescriptor
)
return MessageBodyQuote(messageBody: quote?.isEmpty ?? true ? messageBody : body, quote: quote)
public static func splitBodyAndQuote(messageBody: String) async -> MessageBodyQuote {
let task = Task {
do {
return try await extractQuotesFromBody(messageBody)
} catch {
DDLogError("Error splitting blockquote \(error)")
return MessageBodyQuote(messageBody: messageBody)
}
return MessageBodyQuote(messageBody: messageBody, quote: nil)
}

let timeoutTask = Task {
Expand All @@ -108,22 +107,19 @@ public enum MessageBodyUtils {
return result
}

private static func findAndRemoveLastParentBlockQuote(htmlDocumentWithoutQuote: Document) throws -> Element? {
let element = try selectLastParentBlockQuote(document: htmlDocumentWithoutQuote)
try element?.remove()
return element
}
private static func extractQuotesFromBody(_ body: String) async throws -> MessageBodyQuote {
let parsedBody = try await SwiftSoup.parse(body)

private static func findFirstKnownParentQuoteDescriptor(htmlDocumentWithoutQuote: Document) throws -> String {
var currentQuoteDescriptor = ""
var quotes = [String]()
for quoteDescriptor in quoteDescriptors {
let quotedContentElement = try htmlDocumentWithoutQuote.select(quoteDescriptor)
if !quotedContentElement.isEmpty() {
try quotedContentElement.remove()
currentQuoteDescriptor = quoteDescriptor
let foundQuotes = try await parsedBody.select(quoteDescriptor)
for foundQuote in foundQuotes {
try quotes.append(foundQuote.outerHtml())
try foundQuote.remove()
}
}
return currentQuoteDescriptor

return try MessageBodyQuote(messageBody: parsedBody.outerHtml(), quotes: quotes)
}

private static func splitBodyAndQuote(blockquoteElement: Element?, htmlDocumentWithQuote: Document,
Expand Down Expand Up @@ -230,18 +226,14 @@ public enum MessageBodyUtils {
/// Builds a CSS attribute selector that matches any element whose `class` attribute contains `cssClass` as a substring.
/// - Parameter cssClass: Text to look for inside the `class` attribute; it is interpolated unquoted into the selector.
/// - Returns: The selector string `[class*=<cssClass>]`.
private static func anyCssClassContaining(cssClass: String) -> String {
return "[class*=\(cssClass)]"
}

private static func selectLastParentBlockQuote(document: Document) throws -> Element? {
return try document.select("\(blockquote):not(\(blockquote) \(blockquote)):last-of-type").first()
}
}

public struct MessageBodyQuote {
public let messageBody: String
public let quote: String?
public let quotes: [String]

public init(messageBody: String, quote: String?) {
public init(messageBody: String, quotes: [String] = []) {
self.messageBody = messageBody
self.quote = quote
self.quotes = quotes
}
}
Loading