Skip to content

Commit

Permalink
FIXED: Amazon links did not contain og tags with the default user agent
Browse files Browse the repository at this point in the history
- Added 'userAgent' with a default value to preserve current behavior
- Included the 'googleBotUserAgent' static which can be used to override the default
- With the Googlebot user agent, Amazon pages not only include OG tags but are also much smaller, because Amazon optimizes them for Google search indexing (a huge performance boost for SLP)
- Added unit tests for Amazon links and tested across several other major ecommerce sites
  • Loading branch information
BrightChad committed Nov 13, 2021
1 parent 250e850 commit 546b86e
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 4 deletions.
18 changes: 14 additions & 4 deletions Sources/SwiftLinkPreview.swift
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,22 @@ open class SwiftLinkPreview: NSObject {
/// Queue supplied at initialization; falls back to `SwiftLinkPreview.defaultWorkQueue`.
/// NOTE(review): presumably the queue on which preview extraction runs — confirm against the implementation.
public let workQueue: DispatchQueue
/// Queue supplied at initialization; falls back to `DispatchQueue.main`.
/// NOTE(review): presumably the queue on which callbacks are delivered — confirm against the implementation.
public let responseQueue: DispatchQueue
/// Cache implementation chosen at initialization; `DisabledCache.instance` unless another one is supplied or enabled.
public let cache: Cache
/// Value sent as the user agent header of the page request; falls back to `SwiftLinkPreview.defaultUserAgent`.
public let userAgent: String

/// Work queue used when the caller does not provide one: the global `.userInitiated` dispatch queue.
public static let defaultWorkQueue = DispatchQueue.global(qos: .userInitiated)

/// User agent used when the caller does not provide one (a desktop Chrome 51 on Linux identifier).
public static let defaultUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
/// Googlebot user agent that callers can pass as `userAgent` to override the default.
/// Background discussion: https://github.com/jhy/jsoup/issues/976
public static let googleBotUserAgent = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"

// MARK: - Constructor

//Swift-only init with default parameters
@nonobjc public init(session: URLSession = URLSession.shared, workQueue: DispatchQueue = SwiftLinkPreview.defaultWorkQueue, responseQueue: DispatchQueue = DispatchQueue.main, cache: Cache = DisabledCache.instance) {
/// Swift-only initializer. Every argument is optional at the call site:
/// - Parameters:
///   - session: URL session stored on the instance; defaults to `URLSession.shared`.
///   - workQueue: Stored as `workQueue`; defaults to `SwiftLinkPreview.defaultWorkQueue`.
///   - responseQueue: Stored as `responseQueue`; defaults to `DispatchQueue.main`.
///   - cache: Stored as `cache`; defaults to `DisabledCache.instance`.
///   - userAgent: Stored as `userAgent`; defaults to `SwiftLinkPreview.defaultUserAgent`.
@nonobjc public init(
    session: URLSession = URLSession.shared,
    workQueue: DispatchQueue = SwiftLinkPreview.defaultWorkQueue,
    responseQueue: DispatchQueue = DispatchQueue.main,
    cache: Cache = DisabledCache.instance,
    userAgent: String = SwiftLinkPreview.defaultUserAgent
) {
    // Networking configuration first, then the dispatch queues.
    self.session = session
    self.userAgent = userAgent
    self.cache = cache
    self.workQueue = workQueue
    self.responseQueue = responseQueue
}

//Objective-C init with default parameters
Expand All @@ -57,25 +62,29 @@ open class SwiftLinkPreview: NSObject {
let _workQueue: DispatchQueue = SwiftLinkPreview.defaultWorkQueue
let _responseQueue: DispatchQueue = DispatchQueue.main
let _cache: Cache = DisabledCache.instance
let _userAgent: String = SwiftLinkPreview.defaultUserAgent

self.workQueue = _workQueue
self.responseQueue = _responseQueue
self.cache = _cache
self.session = _session
self.userAgent = _userAgent
}

//Objective-C init with parameters. nil objects will default. Timeout values are ignored if InMemoryCache is disabled.
@objc public init(session: URLSession?, workQueue: DispatchQueue?, responseQueue: DispatchQueue?, disableInMemoryCache: Bool, cacheInvalidationTimeout: TimeInterval, cacheCleanupInterval: TimeInterval) {
/// Objective-C initializer that keeps the six-argument selector available to existing callers.
///
/// Adding `userAgent:` to the designated initializer changes its Objective-C selector, so code written
/// against the six-argument form would no longer compile. This overload forwards to the designated
/// initializer with `userAgent: nil`, which selects `SwiftLinkPreview.defaultUserAgent`.
@objc public convenience init(session: URLSession?, workQueue: DispatchQueue?, responseQueue: DispatchQueue?, disableInMemoryCache: Bool, cacheInvalidationTimeout: TimeInterval, cacheCleanupInterval: TimeInterval) {
    self.init(
        session: session,
        workQueue: workQueue,
        responseQueue: responseQueue,
        disableInMemoryCache: disableInMemoryCache,
        cacheInvalidationTimeout: cacheInvalidationTimeout,
        cacheCleanupInterval: cacheCleanupInterval,
        userAgent: nil
    )
}

/// Objective-C initializer with explicit parameters; every `nil` argument falls back to its default.
///
/// - Parameters:
///   - session: URL session to store; `nil` selects `URLSession.shared`.
///   - workQueue: Stored as `workQueue`; `nil` selects `SwiftLinkPreview.defaultWorkQueue`.
///   - responseQueue: Stored as `responseQueue`; `nil` selects `DispatchQueue.main`.
///   - disableInMemoryCache: When `true`, `DisabledCache.instance` is used and both timing values are ignored.
///   - cacheInvalidationTimeout: Passed to `InMemoryCache` as `invalidationTimeout` when caching is enabled.
///   - cacheCleanupInterval: Passed to `InMemoryCache` as `cleanupInterval` when caching is enabled.
///   - userAgent: Stored as `userAgent`; `nil` selects `SwiftLinkPreview.defaultUserAgent`.
@objc public init(session: URLSession?, workQueue: DispatchQueue?, responseQueue: DispatchQueue?, disableInMemoryCache: Bool, cacheInvalidationTimeout: TimeInterval, cacheCleanupInterval: TimeInterval, userAgent: String?) {

    let _session = session ?? URLSession.shared
    let _workQueue = workQueue ?? SwiftLinkPreview.defaultWorkQueue
    let _responseQueue = responseQueue ?? DispatchQueue.main
    // The in-memory cache is only constructed when caching is enabled.
    let _cache: Cache = disableInMemoryCache ? DisabledCache.instance : InMemoryCache(invalidationTimeout: cacheInvalidationTimeout, cleanupInterval: cacheCleanupInterval)
    let _userAgent = userAgent ?? SwiftLinkPreview.defaultUserAgent

    self.workQueue = _workQueue
    self.responseQueue = _responseQueue
    self.cache = _cache
    self.session = _session
    self.userAgent = _userAgent

}

Expand Down Expand Up @@ -380,7 +389,8 @@ extension SwiftLinkPreview {
}
var request = URLRequest( url: sourceUrl )
request.addValue("text/html,application/xhtml+xml,application/xml", forHTTPHeaderField: "Accept")
request.addValue("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36", forHTTPHeaderField: "user-Agent")
request.addValue(self.userAgent, forHTTPHeaderField: "user-Agent")

let (data, urlResponse, error) = session.synchronousDataTask(with: request )
if let error = error {
if !cancellable.isCancelled {
Expand Down
55 changes: 55 additions & 0 deletions SwiftLinkPreviewTests/HugeTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,60 @@ class HugeTests: XCTestCase {
}

}

// MARK: - Amazon
/// Checks that an Amazon product page yields a non-empty title and an image when it is fetched
/// with the Googlebot user agent.
///
/// NOTE(review): despite "DefaultUserAgent" in the name, this test overrides the user agent with
/// `SwiftLinkPreview.googleBotUserAgent`. The name is kept so existing test references stay valid.
func testAmazonLinksWithDefaultUserAgent() {

    // Amazon pages are huge and the HTML served differs a lot depending on the user agent string.
    // For some user agents the page has no og tags, so the title and images cannot be located.
    let amazonUrl = "https://www.amazon.com/Beginning-HTML5-CSS3-Dummies-Tittel/dp/1118657209/"
    let expectation = self.expectation(description: "Loading web page")
    var result: Response?

    let updatedSlp = SwiftLinkPreview(userAgent: SwiftLinkPreview.googleBotUserAgent)

    updatedSlp.preview(amazonUrl) {

        result = $0
        expectation.fulfill()

    } onError: { error in

        XCTFail("Preview request failed: \(error)")
        // Fulfill on failure as well, so the test reports the error right away instead of
        // stalling until the timeout below expires.
        expectation.fulfill()

    }

    waitForExpectations(timeout: 15, handler: nil)

    // Fail with a message instead of crashing the test run on a force unwrap.
    guard let response = result else {
        XCTFail("No preview response was received")
        return
    }
    guard let title = response.title else {
        XCTFail("Preview response has no title")
        return
    }
    XCTAssertFalse(title.trim.isEmpty)
    XCTAssertNotNil(response.image)

}

/// Checks the behavior of the shared `slp` instance against an Amazon product page: a non-empty
/// title is expected, but no image.
///
/// NOTE(review): `slp` is declared outside this method; presumably it uses the default user agent,
/// for which Amazon serves a page without og image tags — confirm against the fixture.
func testAmazonLinksWithOriginalSlpUserAgent() {

    // Amazon pages are huge and the HTML served differs a lot depending on the user agent string.
    // For some user agents the page has no og tags, so the title and images cannot be located.
    let amazonUrl = "https://www.amazon.com/Beginning-HTML5-CSS3-Dummies-Tittel/dp/1118657209/"
    let expectation = self.expectation(description: "Loading web page")
    var result: Response?

    slp.preview(amazonUrl) {

        result = $0
        expectation.fulfill()

    } onError: { error in

        XCTFail("Preview request failed: \(error)")
        // Fulfill on failure as well, so the test reports the error right away instead of
        // stalling until the timeout below expires.
        expectation.fulfill()

    }

    waitForExpectations(timeout: 15, handler: nil)

    // Fail with a message instead of crashing the test run on a force unwrap.
    guard let response = result else {
        XCTFail("No preview response was received")
        return
    }
    guard let title = response.title else {
        XCTFail("Preview response has no title")
        return
    }
    XCTAssertFalse(title.trim.isEmpty)
    XCTAssertNil(response.image)

}

}

0 comments on commit 546b86e

Please sign in to comment.