<a href="https://colab.research.google.com/github/Davies123/gen_test/blob/master/Copy_of_blank_swift.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import Foundation
import FoundationNetworking

let startUrl = URL(string: "https://www.dailyfx.com/")!
let wordToSearch = "Euro"
let maximumPagesToVisit = 20

let semaphore = DispatchSemaphore(value: 0)
var visitedPages: Set<URL> = []
var pagesToVisit: Set<URL> = [startUrl]


func crawl() {
    guard visitedPages.count <= maximumPagesToVisit else {
        print("🏁 Reached max number of pages to visit")
        semaphore.signal()
        return
    }
    guard let pageToVisit = pagesToVisit.popFirst() else {
        print("🏁 No more pages to visit")
        semaphore.signal()
        return
    }
    if visitedPages.contains(pageToVisit) {
        crawl()
    } else {
        visit(page: pageToVisit)
    }
}

func visit(page url: URL) {
    visitedPages.insert(url)

    let task = URLSession.shared.dataTask(with: url) { data, response, error in
        defer { crawl() }
        guard
                let data = data,
                error == nil,
                let document = String(data: data, encoding: .utf8) else { return }
        parse(document: document, url: url)
    }

    print("🔎 Visiting page: \(url)")
    task.resume()
}

func parse(document: String, url: URL) {
    func find(word: String) {
        if document.contains(word) {
            print("✅ Word '\(word)' found at page \(url)")
        }
    }

    func collectLinks() -> [URL] {
        func getMatches(pattern: String, text: String) -> [String] {

            func trim(url: String) -> String {
                return String(url.dropLast()).substring(from: url.index(url.startIndex, offsetBy: "href=\"".count))
            }

            let regex = try! NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
            let matches = regex.matches(in: text, options: [.reportCompletion], range: NSRange(location: 0, length: text.count))
            return matches.map { trim(url: (text as NSString).substring(with: $0.range)) }
        }

        let pattern = "href=\"(http://.*?|https://.*?)\""
        let matches = getMatches(pattern: pattern, text: document)
        return matches.flatMap { URL(string: $0) }
    }

    find(word: wordToSearch)
    collectLinks().forEach { pagesToVisit.insert($0) }
}

crawl()
semaphore.wait()


🔎 Visiting page: https://www.dailyfx.com/
✅ Word 'Euro' found at page https://www.dailyfx.com/
🔎 Visiting page: https://plus.dailyfx.com/home.do?ib=dailyfx4
🔎 Visiting page: https://a.c-dn.net/b/21ajwL.css#3df67a09-ie6.css
🔎 Visiting page: https://a.c-dn.net/b/2pItEj.css#e0123057-dfx.css
✅ Word 'Euro' found at page https://a.c-dn.net/b/2pItEj.css#e0123057-dfx.css
🔎 Visiting page: https://a.c-dn.net/b/3TLLLv.css#c1380573-dfxp.css
🔎 Visiting page: https://www.dailyfxasia.com/cn
✅ Word 'Euro' found at page https://www.dailyfxasia.com/cn
🔎 Visiting page: https://www.youtube.com/user/DailyFXNews?sub_confirmation=1
✅ Word 'Euro' found at page https://www.youtube.com/user/DailyFXNews?sub_confirmation=1
🔎 Visiting page: http://www.youtube.com/user/DailyFXNews
✅ Word 'Euro' found at page http://www.youtube.com/user/DailyFXNews
🔎 Visiting page: https://accounts.google.com/ServiceLogin?uilel=3&amp;service=youtube&amp;passive=true&amp;hl=en&amp;continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fact