-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.clj
47 lines (35 loc) · 1.25 KB
/
utils.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
(ns link-checker.utils
(:require [clojure.string :as string]))
(def regex-char-esc-smap
  "Map from a regex metacharacter (as a char) to its backslash-escaped
  string form, e.g. \\. -> \"\\\\.\".

  Covers grouping, character-class, quantifier-brace, alternation and
  backslash characters in addition to the punctuation originally listed, so
  that text containing any of them can be embedded literally in a pattern."
  (let [esc-chars "()[]{}|\\&^%$#!?*.+"]
    (zipmap esc-chars
            (map #(str "\\" %) esc-chars))))
(defn str-to-pattern
  "Turns the plain string `s` into regex source text that matches it
  literally.

  First replaces the HTML entity `&amp;` with a bare `&`, then substitutes
  every character that has an entry in `regex-char-esc-smap` with that
  entry's escaped form. Returns a string (not a compiled pattern); an empty
  `s` yields \"\"."
  [s]
  (let [decoded (string/replace s #"&amp;" "&")]
    ;; string/escape looks each char up in the map and appends the
    ;; replacement (or the char itself) to a single StringBuilder.
    (string/escape decoded regex-char-esc-smap)))
(defn good-ref?
  "Searches the `html` string for an anchor target named by `ref`.

  A single leading \"#\" on `ref` is dropped, the remainder is escaped with
  `str-to-pattern`, and `html` is searched for `id=` or `name=` followed by
  that value wrapped in single or double quotes. Returns the `re-find`
  result (the matched text) or nil when nothing matches."
  [ref html]
  (let [anchor  (cond-> ref
                  (string/starts-with? ref "#") (subs 1))
        escaped (str-to-pattern anchor)
        matcher (re-pattern (str "id=['\"]" escaped "['\"]|name=['\"]" escaped "['\"]"))]
    (re-find matcher html)))
(defn bad-ref?
  "Logical negation of `good-ref?`: true when no `id`/`name` match for `ref`
  is found in `html`, false otherwise."
  [ref html]
  (if (good-ref? ref html)
    false
    true))
(defn drop-ref
  "Returns `url` with its fragment removed: everything from the first \"#\"
  onward is dropped (http://domain/path#ref -> http://domain/path).

  A url without \"#\" is returned unchanged. A url that consists only of a
  fragment (including a bare \"#\") yields \"\". A nil url yields nil."
  [url]
  (when (some? url)
    ;; Limit 2: only the first "#" matters, and a positive limit keeps
    ;; trailing empty strings, so "#" splits to ["" ""] instead of [].
    (first (string/split url #"#" 2))))
(defn revert-result
  "Inverts a collection of broken links into one entry per referring page.

  Each element of `broken-links` is read for `:url` and `:from`. Every item
  in `:from` is emitted with `:bad-url` set to the broken link's `:url`
  (fragment kept) and with its own `:url` passed through `drop-ref`
  (http://domain/path#ref -> http://domain/path). Duplicate entries are
  removed; the result is a lazy sequence."
  [broken-links]
  (distinct
   (for [link      broken-links
         from-link (:from link)]
     (-> from-link
         (assoc :bad-url (:url link))
         (update :url drop-ref)))))