-
Notifications
You must be signed in to change notification settings - Fork 0
/
irc-log-to-html.clj
172 lines (158 loc) · 7.87 KB
/
irc-log-to-html.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
(ns irc-log
(:use [clojure.string :as str :only []]
[clojure.java.shell :only [sh]])
(:import (java.util Date)
(java.text SimpleDateFormat)
(java.nio ByteBuffer)
(java.io File BufferedReader FileInputStream FileOutputStream
InputStreamReader OutputStreamWriter)))
(def #^SimpleDateFormat file-name-fmt (SimpleDateFormat. "yyyy-MM-dd"))
(def #^SimpleDateFormat html-fmt (SimpleDateFormat. "MMM dd yyyy"))
(defn xhtml [v]
(let [astr (fn [m] (apply str (mapcat #(list \ (name (key %))
\= \" (val %) \") m)))]
(cond (and (vector? v) (keyword? (first v)))
(let [[i1 & v2] v
[i2 & v3] v2
tag+attrs (str \< (name i1) (when (map? i2) (astr i2)))
content (if (map? i2) v3 v2)]
(cond (seq content) (str tag+attrs ">" (xhtml [content])
"</" (name i1) \>)
(= :script i1) (str tag+attrs "></script>")
:else (str tag+attrs " />")))
(or (vector? v) (seq? v))
(apply str (map xhtml v))
:else v)))
(defmacro hash-syms [& syms]
(cons 'hash-map (mapcat #(list (keyword (name %)) %) syms)))
(def escape-map {\& "&", \< "<", \> ">",
\" """, \newline "<br />"})
(def link-re #"(?:https?://|www\.)(?:<[^>]*>|[^<>\s])*(?=(?:>|<|[.()\[\]])*(?:\s|$))")
(def wrap-re #"(?:<[^>]*>|&[^;]*;|[^/&?]){1,50}[/&?]*")
(defn text-to-html [text]
(let [escaped (apply str (map #(or (escape-map %) %) text))
linked (str/replace escaped link-re
(fn [url]
(let [urltext (reduce #(str %1 "<wbr />" %2)
(re-seq wrap-re url))
linktext (xhtml [:a {:href url
:class "nm"}
urltext])]
(str/replace linktext "$" "\\$"))))]
(str linked "\n")))
(defn #^String html-header [date]
(let [datestr (.format html-fmt date)]
(str "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n"
" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"
"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
(xhtml
[:head [:title "#clojure log - " datestr]
[:meta {:http-equiv "Content-Type"
:content "application/xhtml+xml; charset=UTF-8"}]
[:meta {:name "viewport"
:content (str "width=device-width,"
"minimum-scale=1.0,"
"maximum-scale=1.0")}]
[:link {:type "text/css" :href "irc.css"
:rel "stylesheet"}]])
"<body>"
(xhtml [:h1 "#clojure log - " datestr])
"<div id=\"narrow\">"
(xhtml [
[:dl
[:dt
[:form {:action "http://www.google.com/cse" :id "cse-search-box"}
[:div
[:input {:type "hidden" :name "cx"
:value "partner-pub-1237864095616304:e7qm3gycp2b"}]
[:input {:type "hidden" :name "ie" :value "UTF-8"}]
[:input {:type "text" :id "q" :name "q" :size "10"}]
[:input {:type "submit" :id "sa" :name "sa" :value "Go"}]]]
[:script {:type "text/javascript"
:src "http://www.google.com/coop/cse/brand?form=cse-search-box&lang=en"}]
[:a {:id "book", :href "http://joyofclojure.com/"}
[:i "The Joy of Clojure"]]]
[:dd [:a {:href "http://clojure.org/"} "Main Clojure site"]]
[:dd [:a {:href "http://groups.google.com/group/clojure"}
"Google Group"]]
[:dd [:a {:href "irc://irc.freenode.net/clojure"} "IRC"]]
[:dd [:a {:href "/date/"} "List of all logged dates"]]]
[:div {:id "nav-head" :class "nav"}
[:noscript "Turn on JavaScript for date navigation."]
" "]])
"<div id=\"main\">")))
(defn #^String html-footer [date]
(str "</div>"
(xhtml
[[:div {:id "nav-foot" :class "nav"} " "]
[:div {:class "foot"} "Logging service provided by "
[:a {:class "nm" :href "http://n01se.net/"} "n01se.net"]]
[:script {:type "text/javascript" :src "irc.js"}]])
"</div></body></html>\n"))
(defn minutes [timestr]
(Integer/parseInt (second (re-seq #"\d+" timestr))))
(defn html-post [prevpost {:keys [timestr speak emote text imc]}]
(let [htmltext (text-to-html text)
prevminute (if-let [ptime (:timestr prevpost)] (minutes ptime) 99)]
(xhtml
[:p (when (re-find #"rhickey_*" (str speak)) {:class "bdfl"})
[:a (merge {:name (str timestr (when (< 0 imc) (char (+ imc 96))))}
(when (<= 5 (- (minutes timestr) prevminute)) {:class "nh"}))
(second (re-find #"^0?(.*)" timestr))]
" "
(when (or emote (not= speak (:speak prevpost)))
[:b (if emote "*" (str speak ":")) " "])
(if speak htmltext [[:em emote] " " htmltext])])))
(defn parse-post [prevs line]
(if-let [[_ timestr speak emote text]
(re-matches #"(..:..)(?: \S+:)? (?:<.(\S+)> | \* (\S+))(.*)" line)]
(let [imc (let [p (peek prevs)]
(if (= timestr (:timestr p))
(+ 1 (:imc p))
0))
offset (count prevs)]
;(println line)
;(prn (hash-syms timestr speak emote text offset imc))
(conj prevs (hash-syms timestr speak emote text offset imc)))
prevs))
(defn log-to-html [date log-file html-file]
;(println "Parsing" log-file)
(let [goodposts (with-open [in (-> (FileInputStream. log-file)
(InputStreamReader. "UTF-8")
BufferedReader.)]
(reduce parse-post [] (line-seq in)))]
(when-not (empty? goodposts)
(with-open [out (-> (FileOutputStream. html-file)
(OutputStreamWriter. "UTF-8"))]
(.write out (html-header date))
(doseq [string (map html-post
(cons nil goodposts) goodposts)]
(.write out #^String string))
(.write out (html-footer date)))
html-file)))
(defn update-html
"Converts the log files in log-dir to html and saves them to
html-dir. Starts with the latest (alphabetically) and works
backwards until an html file is found that was modified more
recently than the log file. Returns a list of files updated, the
first in the list is the latest (appropriate for use as main page)"
[log-dir html-dir]
(let [log-files (reverse (sort-by #(.getName %) (.listFiles log-dir)))
base-names (map #((re-find #"^(.*)\.log" (.getName %)) 1) log-files)
html-files (map #(File. html-dir (str % ".html")) base-names)
dates (map #(.parse file-name-fmt %) base-names)
html-to-sync (for [[l h] (map vector log-files html-files)
:while (<= (.lastModified h) (.lastModified l))]
h)]
(filter identity (doall (pmap log-to-html dates log-files html-to-sync)))))
(defn update-remote-html [log-dir html-dir link-name rsync-target]
(when-let [[latest :as html-files] (seq (map #(.getPath %)
(update-html log-dir html-dir)))]
(sh "ln" "-sf" latest link-name)
(println (sh "rsync" "-ua" "--files-from=-" "." rsync-target
:in (str/join "\n" (cons link-name html-files))))))
(update-remote-html
(File. "/home/chouser/commlog/irssi/clojure")
(File. "date") "index.html"
"clojurelog@n01se.net:clojure-log.n01se.net/")
(shutdown-agents)