diff --git a/otus-16/src/otus_16/homework.clj b/otus-16/src/otus_16/homework.clj
--- a/otus-16/src/otus_16/homework.clj
+++ b/otus-16/src/otus_16/homework.clj
@@ -1,21 +1,148 @@
-(ns otus-16.homework)
+(ns otus-16.homework
+  (:require
+   [clojure.java.io :as io]
+   [clojure.string :as str]
+   [clojure.core.reducers :as r]))
 
+(def log-regex
+  "Pattern for a single access-log line. Capture groups, in order: client
+  address, identity, user name, bracketed timestamp, request line, status
+  code, response size, referer and user agent."
+  #"^([\d+.]+) (\S+) (\S+) (\[[\w+/]+:[\w+:]+ \+\d+\]) \"(.+?)\" (\d{3}) (\d+) \"([^\"]+)\" \"(.+?)\"")
 
+(defn parse-log
+  "Parses one access-log line into a map of string fields. When the line does
+  not match `log-regex`, every value in the returned map is nil."
+  [^String log]
+  (let [[_ ip _ user-name date-time request response size referer user-agent]
+        (re-find log-regex log)]
+    {:ip ip
+     :user-name user-name
+     :date-time date-time
+     :request request
+     :response response
+     :size size
+     :referer referer
+     :user-agent user-agent}))
 
-(defn solution [& {:keys [url referrer]
-                   :or {url :all referrer :all}}]
-  (println "doing something")
-  {:total-bytes 12345
-   ;; если указан параметр url, то в хэш-мапе будет только одно значение
-   :bytes-by-url {"some-url" 12345}
-   ;; если указан параметр referrer, то в хэш-мапе будет только одно значение
-   :urls-by-referrer {"some-referrer" 12345}})
+(defn get-logs-paths
+  "Returns a lazy seq of the regular files found under `dir-path`, walking
+  sub-directories recursively."
+  [dir-path]
+  (->> (io/as-file dir-path)
+       (file-seq)
+       (filter #(.isFile ^java.io.File %))))
 
+(defn read-logs
+  "Lazily opens a reader for every item in `logs`. Each reader is opened only
+  when its element is realized; the caller is responsible for closing them."
+  [logs]
+  (map #(io/reader %) logs))
 
+(defn ->url
+  "Returns the second space-separated token of the log's :request value (the
+  URL in a request line such as GET /path HTTP/1.1), or nil when :request is
+  nil or has no second token. The :request key must be present in `log`,
+  otherwise the precondition fails."
+  [log]
+  {:pre [(contains? log :request)]}
+  (when-let [request (:request log)]
+    (second (str/split request #" "))))
 
+(defn filter-by-url
+  "Keeps only the parsed logs whose URL equals `url`. The keyword :all
+  disables filtering and returns `logs` untouched."
+  [url logs]
+  (if (= url :all)
+    logs
+    (r/filter #(= url (->url %)) logs)))
+
+(comment
+  (r/fold str (filter-by-url "test" [{:request "test"} {:request "test2"} {:request "test"}])))
+
+(defn ->referer
+  "Returns the :referer value of a parsed log; the key must be present."
+  [log]
+  {:pre [(contains? log :referer)]}
+  (:referer log))
+
+(defn filter-by-referer
+  "Keeps only the parsed logs whose referer equals `referer`. The keyword
+  :all disables filtering and returns `logs` untouched."
+  [referer logs]
+  (if (= referer :all)
+    logs
+    (r/filter #(= referer (->referer %)) logs)))
+
+(defn close-logs
+  "Closes every reader in `logs` right away and returns nil. Eager on
+  purpose: a lazy `map` here would close nothing until it was realized."
+  [logs]
+  (run! #(.close ^java.io.Closeable %) logs))
+
+(defn parse-int-safe
+  "Parses `s` as a decimal integer and returns 0 when it cannot be parsed
+  (nil included). Parsed as a long, so sizes above Integer/MAX_VALUE are
+  counted instead of being silently turned into 0."
+  [^String s]
+  (try
+    (Long/parseLong s)
+    (catch Exception _
+      0)))
+
+(defn sum-size
+  "Adds up the :size values of a reducible collection of parsed log maps."
+  [log-file]
+  (->> log-file
+       (r/map #(parse-int-safe (:size %)))
+       (r/fold +)))
+
+(defn process-partition
+  "Parses a batch of raw log lines and applies the :url and :referrer
+  filters. Returns a reducible of parsed log maps."
+  [logs {:keys [url referrer]}]
+  (->> logs
+       (r/map parse-log)
+       (filter-by-url url)
+       (filter-by-referer referrer)))
+
+(defn sum-partition
+  "Adds up the sizes of every processed batch in `partition`."
+  [partition]
+  (reduce #(+ %1 (sum-size %2))
+          0
+          partition))
+
+(defn process-log
+  "Sums the response sizes of the lines in `log-file` that pass
+  `filter-params` (a map with :url and :referrer). `log-file` is anything
+  `clojure.java.io/reader` accepts; the reader is closed before returning,
+  even on failure. Lines are processed with `pmap` in batches of 5000."
+  [log-file filter-params]
+  (with-open [rdr (io/reader log-file)]
+    (->> (line-seq rdr)
+         (partition-all 5000)
+         (pmap #(process-partition % filter-params))
+         ;; `sum-partition` reduces eagerly, so every line has been read
+         ;; before `with-open` closes the reader.
+         (sum-partition))))
+
+(defn solution
+  "Sums the response sizes of all log lines found under ./logs that match the
+  optional :url and :referrer filters (both default to :all, which means no
+  filtering). Prints the total after the label Bytes: and returns it. Each
+  file is opened and closed inside `process-log`."
+  [& {:keys [url referrer]
+      :or {url :all referrer :all}}]
+  (let [total (->> (get-logs-paths "./logs")
+                   (pmap #(process-log % {:url url :referrer referrer}))
+                   (reduce +))]
+    (println "Bytes:" total)
+    total))
+
 (comment
- ;; возможные вызовы функции
- (solution)
- (solution :url "some-url")
- (solution :referrer "some-referrer")
- (solution :url "some-url" :referrer "some-referrer"))
+  ;; example invocations
+  (time (solution))
+  (time (solution :url "/rss/"))
+  (solution :referrer "some-referrer")
+  (solution :url "some-url" :referrer "some-referrer"))
diff --git a/otus-16/test/otus_16/homework_test.clj b/otus-16/test/otus_16/homework_test.clj
--- a/otus-16/test/otus_16/homework_test.clj
+++ b/otus-16/test/otus_16/homework_test.clj
@@ -1,6 +1,26 @@
 (ns otus-16.homework-test
   (:require [clojure.test :refer :all]
-            [otus-16.core :refer :all]))
+            [otus-16.homework :as sut]))
 
+(def stub-row "66.249.68.12 - - [20/Sep/2020:22:11:20 +0000] \"GET /%D0%BC%D0%B0%D0%BB%D0%B5%D0%BD%D1%8C%D0%BA%D0%B8%D0%B9-%D0%BC%D0%B0%D0%BB%D1%8C%D1%87%D0%B8%D0%BA-%D0%BF%D0%BE-%D0%B8%D0%BC%D0%B5%D0%BD%D0%B8-%D0%9D%D1%83%D1%80%D0%B1%D0%B5%D0%BA-%D0%B6%D0%B8%D0%BB-%D0%B2-%D0%BD%D0%B5%D0%B1%D0%BE%D0%BB%D1%8C%D1%88%D0%BE%D0%B9/?p=2 HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.110 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\"")
 
+(deftest parse-log-test
+  (testing "parse-log"
+    (is (= {:ip "66.249.68.12",
+            :user-name "-",
+            :date-time "[20/Sep/2020:22:11:20 +0000]",
+            :request
+            "GET /%D0%BC%D0%B0%D0%BB%D0%B5%D0%BD%D1%8C%D0%BA%D0%B8%D0%B9-%D0%BC%D0%B0%D0%BB%D1%8C%D1%87%D0%B8%D0%BA-%D0%BF%D0%BE-%D0%B8%D0%BC%D0%B5%D0%BD%D0%B8-%D0%9D%D1%83%D1%80%D0%B1%D0%B5%D0%BA-%D0%B6%D0%B8%D0%BB-%D0%B2-%D0%BD%D0%B5%D0%B1%D0%BE%D0%BB%D1%8C%D1%88%D0%BE%D0%B9/?p=2 HTTP/1.1",
+            :response "304",
+            :size "0",
+            :referer "-",
+            :user-agent
+            "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.110 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"}
+           (sut/parse-log stub-row)))))
 
+(deftest parse-int-safe-test
+  (testing "parse-int-safe"
+    (is (= 42 (sut/parse-int-safe "42")))
+    (is (= 3000000000 (sut/parse-int-safe "3000000000")))
+    (is (= 0 (sut/parse-int-safe "-")))
+    (is (= 0 (sut/parse-int-safe nil)))))