From 8db7d6a65aa71df4cdc257d74d54042c590fc113 Mon Sep 17 00:00:00 2001 From: bondiano Date: Sun, 2 Jul 2023 11:10:43 +0600 Subject: [PATCH 1/2] finish 8th homework --- otus-16/src/otus_16/homework.clj | 104 ++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 10 deletions(-) diff --git a/otus-16/src/otus_16/homework.clj b/otus-16/src/otus_16/homework.clj index a54c2bf..12120ff 100644 --- a/otus-16/src/otus_16/homework.clj +++ b/otus-16/src/otus_16/homework.clj @@ -1,21 +1,105 @@ -(ns otus-16.homework) +(ns otus-16.homework + (:require + [clojure.java.io :as io] + [clojure.string :as str] + [clojure.core.reducers :as r])) +(defn parse-log + [^String log] + (let [regex #"^([\d+.]+) (\S+) (\S+) (\[[\w+/]+:[\w+:]+ \+\d+\]) \"(.+?)\" (\d{3}) (\d+) \"([^\"]+)\" \"(.+?)\"" + [_ ip user-name date-time request response size referer user-agent] (re-find regex log)] + {:ip ip + :user-name user-name + :date-time date-time + :request request + :response response + :size size + :referer referer + :user-agent user-agent})) +(defn get-logs-paths + [dir-path] + (->> (io/as-file dir-path) + (.listFiles) + (filter #(.isFile %)) + (map #(.getPath %)))) -(defn solution [& {:keys [url referrer] - :or {url :all referrer :all}}] - (println "doing something") - {:total-bytes 12345 - ;; если указан параметр url, то в хэш-мапе будет только одно значение - :bytes-by-url {"some-url" 12345} - ;; если указан параметр referrer, то в хэш-мапе будет только одно значение - :urls-by-referrer {"some-referrer" 12345}}) +(defn read-logs + [logs] + (map #(io/reader %) logs)) +(defn filter-by-url + [url logs] + (if (= url :all) + logs + (letfn [(->url [log] (-> log :request (str/split #" ") second))] + (r/filter #(= url (->url %)) logs)))) +(defn filter-by-referer + [referer logs] + (if (= referer :all) + logs + (letfn [(->referer [log] (-> log :referer))] + (r/filter #(= referer (->referer %)) logs)))) + +(defn close-logs + [logs] + (map #(.close %) logs)) + +(defn parse-int-safe + [^String s] + (try + (Integer/parseInt s) + (catch Exception _ + 0))) + +(defn sum-size + [log-file] + (->> log-file + (r/map #(parse-int-safe (:size %))) + (r/fold +))) + +(defn sum-partition + [partition] + (reduce #(+ %1 (sum-size %2)) + 0 + partition)) + +(defn process-log + [log-file & + {:keys [url referrer]}] + (->> log-file + (line-seq) + (partition-all 5000) + (pmap #(map parse-log %)) + (filter-by-url url) + (filter-by-referer referrer) + (sum-partition))) + +(comment + (time + (let [file (io/reader "./logs/access.log.2")] + (-> file + (process-log {:url :all :referrer :all}) + (println)) + (.close file)))) + + +(defn solution + [& {:keys [url referrer] + :or {url :all referrer :all}}] + (let [logs (-> "./logs" + get-logs-paths + read-logs)] + (->> logs + (pmap #(process-log % :url url :referrer referrer)) + (reduce +) + (println "Bytes:")) + (close-logs logs))) (comment ;; возможные вызовы функции - (solution) + (time (solution)) (solution :url "some-url") (solution :referrer "some-referrer") (solution :url "some-url" :referrer "some-referrer")) From 449c025010d548775b575d95c4c156b9edfe9f50 Mon Sep 17 00:00:00 2001 From: bondiano Date: Thu, 13 Jul 2023 12:42:15 +0600 Subject: [PATCH 2/2] fix comment after review --- otus-16/src/otus_16/homework.clj | 75 ++++++++++++++------------ otus-16/test/otus_16/homework_test.clj | 16 +++++- 2 files changed, 57 insertions(+), 34 deletions(-) diff --git a/otus-16/src/otus_16/homework.clj b/otus-16/src/otus_16/homework.clj index 12120ff..363a74d 100644 --- a/otus-16/src/otus_16/homework.clj +++ b/otus-16/src/otus_16/homework.clj @@ -4,10 +4,12 @@ [clojure.string :as str] [clojure.core.reducers :as r])) +(def log-regex + #"^([\d+.]+) (\S+) (\S+) (\[[\w+/]+:[\w+:]+ \+\d+\]) \"(.+?)\" (\d{3}) (\d+) \"([^\"]+)\" \"(.+?)\"") + (defn parse-log [^String log] - (let [regex #"^([\d+.]+) (\S+) (\S+) (\[[\w+/]+:[\w+:]+ \+\d+\]) \"(.+?)\" (\d{3}) (\d+) \"([^\"]+)\" \"(.+?)\"" - [_ ip user-name date-time request response size referer user-agent] (re-find regex log)] + (let [[_ ip _ user-name date-time request response size referer user-agent] (re-find log-regex log)] {:ip ip :user-name user-name :date-time date-time @@ -19,28 +21,40 @@ (defn get-logs-paths [dir-path] - (->> (io/as-file dir-path) - (.listFiles) - (filter #(.isFile %)) - (map #(.getPath %)))) + (->> (io/as-file dir-path) + (file-seq) + (filter #(.isFile %)))) (defn read-logs [logs] (map #(io/reader %) logs)) +(defn ->url [log] + {:pre [(contains? log :request)]} + (let [request (-> log :request)] + (when request + (-> request + (str/split #" ") + second)))) + (defn filter-by-url [url logs] (if (= url :all) logs - (letfn [(->url [log] (-> log :request (str/split #" ") second))] - (r/filter #(= url (->url %)) logs)))) + (r/filter #(= url (->url %)) logs))) + +(comment + (r/fold str (filter-by-url "test" [{:request "test"} {:request "test2"} {:request "test"}]))) + +(defn ->referer [log] + {:pre [(contains? log :referer)]} + (-> log :referer)) (defn filter-by-referer [referer logs] (if (= referer :all) logs - (letfn [(->referer [log] (-> log :referer))] - (r/filter #(= referer (->referer %)) logs)))) + (r/filter #(= referer (->referer %)) logs))) (defn close-logs [logs] @@ -59,6 +73,13 @@ (r/map #(parse-int-safe (:size %))) (r/fold +))) +(defn process-partition + [logs {:keys [url referrer]}] + (->> logs + (r/map parse-log) + (filter-by-url url) + (filter-by-referer referrer))) + (defn sum-partition [partition] (reduce #(+ %1 (sum-size %2)) @@ -66,24 +87,12 @@ partition)) (defn process-log - [log-file & - {:keys [url referrer]}] - (->> log-file - (line-seq) - (partition-all 5000) - (pmap #(map parse-log %)) - (filter-by-url url) - (filter-by-referer referrer) - (sum-partition))) - -(comment - (time - (let [file (io/reader "./logs/access.log.2")] - (-> file - (process-log {:url :all :referrer :all}) - (println)) - (.close file)))) - + [log-file filter-params] + (->> log-file + (line-seq) + (partition-all 5000) + (pmap #(process-partition % filter-params)) + (sum-partition))) (defn solution [& {:keys [url referrer] @@ -92,14 +101,14 @@ get-logs-paths read-logs)] (->> logs - (pmap #(process-log % :url url :referrer referrer)) + (pmap #(process-log % {:url url :referrer referrer})) (reduce +) (println "Bytes:")) (close-logs logs))) (comment ;; возможные вызовы функции - (time (solution)) - (solution :url "some-url") - (solution :referrer "some-referrer") - (solution :url "some-url" :referrer "some-referrer")) + (time (solution)) + (time (solution :url "/rss/")) + (solution :referrer "some-referrer") + (solution :url "some-url" :referrer "some-referrer")) diff --git a/otus-16/test/otus_16/homework_test.clj b/otus-16/test/otus_16/homework_test.clj index ed5db27..3eff5a9 100644 --- a/otus-16/test/otus_16/homework_test.clj +++ b/otus-16/test/otus_16/homework_test.clj @@ -1,6 +1,20 @@ (ns otus-16.homework-test (:require [clojure.test :refer :all] - [otus-16.core :refer :all])) + [otus-16.homework :as sut])) +(def stub-row "66.249.68.12 - - [20/Sep/2020:22:11:20 +0000] \"GET /%D0%BC%D0%B0%D0%BB%D0%B5%D0%BD%D1%8C%D0%BA%D0%B8%D0%B9-%D0%BC%D0%B0%D0%BB%D1%8C%D1%87%D0%B8%D0%BA-%D0%BF%D0%BE-%D0%B8%D0%BC%D0%B5%D0%BD%D0%B8-%D0%9D%D1%83%D1%80%D0%B1%D0%B5%D0%BA-%D0%B6%D0%B8%D0%BB-%D0%B2-%D0%BD%D0%B5%D0%B1%D0%BE%D0%BB%D1%8C%D1%88%D0%BE%D0%B9/?p=2 HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.110 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\"") +(deftest parse-log-test + (testing "parse-log" + (is (= {:ip "66.249.68.12", + :user-name "-", + :date-time "[20/Sep/2020:22:11:20 +0000]", + :request + "GET /%D0%BC%D0%B0%D0%BB%D0%B5%D0%BD%D1%8C%D0%BA%D0%B8%D0%B9-%D0%BC%D0%B0%D0%BB%D1%8C%D1%87%D0%B8%D0%BA-%D0%BF%D0%BE-%D0%B8%D0%BC%D0%B5%D0%BD%D0%B8-%D0%9D%D1%83%D1%80%D0%B1%D0%B5%D0%BA-%D0%B6%D0%B8%D0%BB-%D0%B2-%D0%BD%D0%B5%D0%B1%D0%BE%D0%BB%D1%8C%D1%88%D0%BE%D0%B9/?p=2 HTTP/1.1", + :response "304", + :size "0", + :referer "-", + :user-agent + "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.110 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"} + (sut/parse-log stub-row)))))