Skip to content

Commit

Permalink
Merge branch 'release/release-0.3'
Browse files Browse the repository at this point in the history
  • Loading branch information
ksseono committed Dec 1, 2018
2 parents d37e319 + 9206ee5 commit 7b467a2
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 65 deletions.
6 changes: 3 additions & 3 deletions README.md
Expand Up @@ -7,7 +7,7 @@ A [Open Korean Text Processor](https://github.com/open-korean-text/open-korean-t

## Dependencies

* [org.openkoreantext/open-korean-text 2.3.0](https://github.com/open-korean-text/open-korean-text/releases/tag/open-korean-text-2.3.0)
* [org.openkoreantext/open-korean-text 2.3.1](https://github.com/open-korean-text/open-korean-text/releases/tag/open-korean-text-2.3.1)


## Get Started
Expand All @@ -17,7 +17,7 @@ A [Open Korean Text Processor](https://github.com/open-korean-text/open-korean-t
[Leiningen](https://leiningen.org) dependency in `project.clj` (from [Clojars](https://clojars.org/open-korean-text-4clj)):

```clojure
[open-korean-text-4clj "0.2.5"]
[open-korean-text-4clj "0.3"]
```

[Maven](http://maven.apache.org/) dependency information in pom.xml:
Expand All @@ -26,7 +26,7 @@ A [Open Korean Text Processor](https://github.com/open-korean-text/open-korean-t
<dependency>
<groupId>open-korean-text-4clj</groupId>
<artifactId>open-korean-text-4clj</artifactId>
<version>0.2.5</version>
<version>0.3</version>
</dependency>
```

Expand Down
7 changes: 2 additions & 5 deletions project.clj
@@ -1,14 +1,11 @@
(defproject open-korean-text-4clj "0.2.5"
(defproject open-korean-text-4clj "0.3"
:description "Open Korean Text Processor wrapper for Clojure"
:url "http://github.com/open-korean-text/open-korean-text-4clj"
:license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"}

:dependencies [[org.clojure/clojure "1.9.0"]
[org.openkoreantext/open-korean-text "2.3.0"]]

:profiles {:dev {:dependencies [[midje "1.9.1"]]
:plugins [[lein-midje "3.2.1"]]}}
[org.openkoreantext/open-korean-text "2.3.1"]]

:repl-options {:init-ns open-korean-text-4clj.core}

Expand Down
126 changes: 69 additions & 57 deletions test/open_korean_text_4clj/core_test.clj
@@ -1,82 +1,94 @@
(ns open-korean-text-4clj.core-test
(:require [midje.sweet :refer :all]
(:require [clojure.test :refer :all]
[open-korean-text-4clj.core :refer :all])
(:import [org.openkoreantext.processor KoreanPosJava]))

(fact "test normalize"
(normalize "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ") => "한국어를 처리하는 예시입니다ㅋㅋㅋ")
;; Checks that `normalize` maps the noisy sample sentence to the expected
;; cleaned-up string.
(deftest normalize-test
  (let [noisy    "한국어를 처리하는 예시입니닼ㅋㅋㅋㅋㅋ"
        expected "한국어를 처리하는 예시입니다ㅋㅋㅋ"]
    (is (= (normalize noisy) expected))))


(facts "test tokenize"
(deftest tokenize-test
(testing "default operation"
(let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ")]
(is (= (-> tokens count)
8))
(is (= (-> tokens (get 6) :text)
""))))

(fact "default operation"
(let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ")]
(-> tokens count) => 8
(-> tokens (get 6) :text) => ""))
(testing "with normalization"
(let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true)]
(is (= (-> tokens count)
7))
(is (= (-> tokens (get 5) :text)
"입니다"))))

(fact "with normalization"
(let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true)]
(-> tokens count) => 7
(-> tokens (get 5) :text) => "입니다"))
(testing "with normalization & stemming"
(let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true :stem true)]
(is (= (-> tokens count)
7))
(is (= (-> tokens (get 5) :text)
"이다"))))

(fact "with normalization & stemming"
(let [tokens (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :norm true :stem true)]
(-> tokens count) => 7
(-> tokens (get 5) :text) => "이다"))
(testing "as-strs (return texts only)"
(is (.contains (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true)
""))

(fact "as-strs (return texts only)"
(tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true)
=> (contains "")
(is (.contains (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true)
"입니다"))

(tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true)
=> (contains "입니다")
(is (.contains (tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true :stem true)
"이다"))))

(tokenize "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true :norm true :stem true)
=> (contains "이다")))

;; Checks `tokenize-top-n` on a two-word phrase with n = 3: the result has two
;; entries, and the :text reached by taking `first` three levels deep is the
;; first word of the phrase.
(deftest tokenize-top-n-test
  (let [candidates (tokenize-top-n "대선 후보" 3)]
    (is (= (count candidates) 2))
    ;; (first (ffirst x)) is the same walk as (-> x first first first).
    (is (= (:text (first (ffirst candidates))) "대선"))))

(fact "tokenize-top-n"
(let [tokens (tokenize-top-n "대선 후보" 3)]
(-> tokens count) => 2
(-> tokens first first first :text) => "대선"))

(fact "detokenize"
(let [s (detokenize ["연세", "대학교", "보건", "대학원","","오신","","","환영","합니다", "!"])]
s => (contains "연세대학교 보건 대학원")
s => (contains "환영합니다")))
;; Checks that the value returned by `detokenize` for the sample token list
;; contains both expected fragments.
;; NOTE(review): the empty-string entries in the token vector may be
;; characters lost in extraction -- verify against the repository.
(deftest detokenize-test
  (let [joined (detokenize ["연세", "대학교", "보건", "대학원","","오신","","","환영","합니다", "!"])]
    (doseq [fragment ["연세대학교 보건 대학원" "환영합니다!"]]
      (is (.contains joined fragment)))))

(facts "extract-phrases"

(fact "default operation"
(let [phrases (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ")]
(-> phrases count) => 4
(-> phrases (get 2) :text) => "처리하는 예시"))
(deftest extract-phrases-test
(testing "default operation"
(let [phrases (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ")]
(is (= (-> phrases count)
4))
(is (= (-> phrases (get 2) :text)
"처리하는 예시"))))

(fact "as-strs (return texts only)"
(extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true)
=> (contains "처리하는 예시")))
(testing "as-strs (return texts only)"
(is (.contains (extract-phrases "한국어를 처리하는 예시입니닼ㅋㅋ" :as-strs true)
"처리하는 예시"))))

(fact "split-sentences"
(-> (split-sentences "가을이다! 남자는 가을을 탄다...... 그렇지? 루루야! 버버리코트 사러 가자!!!!")
(get 1)
:text)
=>
"남자는 가을을 탄다......")
;; Checks that the :text of the item at index 1 of the `split-sentences`
;; result is the second sentence of the sample paragraph.
(deftest split-sentences-test
  (let [paragraph       "가을이다! 남자는 가을을 탄다...... 그렇지? 루루야! 버버리코트 사러 가자!!!!"
        second-sentence "남자는 가을을 탄다......"]
    (is (= (:text (get (split-sentences paragraph) 1))
           second-sentence))))

(fact "add-nouns-to-dictionary"
(-> (tokenize "불방망이") (get 0) :text)
=> ""

(add-nouns-to-dictionary ["불방망이"])
(deftest add-nouns-to-dictionary-test
(is (= (-> (tokenize "불방망이") (get 0) :text)
""))

(-> (tokenize "불방망이") (get 0) :text)
=> "불방망이")
(add-nouns-to-dictionary ["불방망이"])

(fact "add-words-to-dictionary"
(-> (tokenize "그라믄 당신 먼저 얼렁 가이소") (get 0) :text)
=> ""
(is (= (-> (tokenize "불방망이") (get 0) :text)
"불방망이")))

(add-words-to-dictionary KoreanPosJava/Conjunction ["그라믄"])

(-> (tokenize "그라믄 당신 먼저 얼렁 가이소") (get 0) :text)
=> "그라믄")
;; Checks the :text of the first token of the sample sentence before and after
;; registering a word with `add-words-to-dictionary` under
;; KoreanPosJava/Conjunction; after the call the first token is the added word.
;; NOTE(review): the "" expected before the call may be a character lost in
;; extraction -- verify against the repository.
(deftest add-words-to-dictionary-test
  (let [sentence   "그라믄 당신 먼저 얼렁 가이소"
        ;; Re-tokenizes on every call so each assertion sees the current dictionary.
        first-text #(-> (tokenize sentence) (get 0) :text)]
    (is (= (first-text) ""))

    (add-words-to-dictionary KoreanPosJava/Conjunction ["그라믄"])

    (is (= (first-text) "그라믄"))))

0 comments on commit 7b467a2

Please sign in to comment.