/
kurkumator.el
55 lines (44 loc) · 3.66 KB
/
kurkumator.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
(require 'levenshtein)
(defvar ko-list
  (list "азаза" "анархия" "анон" "анус" "аутизм" "аутист" "бан" "батхерт" "биекция" "биткоин" "бнв" "бомбануло" "бомбит" "борщ" "бугурт" "будущее" "бухло" "быдло" "ватник" "ветеран" "вин" "внезапно" "гей" "гейос" "говно" "двач" "дедфуд" "десу" "диван" "для" "дрочить" "ебать" "жопа" "жопу" "забери" "заблевал" "задрот" "запили" "затралел" "зафорсил" "зашквареный" "збс" "итт" "кококо" "костыли" "крипто" "кукарек" "кун" "куркума" "лайк" "лалка" "ле" "лизнул" "линуск" "лисп" "личкрафт" "лойс" "лол" "лох" "лул" "лях" "маман" "мамка" "матан" "моар" "мюсли" "нано" "наркоман" "нассал" "нахуй" "нинужно" "ня" "няшмяш" "обосрал" "обосрался" "опущено" "отсос" "пердак" "пердолик" "петух" "петушок" "петушон" "пидон" "пидор" "пидорас" "пидораха" "пизда" "пиздолис" "писечка" "полизал" "полущ" "порст" "поссал" "потрачено" "потрачено" "ппц" "прост" "профит" "пруф" "прыщи" "пук" "пукан" "путин" "рабство" "разорвало" "рак" "рашка" "репост" "сасай" "свежо" "сиськи" "скатываешь" "скатывать" "слил" "соси" "сосноль" "соснул" "спайс" "спали" "спалил" "сперма" "среньк" "сыч" "твою" "тебя" "трал" "транс" "трап" "тред" "тян" "уау" "удобно" "уебать" "упоротый" "упороть" "успех" "фейл" "форс" "форсил" "форсить" "функциональщик" "хаскель" "хуйта" "хуле" "чат" "членодевка" "чухан" "шиндошс" "шкварить" "шлюха" "шлюха" "эпик")
  "Vocabulary of replacement words.
`co-list' and `co-ko-list' are derived from this list, and
`find-nearest' returns one of its entries.")
;; Exclusive upper bound on the distance accepted by `find-nearest':
;; a candidate is only considered when its distance is below this value.
(defvar *max-diff* 10)
(defvar *cons-string* "бвгджзйклмнпрстфхцчшщ"
  "Characters `contract' treats as consonants (used inside a regexp character class).")
(defvar *vowel-string* "аеёиоуыэюя"
  "Characters `contract' treats as vowels (used inside a regexp character class).")
(defun contract (word)
  "Return WORD with vowels removed and repeated consonants squashed.
Every character listed in `*vowel-string*' is deleted first; then each
run of two or more identical characters from `*cons-string*' is reduced
to a single occurrence.  Characters in neither set are left untouched."
  ;; Use `+' rather than `*' for the vowel run: `*' also matches the empty
  ;; string, which forces a pointless no-op replacement at every
  ;; non-vowel position.
  (let* ((devow-rx (format "[%s]+" *vowel-string*))
         (squeeze-rx (format "\\([%s]\\)\\1+" *cons-string*))
         (devow (replace-regexp-in-string devow-rx "" word)))
    (replace-regexp-in-string squeeze-rx "\\1" devow)))
(defvar co-list (mapcar #'contract ko-list)
  "Contracted form (see `contract') of every word in `ko-list', in the same order.")
;; Built with plain `mapcar' instead of `mapcar*': the latter belongs to the
;; obsolete `cl' package, which this file never requires.
(defvar co-ko-list (mapcar (lambda (word) (cons (contract word) word)) ko-list)
  "Alist of (CONTRACTED . ORIGINAL) pairs, one per entry of `ko-list'.")
(defun find-nearest (word)
  "Return the `ko-list' word whose contracted form is nearest to WORD's.
WORD is contracted with `contract' and compared against the car of every
pair in `co-ko-list' using `levenshtein-distance'.  The original word
\(the cdr) of the pair with the smallest distance is returned; on ties the
earliest pair wins.  Only distances below `*max-diff*' are accepted, so
the result is nil when no candidate qualifies."
  ;; State is let-bound so that no global variables are created or
  ;; clobbered, and WORD is contracted once instead of once per candidate.
  (let ((target (contract word))
        (best nil)
        (best-diff 1024))
    (dolist (pair co-ko-list best)
      (let ((diff (levenshtein-distance target (car pair))))
        (when (and (< diff best-diff) (< diff *max-diff*))
          (setq best (cdr pair)
                best-diff diff))))))
(defun koko-word (start end)
  "Replace the buffer text between START and END with its nearest match.
Point is moved to START.  When the span is longer than 3 characters and
`find-nearest' returns a non-nil word for it, the span is deleted and
that word is inserted in its place, leaving point after the inserted
text.  Otherwise the buffer is left unchanged."
  (goto-char start)
  (when (> (- end start) 3) ; leave words of 3 or fewer letters alone
    (let* ((cand (buffer-substring start end))
           (res (find-nearest cand)))
      (when res
        ;; `delete-region' rather than `kill-region': replaced words must
        ;; not be pushed onto the user's kill ring.
        (delete-region start end)
        (insert res)))))
(defun kokoify (start end)
  "Run `koko-word' on every Cyrillic word between START and END.
Interactively, START and END are the bounds of the region.  Words are
found by regexp search, moving forward from START; point is restored
afterwards."
  (interactive "r")
  (save-excursion
    ;; Replacements change the length of the text, so both the region end
    ;; and the end of the current word are tracked with markers.  They use
    ;; the advancing insertion type so that they end up after a replacement
    ;; inserted exactly at their position.
    (let ((limit (copy-marker end t))
          (word-end (make-marker)))
      (set-marker-insertion-type word-end t)
      (goto-char start)
      ;; The character class lists ё/Ё explicitly because they lie outside
      ;; the а-я / А-Я ranges.
      (while (re-search-forward "\\b\\([а-яёА-ЯЁ]+\\)\\b" limit t)
        ;; Capture the match bounds before `koko-word' edits the buffer.
        (let ((word-start (match-beginning 1)))
          (set-marker word-end (match-end 1))
          (koko-word word-start (marker-position word-end))
          ;; Resume right after the (possibly replaced) word.
          (goto-char word-end)))
      ;; Detach the markers so they no longer slow down buffer edits.
      (set-marker limit nil)
      (set-marker word-end nil))))
;; select the region and call M-x kokoify
(provide 'kurkumator)