From 4a3e4944372a8439bf49f77e96c529632870a6e2 Mon Sep 17 00:00:00 2001 From: BrewingWeasel Date: Tue, 14 Nov 2023 20:21:57 -0500 Subject: [PATCH 1/4] feat: add wiktionary dictionary --- Cargo.lock | 50 ++++++++++++++++++++++ shared/src/lib.rs | 1 + src-tauri/Cargo.toml | 1 + src-tauri/src/dictionary.rs | 37 ++++++++++++++++ src-ui/src/settings/dictionary_settings.rs | 27 ++++++++++++ 5 files changed, 116 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 6e89884..fbee877 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,7 @@ dependencies = [ "chrono", "dirs", "reqwest", + "select", "serde", "serde_json", "shared", @@ -220,6 +221,21 @@ version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -2278,6 +2294,18 @@ dependencies = [ "tendril", ] +[[package]] +name = "markup5ever_rcdom" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9521dd6750f8e80ee6c53d65e2e4656d7de37064f3a7a5d2d11d05df93839c2" +dependencies = [ + "html5ever 0.26.0", + "markup5ever 0.11.0", + "tendril", + "xml5ever", +] + [[package]] name = "matchers" version = "0.1.0" @@ -3515,6 +3543,17 @@ dependencies = [ "libc", ] +[[package]] +name = "select" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f9da09dc3f4dfdb6374cbffff7a2cffcec316874d4429899eefdc97b3b94dcd" +dependencies = [ + "bit-set", + "html5ever 0.26.0", + "markup5ever_rcdom", +] + [[package]] name = "selectors" version = "0.22.0" @@ -5322,6 +5361,17 @@ dependencies = [ "libc", ] +[[package]] +name = "xml5ever" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4034e1d05af98b51ad7214527730626f019682d797ba38b51689212118d8e650" +dependencies = [ + "log", + "mac", + "markup5ever 0.11.0", +] + [[package]] name = "xxhash-rust" version = "0.8.7" diff --git a/shared/src/lib.rs b/shared/src/lib.rs index a61f142..8e253f8 100644 --- a/shared/src/lib.rs +++ b/shared/src/lib.rs @@ -23,6 +23,7 @@ pub enum Dictionary { File(String, DictFileType), Url(String), Command(String), + Wiktionary(String), } #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 0384293..b9d795f 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -25,6 +25,7 @@ spacy-parsing = { path = "../spacy-parsing" } toml = "0.8.2" dirs = "5.0.1" chrono = { version = "0.4.31", features = ["serde"] } +select = "0.6.0" [features] diff --git a/src-tauri/src/dictionary.rs b/src-tauri/src/dictionary.rs index 5b63e8a..6f147ce 100644 --- a/src-tauri/src/dictionary.rs +++ b/src-tauri/src/dictionary.rs @@ -1,3 +1,7 @@ +use select::{ + document::Document, + predicate::{Attr, Name, Predicate}, +}; use shared::*; use std::{error::Error, fs}; use tauri::State; @@ -51,6 +55,38 @@ async fn get_def_command(lemma: &str, cmd: &str) -> Result String { + let mut c = language.chars(); + match c.next() { + None => String::new(), + Some(f) => f.to_uppercase().collect::() + c.as_str().to_lowercase().as_str(), + } +} + +async fn get_def_wiktionary(lemma: &str, language: &str) -> Result> { + let language = to_title(language); + let text = reqwest::get(format!("https://wiktionary.org/wiki/{lemma}")) + .await? + .text() + .await?; + let doc = Document::from_read(text.as_bytes())?; + + let mut def = String::new(); + for node in doc.find(Name("h2").descendant(Attr("id", language.as_str()))) { + let mut node = node.parent().unwrap(); + while let Some(cur_node) = node.next() { + if cur_node.name() == Some("h2") { + break; + } + if cur_node.as_comment().is_none() && cur_node.attr("class") != Some("mw-editsection") { + def.push_str(&cur_node.html()); + } + node = cur_node; + } + } + Ok(format!("
'{def}
")) +} + #[tauri::command] pub async fn get_defs( state: State<'_, SakinyjeState>, @@ -75,6 +111,7 @@ async fn get_def(dict: &Dictionary, lemma: &str) -> Result get_def_from_file(lemma, f, dict_type), Dictionary::Url(url) => get_def_url(lemma, url).await, Dictionary::Command(cmd) => get_def_command(lemma, cmd).await, + Dictionary::Wiktionary(lang) => get_def_wiktionary(lemma, lang).await, } } diff --git a/src-ui/src/settings/dictionary_settings.rs b/src-ui/src/settings/dictionary_settings.rs index 40084d8..989ff02 100644 --- a/src-ui/src/settings/dictionary_settings.rs +++ b/src-ui/src/settings/dictionary_settings.rs @@ -74,6 +74,11 @@ fn DictionaryRepresentation( wdict(Dictionary::Command(String::new())); } } + "wiktionary" => { + if !matches!(rdict(), Dictionary::Wiktionary(_)) { + wdict(Dictionary::Wiktionary(String::new())); + } + } _ => unreachable!(), }; view! { @@ -135,6 +140,28 @@ fn DictionaryRepresentation( } .into_view() } + Dictionary::Wiktionary(url) => { + let (read_sig, write_sig) = create_signal(url); + view! { +
+ + +
+ } + .into_view() + } Dictionary::File(filename, dict_type) => { view! { Date: Tue, 14 Nov 2023 20:52:39 -0500 Subject: [PATCH 2/4] fix: not being able to select wiktionary in settings --- src-ui/src/settings/dictionary_settings.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src-ui/src/settings/dictionary_settings.rs b/src-ui/src/settings/dictionary_settings.rs index 989ff02..4ad40c1 100644 --- a/src-ui/src/settings/dictionary_settings.rs +++ b/src-ui/src/settings/dictionary_settings.rs @@ -93,6 +93,9 @@ fn DictionaryRepresentation( + {move || match rdict() { From 4d6de1c4a9d7a20a5665092d69d5bed32615cf51 Mon Sep 17 00:00:00 2001 From: BrewingWeasel Date: Tue, 14 Nov 2023 20:54:01 -0500 Subject: [PATCH 3/4] fix: showing edit labels for wiktionary --- src-tauri/src/dictionary.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src-tauri/src/dictionary.rs b/src-tauri/src/dictionary.rs index 6f147ce..b9b99e2 100644 --- a/src-tauri/src/dictionary.rs +++ b/src-tauri/src/dictionary.rs @@ -1,6 +1,6 @@ use select::{ document::Document, - predicate::{Attr, Name, Predicate}, + predicate::{Attr, Class, Name, Predicate}, }; use shared::*; use std::{error::Error, fs}; @@ -78,7 +78,13 @@ async fn get_def_wiktionary(lemma: &str, language: &str) -> Result Date: Wed, 15 Nov 2023 19:26:42 -0500 Subject: [PATCH 4/4] feat: allow hiding morphology info --- shared/src/lib.rs | 2 +- src-tauri/src/dictionary.rs | 14 ++++++- src-ui/src/settings/dictionary_settings.rs | 45 ++++++++++++++++++---- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/shared/src/lib.rs b/shared/src/lib.rs index 8e253f8..f0a7b90 100644 --- a/shared/src/lib.rs +++ b/shared/src/lib.rs @@ -23,7 +23,7 @@ pub enum Dictionary { File(String, DictFileType), Url(String), Command(String), - Wiktionary(String), + Wiktionary(String, bool), } #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] diff --git a/src-tauri/src/dictionary.rs b/src-tauri/src/dictionary.rs index b9b99e2..630b0cf 100644 --- a/src-tauri/src/dictionary.rs +++ b/src-tauri/src/dictionary.rs @@ -63,7 +63,11 @@ fn to_title(language: &str) -> String { } } -async fn get_def_wiktionary(lemma: &str, language: &str) -> Result> { +async fn get_def_wiktionary( + lemma: &str, + language: &str, + ignore_morph: bool, +) -> Result> { let language = to_title(language); let text = reqwest::get(format!("https://wiktionary.org/wiki/{lemma}")) .await? @@ -85,6 +89,10 @@ async fn get_def_wiktionary(lemma: &str, language: &str) -> Result Result get_def_from_file(lemma, f, dict_type), Dictionary::Url(url) => get_def_url(lemma, url).await, Dictionary::Command(cmd) => get_def_command(lemma, cmd).await, - Dictionary::Wiktionary(lang) => get_def_wiktionary(lemma, lang).await, + Dictionary::Wiktionary(lang, ignore_morph) => { + get_def_wiktionary(lemma, lang, *ignore_morph).await + } } } diff --git a/src-ui/src/settings/dictionary_settings.rs b/src-ui/src/settings/dictionary_settings.rs index 4ad40c1..b45a307 100644 --- a/src-ui/src/settings/dictionary_settings.rs +++ b/src-ui/src/settings/dictionary_settings.rs @@ -75,8 +75,8 @@ fn DictionaryRepresentation( } } "wiktionary" => { - if !matches!(rdict(), Dictionary::Wiktionary(_)) { - wdict(Dictionary::Wiktionary(String::new())); + if !matches!(rdict(), Dictionary::Wiktionary(_, _)) { + wdict(Dictionary::Wiktionary(String::new(), false)); } } _ => unreachable!(), @@ -93,7 +93,7 @@ fn DictionaryRepresentation( - @@ -143,8 +143,8 @@ fn DictionaryRepresentation( } .into_view() } - Dictionary::Wiktionary(url) => { - let (read_sig, write_sig) = create_signal(url); + Dictionary::Wiktionary(url, _) => { + let (read_url, write_url) = create_signal(url); view! {
@@ -152,14 +152,19 @@ fn DictionaryRepresentation( id="wiktionary" type="text" on:input=move |ev| { - write_sig(event_target_value(&ev)); + write_url(event_target_value(&ev)); } on:change=move |_| { - wdict.update(|v| { *v = Dictionary::Wiktionary(read_sig()) }) + wdict + .update(|v| { + if let Dictionary::Wiktionary(_, hide_morph) = v { + *v = Dictionary::Wiktionary(read_url(), *hide_morph); + } + }) } - prop:value=read_sig + prop:value=read_url />
} @@ -178,6 +183,30 @@ fn DictionaryRepresentation( } } +//
+// +// +//
+ #[component] fn file_dictionary_representation( filename: String,