diff --git a/src/bi_webgraph.rs b/src/bi_webgraph.rs index b1b3983..86c8a52 100644 --- a/src/bi_webgraph.rs +++ b/src/bi_webgraph.rs @@ -146,7 +146,7 @@ impl WeightedBipartiteGraph for BiWebgraph { #[inline(always)] /// Returns the number of source nodes. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -171,7 +171,7 @@ impl WeightedBipartiteGraph for BiWebgraph { #[inline(always)] /// Returns the number of destination nodes. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -196,7 +196,7 @@ impl WeightedBipartiteGraph for BiWebgraph { #[inline(always)] /// Returns the number of edges. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -224,7 +224,7 @@ impl WeightedBipartiteGraph for BiWebgraph { /// # Arguments /// * `src_id`: A `usize` which is the source node identifier. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -257,7 +257,7 @@ impl WeightedBipartiteGraph for BiWebgraph { /// # Arguments /// * `dst_id`: A `usize` which is the destination node identifier. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -292,7 +292,7 @@ impl WeightedBipartiteGraph for BiWebgraph { /// # Arguments /// * `dst_id`: A `usize` which is the destination node identifier. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -330,7 +330,7 @@ impl WeightedBipartiteGraph for BiWebgraph { /// # Arguments /// * `src_id`: A `usize` which is the source node identifier. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -371,7 +371,7 @@ impl WeightedBipartiteGraph for BiWebgraph { /// # Arguments /// * `dst_id`: A `usize` which is the destination node identifier. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -407,7 +407,7 @@ impl WeightedBipartiteGraph for BiWebgraph { #[inline(always)] /// Returns the weights of the edges. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, @@ -442,7 +442,7 @@ impl WeightedBipartiteGraph for BiWebgraph { #[inline(always)] /// Returns the degrees of the nodes. /// - /// # Example + /// # Examples /// In this example, we create the trigram corpus associated /// to the ANIMALS dataset which we provide within this crate, /// and then we convert it to webgraph format. Secondarily, diff --git a/src/corpus.rs b/src/corpus.rs index c7b8c23..504977b 100644 --- a/src/corpus.rs +++ b/src/corpus.rs @@ -97,7 +97,7 @@ where /// Returns a reference to underlying graph. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -127,7 +127,7 @@ where #[inline(always)] /// Returns the number of keys in the corpus. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -145,7 +145,7 @@ where #[inline(always)] /// Returns the number of ngrams in the corpus. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -166,16 +166,16 @@ where /// # Arguments /// * `key_id` - The id of the key to get. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; /// /// let animals: Corpus<_, TriGram> = Corpus::from(ANIMALS); /// - /// assert_eq!(animals.key_from_id(0), "Aardvark"); - /// assert_eq!(animals.key_from_id(1), "Abyssinian"); - /// assert_eq!(animals.key_from_id(20), "Alligator"); + /// assert_eq!(animals.key_from_id(0), &"Aardvark"); + /// assert_eq!(animals.key_from_id(1), &"Abyssinian"); + /// assert_eq!(animals.key_from_id(20), &"Alligator"); /// ``` pub fn key_from_id( &self, @@ -190,7 +190,7 @@ where /// # Arguments /// * `ngram_id` - The id of the ngram to get. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -218,7 +218,7 @@ where /// # Arguments /// * `ngram` - The ngram to get the id from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -245,7 +245,7 @@ where /// # Arguments /// * `key_id` - The id of the key to get the number of ngrams from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -266,7 +266,7 @@ where /// # Arguments /// * `ngram_id` - The id of the ngram to get the number of keys from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -287,7 +287,7 @@ where /// # Arguments /// * `ngram_id` - The id of the ngram to get the key ids from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -308,7 +308,7 @@ where /// # Arguments /// * `key_id` - The id of the key to get the ngram ids from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -329,7 +329,7 @@ where /// # Arguments /// * `key_id` - The id of the key to get the ngram co-occurrences from. /// - /// # Example + /// # Examples /// We check that all values are greater than 0. /// /// ```rust @@ -355,7 +355,7 @@ where #[inline(always)] /// Returns all co-occurrences. /// - /// # Example + /// # Examples /// We check that all values are greater than 0. /// /// ```rust @@ -376,7 +376,7 @@ where /// # Arguments /// * `key_id` - The id of the key to get the ngrams and their co-occurrences from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -401,7 +401,7 @@ where /// # Arguments /// * `key_id` - The id of the key to get the ngrams and their co-occurrences from. /// - /// # Example + /// # Examples /// We check that all of the ngrams returned appear in the corpus and /// that all of the co-occurrences are greater than 0. /// @@ -442,7 +442,7 @@ where /// # Arguments /// * `key_id` - The id of the key to get the ngrams from. /// - /// # Example + /// # Examples /// We check that all of the ngrams returned appear in the corpus. /// /// ```rust @@ -480,7 +480,7 @@ where /// # Returns /// An iterator over the keys associated to the ngram. /// - /// # Example + /// # Examples /// We check that the keys returned by the keys_from_ngram_id method are the /// exactly same keys returned keys_from_ngram method. /// @@ -515,7 +515,7 @@ where /// # Arguments /// * `ngram` - The ngram to get the number of keys from. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -568,7 +568,7 @@ where /// # Returns /// An iterator over the keys associated to the ngram. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -606,7 +606,7 @@ where /// # Implementative details /// This function is implemented using a Binary Heap. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; diff --git a/src/corpus_par_from.rs b/src/corpus_par_from.rs index 7856330..5d8a381 100644 --- a/src/corpus_par_from.rs +++ b/src/corpus_par_from.rs @@ -19,7 +19,7 @@ where /// # Arguments /// * `keys` - The keys to create the corpus from. /// - /// # Example + /// # Examples /// In the following example, we create a corpus from the set of keys /// defined by the `ANIMALS` constant array. We provide several synonims /// for arrays, such as MonoGrams, BiGrams, TriGrams, and so on. This is @@ -64,13 +64,13 @@ where /// /// let animals = vec!["cat", "dog", "bird", "fish", "lion"]; /// - /// let bigram_corpus: Corpus<&[&str], BiGram> = Corpus::par_from(&animals); - /// let trigram_corpus: Corpus<&[&str], TriGram> = Corpus::par_from(&animals); - /// let tetragram_corpus: Corpus<&[&str], TetraGram> = Corpus::par_from(&animals); - /// let pentagram_corpus: Corpus<&[&str], PentaGram> = Corpus::par_from(&animals); - /// let hexagram_corpus: Corpus<&[&str], HexaGram> = Corpus::par_from(&animals); - /// let heptagram_corpus: Corpus<&[&str], HeptaGram> = Corpus::par_from(&animals); - /// let octagram_corpus: Corpus<&[&str], OctaGram> = Corpus::par_from(&animals); + /// let bigram_corpus: Corpus, BiGram> = Corpus::par_from(animals.clone()); + /// let trigram_corpus: Corpus, TriGram> = Corpus::par_from(animals.clone()); + /// let tetragram_corpus: Corpus, TetraGram> = Corpus::par_from(animals.clone()); + /// let pentagram_corpus: Corpus, PentaGram> = Corpus::par_from(animals.clone()); + /// let hexagram_corpus: Corpus, HexaGram> = Corpus::par_from(animals.clone()); + /// let heptagram_corpus: Corpus, HeptaGram> = Corpus::par_from(animals.clone()); + /// let octagram_corpus: Corpus, OctaGram> = Corpus::par_from(animals.clone()); /// ``` /// /// And references of arrays: @@ -80,13 +80,13 @@ where /// /// let animals = ["cat", "dog", "bird", "fish", "lion"]; /// - /// let bigram_corpus: Corpus<&[&str; 5], BiGram> = Corpus::par_from(&animals); - /// let trigram_corpus: Corpus<&[&str; 5], TriGram> = Corpus::par_from(&animals); - /// let tetragram_corpus: Corpus<&[&str; 5], TetraGram> = Corpus::par_from(&animals); - /// let pentagram_corpus: Corpus<&[&str; 5], PentaGram> = Corpus::par_from(&animals); - /// let hexagram_corpus: Corpus<&[&str; 5], HexaGram> = Corpus::par_from(&animals); - /// let heptagram_corpus: Corpus<&[&str; 5], HeptaGram> = Corpus::par_from(&animals); - /// let octagram_corpus: Corpus<&[&str; 5], OctaGram> = Corpus::par_from(&animals); + /// let bigram_corpus: Corpus<[&str; 5], BiGram> = Corpus::par_from(animals); + /// let trigram_corpus: Corpus<[&str; 5], TriGram> = Corpus::par_from(animals); + /// let tetragram_corpus: Corpus<[&str; 5], TetraGram> = Corpus::par_from(animals); + /// let pentagram_corpus: Corpus<[&str; 5], PentaGram> = Corpus::par_from(animals); + /// let hexagram_corpus: Corpus<[&str; 5], HexaGram> = Corpus::par_from(animals); + /// let heptagram_corpus: Corpus<[&str; 5], HeptaGram> = Corpus::par_from(animals); + /// let octagram_corpus: Corpus<[&str; 5], OctaGram> = Corpus::par_from(animals); /// ``` /// /// In all of these examples, we have used char-based grams. We can also use u8-based grams: @@ -96,13 +96,13 @@ where /// /// let animals = vec!["cat", "dog", "bird", "fish", "lion"]; /// - /// let bigram_corpus: Corpus<&[&str], BiGram> = Corpus::par_from(&animals); - /// let trigram_corpus: Corpus<&[&str], TriGram> = Corpus::par_from(&animals); - /// let tetragram_corpus: Corpus<&[&str], TetraGram> = Corpus::par_from(&animals); - /// let pentagram_corpus: Corpus<&[&str], PentaGram> = Corpus::par_from(&animals); - /// let hexagram_corpus: Corpus<&[&str], HexaGram> = Corpus::par_from(&animals); - /// let heptagram_corpus: Corpus<&[&str], HeptaGram> = Corpus::par_from(&animals); - /// let octagram_corpus: Corpus<&[&str], OctaGram> = Corpus::par_from(&animals); + /// let bigram_corpus: Corpus, BiGram> = Corpus::par_from(animals.clone()); + /// let trigram_corpus: Corpus, TriGram> = Corpus::par_from(animals.clone()); + /// let tetragram_corpus: Corpus, TetraGram> = Corpus::par_from(animals.clone()); + /// let pentagram_corpus: Corpus, PentaGram> = Corpus::par_from(animals.clone()); + /// let hexagram_corpus: Corpus, HexaGram> = Corpus::par_from(animals.clone()); + /// let heptagram_corpus: Corpus, HeptaGram> = Corpus::par_from(animals.clone()); + /// let octagram_corpus: Corpus, OctaGram> = Corpus::par_from(animals.clone()); /// ``` /// /// It is also pretty easy to define normalizations for the keys. For instance, you can @@ -113,18 +113,18 @@ where /// /// let animals = vec!["cat", "dog", "bIrd", "Fish", "Lion"]; /// - /// let bigram_corpus: Corpus<&[&str], BiGram, Lowercase> = Corpus::par_from(&animals); - /// let trigram_corpus: Corpus<&[&str], TriGram, Lowercase> = Corpus::par_from(&animals); - /// let tetragram_corpus: Corpus<&[&str], TetraGram, Lowercase> = - /// Corpus::par_from(&animals); - /// let pentagram_corpus: Corpus<&[&str], PentaGram, Lowercase> = - /// Corpus::par_from(&animals); - /// let hexagram_corpus: Corpus<&[&str], HexaGram, Lowercase> = - /// Corpus::par_from(&animals); - /// let heptagram_corpus: Corpus<&[&str], HeptaGram, Lowercase> = - /// Corpus::par_from(&animals); - /// let octagram_corpus: Corpus<&[&str], OctaGram, Lowercase> = - /// Corpus::par_from(&animals); + /// let bigram_corpus: Corpus, BiGram, Lowercase> = Corpus::par_from(animals.clone()); + /// let trigram_corpus: Corpus, TriGram, Lowercase> = Corpus::par_from(animals.clone()); + /// let tetragram_corpus: Corpus, TetraGram, Lowercase> = + /// Corpus::par_from(animals.clone()); + /// let pentagram_corpus: Corpus, PentaGram, Lowercase> = + /// Corpus::par_from(animals.clone()); + /// let hexagram_corpus: Corpus, HexaGram, Lowercase> = + /// Corpus::par_from(animals.clone()); + /// let heptagram_corpus: Corpus, HeptaGram, Lowercase> = + /// Corpus::par_from(animals.clone()); + /// let octagram_corpus: Corpus, OctaGram, Lowercase> = + /// Corpus::par_from(animals.clone()); /// ``` pub fn par_from(keys: KS) -> Self { // We start by parsing the keys to extract the ngrams, the cooccurrences, the key offsets, diff --git a/src/lib.rs b/src/lib.rs index 0684ccf..a231265 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,4 +42,5 @@ pub mod prelude { pub use crate::bi_webgraph::*; pub use crate::ngram_search::*; pub use crate::tfidf::*; + pub use crate::search::*; } diff --git a/src/ngram_search.rs b/src/ngram_search.rs index af4b5f3..8cca91b 100644 --- a/src/ngram_search.rs +++ b/src/ngram_search.rs @@ -1,9 +1,6 @@ //! Submodule providing the trigram search implementation. -use crate::{ - prelude::*, - search::{MaxNgramDegree, QueryHashmap, SearchConfig}, -}; +use crate::prelude::*; #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] /// Struct providing an ngram search configuration. @@ -83,6 +80,22 @@ impl NgramSearchConfig { self } + #[inline(always)] + /// Returns the maximum degree of the ngrams to consider in the search. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config: NgramSearchConfig = NgramSearchConfig::default(); + /// + /// assert_eq!(config.max_ngram_degree(), MaxNgramDegree::Default); + /// ``` + pub fn max_ngram_degree(&self) -> MaxNgramDegree { + self.search_config.max_ngram_degree() + } + #[inline(always)] /// Set the warp factor to use in the trigram similarity calculation. /// @@ -123,7 +136,7 @@ where /// * `limit` - The maximum number of results to return. /// * `max_counts` - Excludes ngrams with counts above this value. By default, equal to the maximum between 1/10 of the number of keys and 100. /// - /// # Example + /// # Examples /// We can use the ANIMALS dataset shipped with the library to search for similar keys. /// We use as unit of the ngram a `char`, and we search for trigrams similar to the key "cat". /// Using a `char` is an `u32`, so four times more expensive than using a `u8` or a `ASCIIChar`, @@ -136,10 +149,10 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.ngram_search("Cat", NgramSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` /// /// Now let's proceed with an example to highlight the importance of normalizing the input. @@ -153,7 +166,7 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.ngram_search("catt", NgramSearchConfig::default()); /// /// assert!(results.is_empty()); @@ -169,10 +182,10 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram, Lowercase> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.ngram_search("catt", NgramSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` /// /// In the next example we will see how to use `ASCIIChar` as ngram unit. When @@ -184,10 +197,10 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.ngram_search("Cat", NgramSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` /// /// In the next example we will see how to use `u8` as ngram unit. The key difference between @@ -200,10 +213,10 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.ngram_search("Cat", NgramSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn ngram_search( &self, @@ -224,7 +237,7 @@ where /// * `key` - The key to search for in the corpus /// * `config` - The configuration for the search. /// - /// # Example + /// # Examples /// In this example we use the ANIMALS dataset shipped with the library to search for similar keys, /// using the version of the search with a custom warp factor. /// @@ -235,9 +248,9 @@ where /// /// let config = NgramSearchConfig::default().set_warp(2.5).unwrap(); /// - /// let results: Vec> = corpus.ngram_search_with_warp("Cat", config); + /// let results: Vec> = corpus.ngram_search_with_warp("Cat", config); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn ngram_search_with_warp( &self, @@ -278,7 +291,7 @@ where /// * `key` - The key to search for in the corpus /// * `config` - The configuration for the search. /// - /// # Example + /// # Examples /// This is the concurrent version of the `ngram_search` method. /// Please look at the documentation of the `ngram_search` method for the extended /// documentation. @@ -288,10 +301,10 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::par_from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.ngram_par_search("Cat", NgramSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn ngram_par_search( &self, @@ -312,7 +325,7 @@ where /// * `key` - The key to search for in the corpus /// * `config` - The configuration for the search. /// - /// # Example + /// # Examples /// This is the concurrent version of the `ngram_search_with_warp` method. /// Please look at the documentation of the `ngram_search_with_warp` method for the extended /// documentation. @@ -324,9 +337,9 @@ where /// /// let config = NgramSearchConfig::default().set_warp(2.5).unwrap(); /// - /// let results: Vec> = corpus.ngram_par_search_with_warp("Cat", config); + /// let results: Vec> = corpus.ngram_par_search_with_warp("Cat", config); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn ngram_par_search_with_warp( &self, diff --git a/src/par_search.rs b/src/par_search.rs index 800067e..836de52 100644 --- a/src/par_search.rs +++ b/src/par_search.rs @@ -42,7 +42,7 @@ where let key: &K = key.as_ref(); let query_hashmap = self.ngram_ids_from_ngram_counts(key.counts()); let query_hashmap_ref = &query_hashmap; - let max_ngram_degree = config.max_ngram_degree(self.number_of_keys()); + let max_ngram_degree = config.compute_max_ngram_degree(self.number_of_keys()); // We identify all of the ngrams to be considered in the search, which // are the set of ngrams that contain any of the grams in the ngram diff --git a/src/report.rs b/src/report.rs index b38c7a5..5a86b58 100644 --- a/src/report.rs +++ b/src/report.rs @@ -47,7 +47,7 @@ where { /// Returns a report of the corpus. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; diff --git a/src/search.rs b/src/search.rs index d09130f..47f253b 100644 --- a/src/search.rs +++ b/src/search.rs @@ -182,10 +182,16 @@ impl SearchConfig { /// /// # Arguments /// * `number_of_keys` - The number of keys in the corpus. - pub(crate) fn max_ngram_degree(&self, number_of_keys: usize) -> usize { + pub(crate) fn compute_max_ngram_degree(&self, number_of_keys: usize) -> usize { self.max_ngram_degree.max_ngram_degree(number_of_keys) } + #[inline(always)] + /// Returns the max ngram degree. + pub fn max_ngram_degree(&self) -> MaxNgramDegree { + self.max_ngram_degree + } + #[inline(always)] /// Returns the minimum similarity value for a result to be included in the output. pub fn minimum_similarity_score(&self) -> F { @@ -314,7 +320,7 @@ where let query_hashmap_ref = &query_hashmap; let mut heap = SearchResultsHeap::new(config.maximum_number_of_results()); - let max_ngram_degree = config.max_ngram_degree(self.number_of_keys()); + let max_ngram_degree = config.compute_max_ngram_degree(self.number_of_keys()); // We identify all of the ngrams to be considered in the search, which // are the set of ngrams that contain any of the grams in the ngram diff --git a/src/tfidf.rs b/src/tfidf.rs index dc95e78..b413359 100644 --- a/src/tfidf.rs +++ b/src/tfidf.rs @@ -1,8 +1,5 @@ //! Submodule providing a term frequency-inverse document frequency (TF-IDF) implementation. -use crate::{ - prelude::*, - search::{MaxNgramDegree, QueryHashmap, SearchConfig}, -}; +use crate::prelude::*; use std::cmp::Ordering; #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] @@ -39,12 +36,34 @@ impl Default for TFIDFSearchConfig { impl TFIDFSearchConfig { #[inline(always)] /// Returns the minimum similarity value for a result to be included in the output. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config = TFIDFSearchConfig::default(); + /// let minimum_similarity_score: f32 = config.minimum_similarity_score(); + /// + /// assert_eq!(minimum_similarity_score, 0.7_f32); + /// ``` pub fn minimum_similarity_score(&self) -> F { self.search_config.minimum_similarity_score() } #[inline(always)] /// Returns the maximum number of results to return. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config: TFIDFSearchConfig = TFIDFSearchConfig::default(); + /// let maximum_number_of_results = config.maximum_number_of_results(); + /// + /// assert_eq!(maximum_number_of_results, 10); + /// ``` pub fn maximum_number_of_results(&self) -> usize { self.search_config.maximum_number_of_results() } @@ -54,6 +73,25 @@ impl TFIDFSearchConfig { /// /// # Arguments /// * `minimum_similarity_score` - The minimum similarity value for a result to be included in the output. + /// + /// # Raises + /// * If the minimum similarity score is not a valid float or is not in the range 0.0 to 1.0. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config = TFIDFSearchConfig::default(); + /// assert_eq!(config.minimum_similarity_score(), 0.7_f32); + /// assert_eq!( + /// config.set_minimum_similarity_score(f32::NAN), + /// Err("The minimum similarity score must not be NaN") + /// ); + /// let config = config.set_minimum_similarity_score(0.5_f32).unwrap(); + /// + /// assert_eq!(config.minimum_similarity_score(), 0.5_f32); + /// ``` pub fn set_minimum_similarity_score( mut self, minimum_similarity_score: F, @@ -69,6 +107,18 @@ impl TFIDFSearchConfig { /// /// # Arguments /// * `maximum_number_of_results` - The maximum number of results to return. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config: TFIDFSearchConfig = TFIDFSearchConfig::default(); + /// assert_eq!(config.maximum_number_of_results(), 10); + /// let config = config.set_maximum_number_of_results(5); + /// + /// assert_eq!(config.maximum_number_of_results(), 5); + /// ``` pub fn set_maximum_number_of_results(mut self, maximum_number_of_results: usize) -> Self { self.search_config = self .search_config @@ -81,11 +131,38 @@ impl TFIDFSearchConfig { /// /// # Arguments /// * `max_ngram_degree` - The maximum degree of the ngrams to consider in the search. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config: TFIDFSearchConfig = TFIDFSearchConfig::default(); + /// assert_eq!(config.max_ngram_degree(), MaxNgramDegree::Default); + /// let config = config.set_max_ngram_degree(MaxNgramDegree::None); + /// + /// assert_eq!(config.max_ngram_degree(), MaxNgramDegree::None); + /// ``` pub fn set_max_ngram_degree(mut self, max_ngram_degree: MaxNgramDegree) -> Self { self.search_config = self.search_config.set_max_ngram_degree(max_ngram_degree); self } + #[inline(always)] + /// Returns the maximum degree of the ngrams to consider in the search. + /// + /// # Examples + /// + /// ```rust + /// use ngrammatic::prelude::*; + /// + /// let config: TFIDFSearchConfig = TFIDFSearchConfig::default(); + /// assert_eq!(config.max_ngram_degree(), MaxNgramDegree::Default); + /// ``` + pub fn max_ngram_degree(&self) -> MaxNgramDegree { + self.search_config.max_ngram_degree() + } + #[inline(always)] /// Set the K1 constant. /// @@ -188,7 +265,7 @@ where #[inline(always)] /// Returns the average document length of the corpus. /// - /// # Example + /// # Examples /// ```rust /// use ngrammatic::prelude::*; /// @@ -264,7 +341,7 @@ where /// * `key` - The key to search for in the corpus. /// * `config` - The TF-IDF search configuration. /// - /// # Example + /// # Examples /// We can use the ANIMALS dataset shipped with the library to search for similar keys using /// the TF-IDF similarity metric. /// We use as unit of the ngram a `char`, and we search for trigrams similar to the key "cat". @@ -279,15 +356,15 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.tf_idf_search("Cat", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.tf_idf_search("Catt", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn tf_idf_search( &self, @@ -316,22 +393,22 @@ where /// * `key` - The key to search for in the corpus. /// * `config` - The TF-IDF search configuration. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::par_from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.warped_tf_idf_search("Cat", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.warped_tf_idf_search("Catt", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn warped_tf_idf_search( &self, @@ -378,7 +455,7 @@ where /// * `key` - The key to search for in the corpus. /// * `config` - The TF-IDF search configuration. /// - /// # Example + /// # Examples /// This is the concurrent version of the example in the `tf_idf_search` method. /// If you need a more detailed version of the example, please refer to the documentation of the /// sequential `tf_idf_search` method. @@ -388,15 +465,15 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.tf_idf_par_search("Cat", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.tf_idf_par_search("Catt", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn tf_idf_par_search( &self, @@ -429,15 +506,15 @@ where /// /// let corpus: Corpus<&[&str; 699], BiGram> = Corpus::par_from(&ANIMALS); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.warped_tf_idf_par_search("Cat", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// - /// let results: Vec> = + /// let results: Vec> = /// corpus.warped_tf_idf_par_search("Catt", TFIDFSearchConfig::default()); /// - /// assert_eq!(results[0].key(), "Cat"); + /// assert_eq!(results[0].key(), &"Cat"); /// ``` pub fn warped_tf_idf_par_search( &self, diff --git a/src/traits/ascii_char.rs b/src/traits/ascii_char.rs index b2362bd..9255cdb 100644 --- a/src/traits/ascii_char.rs +++ b/src/traits/ascii_char.rs @@ -77,7 +77,7 @@ impl ASCIIChar { #[inline(always)] /// Returns the lowercase version of the character. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -95,7 +95,7 @@ impl ASCIIChar { #[inline(always)] /// Returns the uppercase version of the character. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -113,7 +113,7 @@ impl ASCIIChar { #[inline(always)] /// Returns whether the current character is a space-like. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -130,7 +130,7 @@ impl ASCIIChar { #[inline(always)] /// Returns whether the current character is alphanumeric. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; @@ -214,7 +214,7 @@ where pub trait ToASCIICharIterator: IntoIterator { /// Converts the iterator to an `ASCIICharIterator`. /// - /// # Example + /// # Examples /// /// ```rust /// use ngrammatic::prelude::*; diff --git a/src/traits/char_normalizer.rs b/src/traits/char_normalizer.rs index 5968428..d3eb21c 100644 --- a/src/traits/char_normalizer.rs +++ b/src/traits/char_normalizer.rs @@ -620,7 +620,7 @@ where #[inline(always)] /// Trims spaces from the left of the iterator. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to trim spaces from the left of a string /// composed of `char`: @@ -662,7 +662,7 @@ where #[inline(always)] /// Trims spaces from the right of the iterator. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to trim spaces from the right of a string /// composed of `char`: @@ -707,7 +707,7 @@ where #[inline(always)] /// Trims spaces from both sides of the iterator. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to trim spaces from both sides of a string /// composed of `char`: @@ -752,7 +752,7 @@ where #[inline(always)] /// Trims null characters from the left of the iterator. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to trim null characters from the left of a string /// composed of `char`: @@ -794,7 +794,7 @@ where #[inline(always)] /// Trims null characters from the right of the iterator. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to trim null characters from the right of a string /// composed of `char`: @@ -839,7 +839,7 @@ where #[inline(always)] /// Trims null characters from both sides of the iterator. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to trim null characters from both sides of a string /// composed of `char`: @@ -884,7 +884,7 @@ where #[inline(always)] /// Converts all characters to lowercase. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to convert all characters to lowercase /// of a string composed of `char`: @@ -927,7 +927,7 @@ where #[inline(always)] /// Converts all non-alphanumerical characters to spaces. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to convert all non-alphanumerical characters to spaces /// of a string composed of `char`: @@ -972,7 +972,7 @@ where #[inline(always)] /// Normalizes spaces, removing subsequent spaces. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to normalize spaces, removing subsequent spaces /// of a string composed of `char`: diff --git a/src/traits/iter_ngrams.rs b/src/traits/iter_ngrams.rs index 0b12c5b..4a87ad1 100644 --- a/src/traits/iter_ngrams.rs +++ b/src/traits/iter_ngrams.rs @@ -68,7 +68,7 @@ where #[inline(always)] /// Converts an iterator of grams to an iterator of n-grams. /// - /// # Example + /// # Examples /// /// An example for when using an iterator of `u8` bigrams: /// ```rust diff --git a/src/traits/key.rs b/src/traits/key.rs index 773e4c3..21773f6 100644 --- a/src/traits/key.rs +++ b/src/traits/key.rs @@ -22,7 +22,7 @@ pub trait Key, G: Gram>: AsRef<>::Ref> { /// Returns an iterator over the grams of the key. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to get the grams of a key /// represented by a string, composed of `u8`: @@ -66,7 +66,7 @@ pub trait Key, G: Gram>: AsRef<>::Ref> { /// Returns the counts of the ngrams. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to get the counts of the ngrams /// of a key represented by a string, composed of `u8`: diff --git a/src/traits/keys.rs b/src/traits/keys.rs index 2eba6e2..0a2e51d 100644 --- a/src/traits/keys.rs +++ b/src/traits/keys.rs @@ -95,7 +95,7 @@ impl> Keys for [K] { impl Keys for &R where - R: Keys, + R: Keys + ?Sized, { type K = R::K; type KeyRef<'a> = R::KeyRef<'a> where Self: 'a; @@ -131,6 +131,6 @@ where } fn iter(&self) -> Self::IterKeys<'_> { - self.into_iter_from(0) + self.iter_from(0) } } diff --git a/src/traits/numerical.rs b/src/traits/numerical.rs index eb0692a..671fdae 100644 --- a/src/traits/numerical.rs +++ b/src/traits/numerical.rs @@ -28,7 +28,7 @@ pub trait Three { pub trait BetweenOneAndThree: PartialOrd + One + Three + Sized { /// Check if the value is between one and three. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to check if a value is between one and three: /// ```rust diff --git a/src/traits/padder.rs b/src/traits/padder.rs index 48f773e..4338b8e 100644 --- a/src/traits/padder.rs +++ b/src/traits/padder.rs @@ -20,7 +20,7 @@ where { /// Adds padding to the left (beginning) of the iterator. /// - /// # Example + /// # Examples /// /// An example for when using an iterator of `u8`: /// ```rust @@ -68,7 +68,7 @@ where /// Adds padding to the right (end) of the iterator. /// - /// # Example + /// # Examples /// /// An example for when using an iterator of `u8`: /// ```rust @@ -116,7 +116,7 @@ where /// Adds padding to both sides of the iterator. /// - /// # Example + /// # Examples /// An example for when using an iterator of `u8`: /// ```rust /// use ngrammatic::prelude::*; diff --git a/src/traits/underscored.rs b/src/traits/underscored.rs index 8f61b77..3d81f9c 100644 --- a/src/traits/underscored.rs +++ b/src/traits/underscored.rs @@ -6,7 +6,7 @@ pub trait Underscored { /// Returns the integer formatted with underscores. /// - /// # Example + /// # Examples /// /// The following example demonstrates how to format an integer with underscores: /// ```rust