From 09d184fbf84641da2b945da0e54aff85ec08c2ae Mon Sep 17 00:00:00 2001 From: Nik Putilin Date: Thu, 23 May 2024 19:29:31 +0200 Subject: [PATCH 1/2] Basic version --- src/main.rs | 66 ++++++++++++++++++++++++++++++++++++++-- src/testing_resources.rs | 8 +++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 550b55b..8489e50 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ mod testing_resources; +use std::collections::HashMap; use clap::Parser; use std::fs; @@ -19,6 +20,10 @@ struct Args { #[arg(short = 'w')] should_words: bool, + /// Print the frequency of words in each input file + #[arg(short = 'f')] + should_words_frequency: bool, + /// Paths to input files we want to `wc`. If more than one input file is /// specified, a line of cumulative counts for all the files is displayed /// on a separate line after the output for the last file. @@ -28,23 +33,34 @@ struct Args { fn main() { let parsed_args = Args::parse(); let should_words: bool; + let should_words_frequency: bool; let should_lines: bool; let should_characters: bool; let mut should_exit_with_err: bool = false; - if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words { + + if parsed_args.should_words_frequency { + should_words_frequency = true; + should_characters = false; + should_lines = false; + should_words = false; + } else if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words { // Compat with wc behavior, no flags passed means all these should be on. should_characters = true; should_lines = true; should_words = true; + should_words_frequency = false; } else { should_characters = parsed_args.should_characters; should_lines = parsed_args.should_lines; should_words = parsed_args.should_words; + should_words_frequency = false; } let mut total_words: usize = 0; let mut total_lines: usize = 0; let mut total_characters: usize = 0; + let mut total_words_frequency: HashMap = HashMap::new(); + for path in parsed_args.paths.iter() { let file_contents = match fs::read_to_string(path.clone()) { Ok(x) => x, @@ -69,8 +85,15 @@ fn main() { total_characters += characters_in_this_content; print!("{:>8}", characters_in_this_content); } + if should_words_frequency { + let (word_frequency_in_this_content, word_freq) = count_frequency_of_words_in_content(&file_contents); + merge_word_freq(&mut total_words_frequency, &word_freq); + print!("{}", word_frequency_in_this_content); + } + println!(" {}", path) } + // Now if more than 1 path, print total if parsed_args.paths.len() > 1 { if should_lines { @@ -82,6 +105,9 @@ fn main() { if should_characters { print!("{:>8}", total_characters); } + if should_words_frequency { + print!("{}", frequency_of_words_to_string(&total_words_frequency)); + } println!(" total") } if should_exit_with_err { @@ -89,6 +115,13 @@ fn main() { } } +fn merge_word_freq(total: &mut HashMap, iteration_result: &HashMap) { + for (word, count) in iteration_result.iter() { + let total_count = total.entry(word.into()).or_insert(0); + *total_count += count; + } +} + fn count_lines_in_content(content: &str) -> usize { // My initial implementation // content.split('\n').fold(0, |lines: u64, _x| lines + 1) @@ -106,9 +139,32 @@ fn count_words_in_content(content: &str) -> usize { content.split_ascii_whitespace().count() } +fn count_frequency_of_words_in_content(content: &str) -> (String, HashMap) { + let mut word_freq: HashMap = HashMap::new(); + for word in content.split_ascii_whitespace() { + let count = word_freq.entry(word.into()).or_insert(0); + *count += 1; + } + + (frequency_of_words_to_string(&word_freq), word_freq) +} + +fn frequency_of_words_to_string(word_freq: &HashMap) -> String { + let mut sorted_word_freq = word_freq.iter().collect::>(); + sorted_word_freq.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); + sorted_word_freq.truncate(10); + + let mut res = "".to_string(); + for (word, count) in sorted_word_freq.iter() { + res.push_str(&format!("{} {}\n", count, word)); + } + + res +} + #[cfg(test)] mod tests { - use crate::testing_resources::EXAMPLE_CONTENT_EMPTY; + use crate::testing_resources::{EXAMPLE_CONTENT_EMPTY, EXAMPLE_FREQUENCY_CONTENT_WITH_FOUR_LINES}; use crate::testing_resources::EXAMPLE_CONTENT_FIVE_WORDS; use crate::testing_resources::EXAMPLE_CONTENT_TEN_CHARS; use crate::testing_resources::EXAMPLE_CONTENT_WITH_FOUR_LINES; @@ -132,4 +188,10 @@ mod tests { assert_eq!(10, count_characters_in_content(EXAMPLE_CONTENT_TEN_CHARS)); assert_eq!(0, count_characters_in_content(EXAMPLE_CONTENT_EMPTY)); } + + #[test] + fn test_count_frequency_of_words_in_content() { + let (res, _) = count_frequency_of_words_in_content(EXAMPLE_CONTENT_WITH_FOUR_LINES); + assert_eq!(EXAMPLE_FREQUENCY_CONTENT_WITH_FOUR_LINES, res); + } } diff --git a/src/testing_resources.rs b/src/testing_resources.rs index ec631cd..5d0c538 100644 --- a/src/testing_resources.rs +++ b/src/testing_resources.rs @@ -5,6 +5,14 @@ line three line four "; +#[cfg(test)] +pub const EXAMPLE_FREQUENCY_CONTENT_WITH_FOUR_LINES: &str = "4 line +1 four +1 one +1 three +1 two +"; + #[cfg(test)] pub const EXAMPLE_CONTENT_EMPTY: &str = ""; From b77794d25bf1257089dc75f5b1be65529eb1e7e1 Mon Sep 17 00:00:00 2001 From: Nik Putilin Date: Thu, 23 May 2024 19:49:31 +0200 Subject: [PATCH 2/2] Add bench --- Cargo.lock | 408 ++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 6 + benches/my_benchmark.rs | 12 ++ src/frequency.rs | 24 +++ src/lib.rs | 1 + src/main.rs | 29 +-- 6 files changed, 455 insertions(+), 25 deletions(-) create mode 100644 benches/my_benchmark.rs create mode 100644 src/frequency.rs create mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 08ddb58..feb3337 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.13" @@ -65,6 +80,12 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + [[package]] name = "bstr" version = "1.9.1" @@ -76,6 +97,51 @@ dependencies = [ "serde", ] +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.4" @@ -122,6 +188,73 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "difflib" version = "0.4.0" @@ -134,24 +267,136 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + [[package]] name = "memchr" version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" + +[[package]] +name = "plotters-svg" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +dependencies = [ + "plotters-backend", +] + [[package]] name = "predicates" version = "3.1.0" @@ -197,11 +442,54 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + [[package]] name = "regex-automata" version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rusty-wc" @@ -209,6 +497,22 @@ version = "0.1.0" dependencies = [ "assert_cmd", "clap", + "criterion", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", ] [[package]] @@ -231,6 +535,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "strsim" version = "0.11.1" @@ -254,6 +569,16 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -275,6 +600,89 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 8840484..5fb9921 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,9 @@ edition = "2021" [dependencies] assert_cmd = "2.0.14" clap = { version = "4.5.4", features = ["derive"] } +criterion = "0.5.1" + + +[[bench]] +name = "my_benchmark" +harness = false \ No newline at end of file diff --git a/benches/my_benchmark.rs b/benches/my_benchmark.rs new file mode 100644 index 0000000..7bb0c51 --- /dev/null +++ b/benches/my_benchmark.rs @@ -0,0 +1,12 @@ +use std::fs; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rusty_wc::frequency; + +fn criterion_benchmark(c: &mut Criterion) { + let file_contents = fs::read_to_string("LICENSE").unwrap(); + + c.bench_function("frequency in license", |b| b.iter(|| frequency::count_frequency_of_words_in_content(black_box(&file_contents)))); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); \ No newline at end of file diff --git a/src/frequency.rs b/src/frequency.rs new file mode 100644 index 0000000..9084a83 --- /dev/null +++ b/src/frequency.rs @@ -0,0 +1,24 @@ +use std::collections::HashMap; + +pub fn count_frequency_of_words_in_content(content: &str) -> (String, HashMap) { + let mut word_freq: HashMap = HashMap::new(); + for word in content.split_ascii_whitespace() { + let count = word_freq.entry(word.into()).or_insert(0); + *count += 1; + } + + (frequency_of_words_to_string(&word_freq), word_freq) +} + +pub fn frequency_of_words_to_string(word_freq: &HashMap) -> String { + let mut sorted_word_freq = word_freq.iter().collect::>(); + sorted_word_freq.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); + sorted_word_freq.truncate(10); + + let mut res = "".to_string(); + for (word, count) in sorted_word_freq.iter() { + res.push_str(&format!("{} {}\n", count, word)); + } + + res +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..b33dd46 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod frequency; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 8489e50..48c2d3d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod testing_resources; +mod frequency; use std::collections::HashMap; use clap::Parser; @@ -86,7 +87,7 @@ fn main() { print!("{:>8}", characters_in_this_content); } if should_words_frequency { - let (word_frequency_in_this_content, word_freq) = count_frequency_of_words_in_content(&file_contents); + let (word_frequency_in_this_content, word_freq) = frequency::count_frequency_of_words_in_content(&file_contents); merge_word_freq(&mut total_words_frequency, &word_freq); print!("{}", word_frequency_in_this_content); } @@ -106,7 +107,7 @@ fn main() { print!("{:>8}", total_characters); } if should_words_frequency { - print!("{}", frequency_of_words_to_string(&total_words_frequency)); + print!("{}", frequency::frequency_of_words_to_string(&total_words_frequency)); } println!(" total") } @@ -139,31 +140,9 @@ fn count_words_in_content(content: &str) -> usize { content.split_ascii_whitespace().count() } -fn count_frequency_of_words_in_content(content: &str) -> (String, HashMap) { - let mut word_freq: HashMap = HashMap::new(); - for word in content.split_ascii_whitespace() { - let count = word_freq.entry(word.into()).or_insert(0); - *count += 1; - } - - (frequency_of_words_to_string(&word_freq), word_freq) -} - -fn frequency_of_words_to_string(word_freq: &HashMap) -> String { - let mut sorted_word_freq = word_freq.iter().collect::>(); - sorted_word_freq.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); - sorted_word_freq.truncate(10); - - let mut res = "".to_string(); - for (word, count) in sorted_word_freq.iter() { - res.push_str(&format!("{} {}\n", count, word)); - } - - res -} - #[cfg(test)] mod tests { + use crate::frequency::count_frequency_of_words_in_content; use crate::testing_resources::{EXAMPLE_CONTENT_EMPTY, EXAMPLE_FREQUENCY_CONTENT_WITH_FOUR_LINES}; use crate::testing_resources::EXAMPLE_CONTENT_FIVE_WORDS; use crate::testing_resources::EXAMPLE_CONTENT_TEN_CHARS;