diff --git a/src/main.rs b/src/main.rs index 368e83c..d7171f7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ mod testing_resources; use clap::Parser; +use std::collections::HashMap; use std::fs; /// wc impl in rust @@ -8,17 +9,25 @@ use std::fs; #[command(version, about, long_about = None)] struct Args { /// Print the number of lines in each input file - #[arg(short = 'l')] + #[clap(short = 'l', conflicts_with = "should_frequency")] should_lines: bool, /// Print the number of bytes in each input file - #[arg(short = 'c')] + #[clap(short = 'c', conflicts_with = "should_frequency")] should_characters: bool, /// Print the number of words in each input file - #[arg(short = 'w')] + #[clap(short = 'w', conflicts_with = "should_frequency")] should_words: bool, + /// Print the n most frequent words in each input file and their count + #[clap(short = 'f')] + should_frequency: bool, + + /// How many words to print when using the -f flag + #[clap(short = 'n', default_value_t = 10)] + frequency_word_count: usize, + /// Paths to input files we want to `wc`. If more than one input file is /// specified, a line of cumulative counts for all the files is displayed /// on a separate line after the output for the last file. @@ -27,11 +36,17 @@ struct Args { fn main() { let parsed_args = Args::parse(); + let should_frequency = parsed_args.should_frequency; + let frequency_word_count = parsed_args.frequency_word_count; let should_words: bool; let should_lines: bool; let should_characters: bool; let mut should_exit_with_err: bool = false; - if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words { + if !parsed_args.should_characters + && !parsed_args.should_lines + && !parsed_args.should_words + && !parsed_args.should_frequency + { // Compat with wc behavior, no flags passed means all these should be on. should_characters = true; should_lines = true; @@ -45,6 +60,7 @@ fn main() { let mut total_words: usize = 0; let mut total_lines: usize = 0; let mut total_characters: usize = 0; + let mut words_frequency: HashMap = HashMap::new(); for path in parsed_args.paths.iter() { let file_contents = match fs::read_to_string(path) { Ok(x) => x, @@ -69,6 +85,10 @@ fn main() { total_characters += characters_in_this_content; print!("{:>8}", characters_in_this_content); } + if should_frequency { + update_words_frequency(&file_contents, &mut words_frequency); + continue; + } println!(" {}", path) } // Now if more than 1 path, print total @@ -84,6 +104,14 @@ fn main() { } println!(" total") } + if should_frequency { + let mut sorted_words: Vec<(&String, &usize)> = words_frequency.iter().collect(); + sorted_words.sort_unstable_by(|(_, f1), (_, f2)| f2.cmp(f1)); + for (word, count) in sorted_words.iter().take(frequency_word_count) { + println!("{} {}", count, word); + } + return; + } if should_exit_with_err { std::process::exit(0x00000001); } @@ -106,6 +134,13 @@ fn count_words_in_content(content: &str) -> usize { content.split_ascii_whitespace().count() } +fn update_words_frequency(content: &str, words_frequency: &mut HashMap) { + for word in content.split_ascii_whitespace() { + let count = words_frequency.entry(word.to_string()).or_insert(0); + *count += 1; + } +} + #[cfg(test)] mod tests { use crate::testing_resources::EXAMPLE_CONTENT_EMPTY;