diff --git a/src/main.rs b/src/main.rs index 368e83c..eb0bdd7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod testing_resources; use clap::Parser; use std::fs; +use std::collections::HashMap; /// wc impl in rust #[derive(Parser, Debug)] @@ -19,34 +20,56 @@ struct Args { #[arg(short = 'w')] should_words: bool, + /// Print the ten most frequent words across all input files (cannot be combined with the characters, lines or words flags) + #[arg(short = 'f')] + should_frequency: bool, + /// Paths to input files we want to `wc`. If more than one input file is /// specified, a line of cumulative counts for all the files is displayed /// on a separate line after the output for the last file. paths: Vec, } +impl Args { + // validate the input arguments + fn validate_args(&self) -> Result<(), String> { + if self.should_frequency && (self.should_characters || self.should_lines || self.should_words) { + return Err("should_frequency is mutually exclusive".to_string()); + } + Ok(()) + } +} + fn main() { let parsed_args = Args::parse(); let should_words: bool; let should_lines: bool; let should_characters: bool; + let should_frequency: bool; let mut should_exit_with_err: bool = false; - if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words { + + //validate the input arguments + parsed_args.validate_args().unwrap(); + + if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words && !parsed_args.should_frequency { // Compat with wc behavior, no flags passed means all these should be on. 
should_characters = true; should_lines = true; should_words = true; + should_frequency = false; } else { should_characters = parsed_args.should_characters; should_lines = parsed_args.should_lines; should_words = parsed_args.should_words; + should_frequency = parsed_args.should_frequency; } let mut total_words: usize = 0; let mut total_lines: usize = 0; let mut total_characters: usize = 0; + let mut words_frequency: HashMap = HashMap::new(); for path in parsed_args.paths.iter() { - let file_contents = match fs::read_to_string(path) { + let file_contents = match fs::read_to_string(path.clone()) { Ok(x) => x, Err(e) => { eprint!("wc: {}: {}", path, e.to_string()); @@ -69,8 +92,16 @@ fn main() { total_characters += characters_in_this_content; print!("{:>8}", characters_in_this_content); } - println!(" {}", path) + if should_frequency { + count_and_update_word_frequency_for_content(&mut words_frequency, file_contents); + } else { + println!(" {}", path) + } + } + if should_frequency { + print_top_frequent_words(&mut words_frequency); } + // Now if more than 1 path, print total if parsed_args.paths.len() > 1 { if should_lines { @@ -82,13 +113,30 @@ fn main() { if should_characters { print!("{:>8}", total_characters); } - println!(" total") + if !should_frequency { + println!(" total") + } } if should_exit_with_err { std::process::exit(0x00000001); } } +fn print_top_frequent_words(words_frequency: &mut HashMap) { + let mut words_frequency_vec: Vec<(&String, &u64)> = words_frequency.iter().collect(); + words_frequency_vec.sort_by(|word_tuple_a, word_tuple_b| word_tuple_b.1.cmp(word_tuple_a.1)); + for (word, count) in words_frequency_vec.iter().take(10) { + println!("{:>8} {}", count, word); + } +} + +fn count_and_update_word_frequency_for_content(words_frequency: &mut HashMap, file_contents: String) { + for word in file_contents.split_whitespace() { + let count = words_frequency.entry(word.to_string()).or_insert(0); + *count += 1; + } +} + fn 
count_lines_in_content(content: &str) -> usize { // My initial implementation // content.split('\n').fold(0, |lines: u64, _x| lines + 1) @@ -108,6 +156,7 @@ fn count_words_in_content(content: &str) -> usize { #[cfg(test)] mod tests { + use std::path::Path; use crate::testing_resources::EXAMPLE_CONTENT_EMPTY; use crate::testing_resources::EXAMPLE_CONTENT_FIVE_WORDS; use crate::testing_resources::EXAMPLE_CONTENT_TEN_CHARS; @@ -132,4 +181,23 @@ mod tests { assert_eq!(10, count_characters_in_content(EXAMPLE_CONTENT_TEN_CHARS)); assert_eq!(0, count_characters_in_content(EXAMPLE_CONTENT_EMPTY)); } + + #[test] + fn test_word_frequency_licence_file() { + let mut words_frequency: HashMap = HashMap::new(); + + let licence_path = Path::new("LICENSE"); + let file_contents = fs::read_to_string(licence_path).unwrap(); + count_and_update_word_frequency_for_content(&mut words_frequency, file_contents.to_string()); + assert_eq!(309, *words_frequency.get("the").unwrap()); + assert_eq!(208, *words_frequency.get("of").unwrap()); + assert_eq!(174, *words_frequency.get("to").unwrap()); + assert_eq!(165, *words_frequency.get("a").unwrap()); + assert_eq!(131, *words_frequency.get("or").unwrap()); + assert_eq!(102, *words_frequency.get("you").unwrap()); + assert_eq!(89, *words_frequency.get("that").unwrap()); + assert_eq!(86, *words_frequency.get("and").unwrap()); + assert_eq!(72, *words_frequency.get("this").unwrap()); + assert_eq!(70, *words_frequency.get("in").unwrap()); + } }