Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 72 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod testing_resources;

use clap::Parser;
use std::fs;
use std::collections::HashMap;

/// wc impl in rust
#[derive(Parser, Debug)]
Expand All @@ -19,34 +20,56 @@ struct Args {
#[arg(short = 'w')]
should_words: bool,

/// Print the ten most frequent words across all input files (cannot be combined with the count flags)
#[arg(short = 'f')]
should_frequency: bool,

/// Paths to input files we want to `wc`. If more than one input file is
/// specified, a line of cumulative counts for all the files is displayed
/// on a separate line after the output for the last file.
paths: Vec<String>,
}

impl Args {
    /// Checks that the parsed flags form a legal combination.
    ///
    /// The frequency flag may not be combined with the character, line or
    /// word count flags.
    ///
    /// # Errors
    ///
    /// Returns a `String` describing the conflict when `should_frequency` is
    /// set together with any of the count flags.
    fn validate_args(&self) -> Result<(), String> {
        let any_count_flag = self.should_characters || self.should_lines || self.should_words;
        match (self.should_frequency, any_count_flag) {
            (true, true) => Err(String::from("should_frequency is mutually exclusive")),
            _ => Ok(()),
        }
    }
}

fn main() {
let parsed_args = Args::parse();
let should_words: bool;
let should_lines: bool;
let should_characters: bool;
let should_frequency: bool;
let mut should_exit_with_err: bool = false;
if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words {

//validate the input arguments
parsed_args.validate_args().unwrap();

if !parsed_args.should_characters && !parsed_args.should_lines && !parsed_args.should_words && !parsed_args.should_frequency {
// Compat with wc behavior, no flags passed means all these should be on.
should_characters = true;
should_lines = true;
should_words = true;
should_frequency = false;
} else {
should_characters = parsed_args.should_characters;
should_lines = parsed_args.should_lines;
should_words = parsed_args.should_words;
should_frequency = parsed_args.should_frequency;
}

let mut total_words: usize = 0;
let mut total_lines: usize = 0;
let mut total_characters: usize = 0;
let mut words_frequency: HashMap<String, u64> = HashMap::new();
for path in parsed_args.paths.iter() {
let file_contents = match fs::read_to_string(path) {
let file_contents = match fs::read_to_string(path.clone()) {
Ok(x) => x,
Err(e) => {
eprint!("wc: {}: {}", path, e.to_string());
Expand All @@ -69,8 +92,16 @@ fn main() {
total_characters += characters_in_this_content;
print!("{:>8}", characters_in_this_content);
}
println!(" {}", path)
if should_frequency {
count_and_update_word_frequency_for_content(&mut words_frequency, file_contents);
} else {
println!(" {}", path)
}
}
if should_frequency {
print_top_frequent_words(&mut words_frequency);
}

// Now if more than 1 path, print total
if parsed_args.paths.len() > 1 {
if should_lines {
Expand All @@ -82,13 +113,30 @@ fn main() {
if should_characters {
print!("{:>8}", total_characters);
}
println!(" total")
if !should_frequency {
println!(" total")
}
}
if should_exit_with_err {
std::process::exit(0x00000001);
}
}

/// How many entries `print_top_frequent_words` prints.
const TOP_WORDS_LIMIT: usize = 10;

/// Prints the most frequent words, one per line, as a right-aligned count
/// (width 8) followed by the word.
///
/// At most `TOP_WORDS_LIMIT` lines are written to stdout, highest count
/// first. The map is only read; the `&mut` in the signature is kept so
/// existing callers continue to compile unchanged.
fn print_top_frequent_words(words_frequency: &mut HashMap<String, u64>) {
    for (word, count) in top_frequent_words(words_frequency, TOP_WORDS_LIMIT) {
        println!("{:>8} {}", count, word);
    }
}

/// Returns up to `limit` `(word, count)` pairs ordered by descending count,
/// with ties broken by ascending word.
fn top_frequent_words(words_frequency: &HashMap<String, u64>, limit: usize) -> Vec<(&str, u64)> {
    let mut entries: Vec<(&str, u64)> = words_frequency
        .iter()
        .map(|(word, count)| (word.as_str(), *count))
        .collect();
    // HashMap iteration order is arbitrary, so sorting by count alone would
    // make both the order and the membership of the top entries vary between
    // runs whenever counts tie. The secondary key makes the result total.
    entries.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
    entries.truncate(limit);
    entries
}

/// Adds the whitespace-separated words of `file_contents` to the running
/// tally in `words_frequency`.
///
/// Words are compared exactly as they appear (case and punctuation are
/// significant). Counts accumulate across calls, so the same map can be fed
/// several inputs in turn.
///
/// `file_contents` may be anything that can be viewed as a `str` (an owned
/// `String`, a `&String` or a `&str`), so callers no longer have to give up
/// or clone their buffer just to count its words.
fn count_and_update_word_frequency_for_content(
    words_frequency: &mut HashMap<String, u64>,
    file_contents: impl AsRef<str>,
) {
    for word in file_contents.as_ref().split_whitespace() {
        // Look up by borrowed key first: an owned `String` key is allocated
        // only the first time a word is seen, not on every occurrence.
        match words_frequency.get_mut(word) {
            Some(count) => *count += 1,
            None => {
                words_frequency.insert(word.to_owned(), 1);
            }
        }
    }
}

fn count_lines_in_content(content: &str) -> usize {
// My initial implementation
// content.split('\n').fold(0, |lines: u64, _x| lines + 1)
Expand All @@ -108,6 +156,7 @@ fn count_words_in_content(content: &str) -> usize {

#[cfg(test)]
mod tests {
use std::path::Path;
use crate::testing_resources::EXAMPLE_CONTENT_EMPTY;
use crate::testing_resources::EXAMPLE_CONTENT_FIVE_WORDS;
use crate::testing_resources::EXAMPLE_CONTENT_TEN_CHARS;
Expand All @@ -132,4 +181,23 @@ mod tests {
assert_eq!(10, count_characters_in_content(EXAMPLE_CONTENT_TEN_CHARS));
assert_eq!(0, count_characters_in_content(EXAMPLE_CONTENT_EMPTY));
}

#[test]
fn test_word_frequency_licence_file() {
    // Tally every word of the repository's LICENSE file, then spot-check the
    // counts of ten common words against known values.
    let file_contents = fs::read_to_string(Path::new("LICENSE")).unwrap();
    let mut words_frequency: HashMap<String, u64> = HashMap::new();
    count_and_update_word_frequency_for_content(&mut words_frequency, file_contents);

    // Panics if the word was never seen, which fails the test.
    let count_of = |word: &str| -> u64 { *words_frequency.get(word).unwrap() };

    assert_eq!(309, count_of("the"));
    assert_eq!(208, count_of("of"));
    assert_eq!(174, count_of("to"));
    assert_eq!(165, count_of("a"));
    assert_eq!(131, count_of("or"));
    assert_eq!(102, count_of("you"));
    assert_eq!(89, count_of("that"));
    assert_eq!(86, count_of("and"));
    assert_eq!(72, count_of("this"));
    assert_eq!(70, count_of("in"));
}
}