Skip to content

Commit

Permalink
Merge branch 'release/v0.1.2'
Browse files Browse the repository at this point in the history
* release/v0.1.2:
  Bump to v0.1.2
  Exit if src and dest file are both the same file
  Pull version from Cargo
  Add argument parser
  Update cargo.lock, added package details
  • Loading branch information
MichaelSasser committed Jul 9, 2020
2 parents 622afd3 + 3535892 commit dc3bd57
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 71 deletions.
134 changes: 97 additions & 37 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
[package]
name = "wordlist-dedup"
version = "0.1.1"
version = "0.1.2"
authors = ["Michael Sasser <Michael@MichaelSasser.org>"]
edition = "2018"
license = "GPL-3.0+"
keywords = ["deduplication", "wordlist"]
publish = false

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
indicatif = "0.15.0"
clap = "2.33.1"
82 changes: 51 additions & 31 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use clap::{crate_version, App, Arg};
use indicatif::{ProgressBar, ProgressStyle};
use std::env;
use std::ffi::OsStr;
use std::fs::File;
use std::io::BufWriter;
Expand All @@ -18,44 +18,63 @@ fn remove_extension_from_filename(filename: &str) -> Option<&str> {
}

fn main() -> std::io::Result<()> {
let args: Vec<String> = env::args().collect();
//println!("{:?}", args);
if args.len() < 2 {
std::process::exit(1);
}
let input_filename: &str;
let mut output_filename = "";
let mut new_output_filename = String::from("");
match args.len() {
2 => {
input_filename = &args[1];
let ext = match get_extension_from_filename(input_filename) {
let matches = App::new("wordlist-dedup")
.version(crate_version!())
.author("Michael Sasser <Michael@MichaelSasser.org>")
.about("Deduplicate presorted wordlists.")
.arg(
Arg::with_name("SRC")
.required(true)
.help("The presorted source file, which may contain duplicated lines"),
)
.arg(
Arg::with_name("DEST")
.required(false)
.help("The destination file, to write the deduplicated file to"),
)
.get_matches();

let src_file = matches.value_of("SRC").unwrap();
// println!("The file passed is: {}", myfile);

// let dest_file = matches.value_of("DEST").unwrap_or("input.txt");
let mut new_dest_file = String::from("");
let mut dest_file;
dest_file = match matches.value_of("DEST") {
Some(t) => t,
None => {
let ext = match get_extension_from_filename(src_file) {
Some(t) => t,
None => "",
};
let out_file_stem = match remove_extension_from_filename(input_filename) {
let out_file_stem = match remove_extension_from_filename(src_file) {
Some(t) => t,
None => "",
};
new_output_filename = format!("{}_uniq.{}", out_file_stem, ext);
}
3 => {
input_filename = &args[1];
output_filename = &args[2];
new_dest_file = format!("{}_uniq.{}", out_file_stem, ext);
""
}
_ => std::process::exit(1),
};
if !new_output_filename.is_empty() {
output_filename = new_output_filename.as_str();

// let args: Vec<String> = env::args().collect();
// let mut new_dest_file = String::from("");

if !new_dest_file.is_empty() {
dest_file = new_dest_file.as_str();
}

if dest_file == src_file {
eprintln!("Error: The source file must be different from the destination file.");
std::process::exit(1)
}

// println!("Inputfile: {}", input_filename);
// println!("Outputfile: {}", output_filename);
// println!("Inputfile: {}", src_file);
// println!("Outputfile: {}", dest_file);

let out_path = Path::new(output_filename);
let lines = reader::BufReader::open(input_filename)?;
let out_path = Path::new(dest_file);
let buf_reader = reader::BufReader::open(src_file)?;
let output = File::create(out_path)?;
let mut writer = BufWriter::new(output);
let mut buf_writer = BufWriter::new(output);
let mut line_last: Rc<String> = Rc::new(String::from(""));
let mut dups: u64 = 0;

Expand All @@ -70,7 +89,7 @@ fn main() -> std::io::Result<()> {
);
pb.set_message("Checking for duplicates...");

for line in lines {
for line in buf_reader {
let line_cur = match line {
Ok(t) => t,
Err(_) => {
Expand All @@ -79,14 +98,15 @@ fn main() -> std::io::Result<()> {
};
if line_cur == line_last {
dups += 1;
//println!("Found DUP: {:?} and {:?}", line_cur, line_last)
// For debugging purposes, uncomment this line:
// println!("Found DUP: {:?} and {:?}", line_cur, line_last)
} else {
write!(writer, "{}", line_cur)?;
write!(buf_writer, "{}", line_cur)?;
}
line_last = line_cur;
}
let msg = format!("Done. Found {} duplicates.", dups);
writer.flush().unwrap();
buf_writer.flush().unwrap();
pb.finish_with_message(msg.as_str());
Ok(())
}
3 changes: 1 addition & 2 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::{
rc::Rc,
};

// const LF: u8 = '\n' as u8;
const LF: u8 = b'\n';

pub struct BufReader {
Expand Down Expand Up @@ -49,6 +48,7 @@ impl Iterator for BufReader {
.transpose()
}
}

pub trait SizeOf {
fn size_of(&mut self) -> Result<u64, io::Error>;
}
Expand All @@ -73,4 +73,3 @@ impl SizeOf for BufReader {
fn new_buf() -> Rc<String> {
Rc::new(String::with_capacity(1024))
}

0 comments on commit dc3bd57

Please sign in to comment.