Permalink
Browse files

Process directories independently so that the work can be split across different threads.
  • Loading branch information...
BrendanLeber committed May 1, 2012
1 parent 6e268b8 commit 721e75211d46e9f515be8f4e7f93ec124aeead0c
Showing with 57 additions and 22 deletions.
  1. +57 −22 mdups.cpp
View
@@ -5,6 +5,8 @@
#include <set>
#include <map>
#include <string>
+#include <algorithm>
+#include <iterator>
#include "file_list.h"
#include "paths.h"
#include "md5.h"
@@ -13,8 +15,48 @@ typedef std::set<std::wstring> File_Bag;
typedef std::vector<std::wstring> String_Bag;
typedef std::map<std::wstring, String_Bag> Hash_Bag;
-void process_directory(std::wstring arg, File_Bag& files)
+File_Bag get_files_to_process(String_Bag dirs); // union of the files found for every argument in dirs
+std::wstring get_file_hash(std::wstring file); // digest string produced by the MD5 helper for one file
+File_Bag process_directory(std::wstring arg); // files for a single directory / path-with-mask argument
+Hash_Bag process_files(File_Bag files); // groups file names under their hash
+
+// Collects the set of files named by every directory/mask argument.
+//
+// Each argument is expanded on its own by process_directory() and the
+// per-argument results are kept separate until the end, so the expansion
+// step writes to no output shared between arguments. The results are then
+// folded into one File_Bag; because File_Bag is a std::set, a file reached
+// through more than one argument appears only once.
+//
+// dirs: directory or path-with-mask arguments supplied by the user.
+// Returns the de-duplicated set of file names.
+File_Bag get_files_to_process(String_Bag dirs)
+{
+    std::wcerr << L"Proccessing " << dirs.size() << L" directories\n";
+
+    // get all the files desired by the user, one result set per argument
+    std::vector<File_Bag> dir_results;
+    dir_results.reserve(dirs.size());
+    std::for_each(dirs.cbegin(), dirs.cend(), [&dir_results](const std::wstring& arg)
+    {
+#if defined(_DEBUG)
+        std::wcerr << L"dir " << arg << L'\n';
+#endif
+        dir_results.push_back(process_directory(arg));
+    });
+
+    // Fold the per-directory sets into the final set. A range insert lets the
+    // set drop duplicates itself and only touches the new directory's entries,
+    // instead of rebuilding the whole accumulated set through a scratch copy
+    // for every directory.
+    File_Bag files;
+    for (auto it = dir_results.cbegin(); it != dir_results.cend(); ++it)
+        files.insert(it->begin(), it->end());
+
+    return files;
+}
+
+// Hashes one file with the project's MD5 helper.
+//
+// Opens `file` as a binary input stream, hands the stream to an MD5 object,
+// finalizes it and returns whatever MD5::hex_digest() yields.
+//
+// NOTE(review): the stream is not checked after opening. What MD5::update()
+// does with a stream that failed to open is not visible here -- confirm
+// before relying on the result for unreadable files.
+std::wstring get_file_hash(std::wstring file)
{
+    const std::ios_base::openmode binary_read = std::ios_base::binary | std::ios_base::in;
+    std::ifstream input(file.c_str(), binary_read);
+
+    MD5 hasher;
+    hasher.update(input);
+    hasher.finalize();
+    return hasher.hex_digest();
+}
+
+File_Bag process_directory(std::wstring arg)
+{
+ File_Bag files;
+
// split out the mask from the search path
std::wstring path, mask;
if (path_is_directory(arg)) {
@@ -34,18 +76,11 @@ void process_directory(std::wstring arg, File_Bag& files)
}
// get the files in the directory and add them to the output collection
- File_List bag = file_list(path, mask);
- for (File_List::const_iterator file = bag.cbegin(); file != bag.cend(); ++file)
+ auto bag = file_list(path, mask);
+ for (auto file = bag.cbegin(); file != bag.cend(); ++file)
files.insert(*file);
-}
-std::wstring get_file_hash(std::wstring file)
-{
- std::ifstream stm(file.c_str(), std::ios_base::in | std::ios_base::binary);
- MD5 context;
- context.update(stm);
- context.finalize();
- return context.hex_digest();
+ return files;
}
Hash_Bag process_files(File_Bag files)
@@ -55,14 +90,14 @@ Hash_Bag process_files(File_Bag files)
std::wcerr << L"Files processed " << done << L" of " << files.size() << std::endl;
Hash_Bag results;
- for (File_Bag::const_iterator file = files.cbegin(); file != files.cend(); ++file) {
+ for (auto file = files.cbegin(); file != files.cend(); ++file) {
if (done >= next_step) {
std::wcerr << L"Files processed " << done << L" of " << files.size() << std::endl;
next_step += ten_percent;
}
std::wstring hash = get_file_hash(*file);
- Hash_Bag::iterator it = results.find(hash);
+ auto it = results.find(hash);
if (it == results.end()) {
// no entry with this hash, create a new bag of file names and add it to the results
String_Bag values;
@@ -89,24 +124,24 @@ int wmain(int argc, wchar_t** argv)
return 1;
}
- // get all the files desired by the user
- std::wcerr << L"Proccessing " << argc - 1 << L" directories\n";
- File_Bag files;
- for (int arg = 1; arg < argc; ++arg) {
- process_directory(argv[arg], files);
- }
+ // build argument list for processing
+ String_Bag args;
+ for (int arg = 1; arg < argc; ++arg)
+ args.push_back(argv[arg]);
+
+ File_Bag files = get_files_to_process(args);
std::wcerr << L"Files found " << files.size() << L'\n';
// get the hashes for the files
- Hash_Bag hashes = process_files(files);
+ auto hashes = process_files(files);
// display the results
- for (Hash_Bag::const_iterator hash = hashes.cbegin(); hash != hashes.cend(); hash++) {
+ for (auto hash = hashes.cbegin(); hash != hashes.cend(); hash++) {
if (hash->second.size() < 2)
continue;
std::wcout << L'\n' << hash->first << L'\n';
- for (String_Bag::const_iterator file = hash->second.cbegin(); file != hash->second.cend(); file++)
+ for (auto file = hash->second.cbegin(); file != hash->second.cend(); file++)
std::wcout << L" " << *file << L'\n';
}

0 comments on commit 721e752

Please sign in to comment.