Skip to content

Commit

Permalink
Fix parsing of char code 26 (0x1A, SUB/Ctrl-Z, which text-mode input streams on Windows treat as end-of-file)
Browse files: browse the repository at this point in the history
  • Loading branch information
pierotofy committed Jan 3, 2024
1 parent 2094634 commit a00037e
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions dedup.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#include "dedup.hpp"

std::tuple<std::string, std::string, size_t> dedup(const std::string &src, const std::string &tgt){
std::ifstream src_if(src);
std::ifstream src_if(src, std::ios_base::in | std::ios_base::binary);
if (!src_if.is_open()) throw std::runtime_error("Cannot open " + src);

std::ifstream tgt_if(tgt);
std::ifstream tgt_if(tgt, std::ios_base::in | std::ios_base::binary);
if (!tgt_if.is_open()) throw std::runtime_error("Cannot open " + tgt);

std::unordered_map<std::uint32_t, bool> seen;
Expand All @@ -18,13 +18,13 @@ std::tuple<std::string, std::string, size_t> dedup(const std::string &src, const
std::string line_s = "";
std::string line_t = "";
size_t removed = 0;

while(!src_if.eof()) {
while(true) {
std::getline(src_if, line_s);
std::getline(tgt_if, line_t);
if (src_if.eof()) break;

std::uint32_t hash = xxh32::hash(line_s.c_str(), static_cast<uint32_t>(line_s.size()), 0);


bool line_exists = seen.count(hash) > 0;
if (!line_exists) {
Expand All @@ -34,6 +34,8 @@ std::tuple<std::string, std::string, size_t> dedup(const std::string &src, const
}else{
removed++;
}


}

src_of.close();
Expand Down

0 comments on commit a00037e

Please sign in to comment.