Skip to content

Commit

Permalink
Even better trace packing
Browse files (browse the repository at this point in the history)
  • Loading branch information
Keno authored and maleadt committed Dec 13, 2022
1 parent 755a2ab commit 17e0471
Showing 1 changed file with 31 additions and 3 deletions.
34 changes: 31 additions & 3 deletions src/PackCommand.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,6 +68,7 @@ struct PackFlags {
* files, rather than copying the files themselves */
bool symlink;
std::vector<string> index_dirs;
std::string pack_dir;

PackFlags()
: symlink(false) {}
Expand All @@ -87,6 +88,12 @@ struct FileInfo {
bool is_hardlink;
};

struct PackDir {
string dir;
map<FileHash, string> mapped_files;
PackDir(string dir) : dir(dir) {}
};

static bool name_comparator(const TraceReader::MappedData& d1,
const TraceReader::MappedData d2) {
return d1.file_name < d2.file_name;
Expand Down Expand Up @@ -416,7 +423,8 @@ static map<string, string> compute_canonical_symlink_map(
*/
static map<string, string> compute_canonical_mmapped_files(
const string& trace_dir,
const vector<pair<string, map<FileHash, string>>> indexed_dirs) {
const vector<pair<string, map<FileHash, string>>> indexed_dirs,
PackDir &pack_dir) {
map<string, FileInfo> file_info = gather_file_info(trace_dir);

map<FileHash, string> hash_to_name;
Expand All @@ -434,6 +442,7 @@ static map<string, string> compute_canonical_mmapped_files(

int name_index = 0;
for (auto& p : hash_to_name) {
LOG(debug) << "Processing " << p.second;
bool found = false;
// First see if this file is anywhere in our index. If so, prefer the index
for (auto& indexed_dir : indexed_dirs) {
Expand All @@ -448,11 +457,25 @@ static map<string, string> compute_canonical_mmapped_files(
if (found) {
continue;
}

// Now check if this is in our common pack directory
auto it = pack_dir.mapped_files.find(p.first);
if (it != pack_dir.mapped_files.end()) {
LOG(debug) << "Found in common pack dir";
p.second = symlink_into_trace(it->second, trace_dir, &name_index);
continue;
}

// Copy hardlinked files into the trace to avoid the possibility of someone
// overwriting the original file.
auto& info = file_info[p.second];
if (info.is_hardlink || !is_in_trace_dir(p.second, trace_dir)) {
p.second = copy_into_trace(p.second, trace_dir, &name_index);
if (pack_dir.dir != "") {
auto path = pack_dir.mapped_files[p.first] = copy_into_trace(p.second, pack_dir.dir, &name_index);
p.second = symlink_into_trace(fs::relative(path, trace_dir), trace_dir, &name_index);
} else {
p.second = copy_into_trace(p.second, trace_dir, &name_index);
}
}
}

Expand Down Expand Up @@ -559,6 +582,7 @@ static int pack(const vector<string>& trace_dirs, const PackFlags &flags) {
indexed_dirs.push_back(std::make_pair(p.filename(), index_dir(dir)));
}

PackDir pack_dir(flags.pack_dir);
char buf[PATH_MAX];
for (const string &trace_dir : trace_dirs) {
string dir;
Expand Down Expand Up @@ -603,7 +627,7 @@ static int pack(const vector<string>& trace_dirs, const PackFlags &flags) {
delete_unnecessary_files(canonical_symlink_map, abspath);
} else {
map<string, string> canonical_mmapped_files =
compute_canonical_mmapped_files(abspath, indexed_dirs);
compute_canonical_mmapped_files(abspath, indexed_dirs, pack_dir);
rewrite_mmaps(canonical_mmapped_files, abspath);
delete_unnecessary_files(canonical_mmapped_files, abspath);
}
Expand All @@ -620,6 +644,7 @@ static bool parse_pack_arg(vector<string>& args, PackFlags& flags) {
static const OptionSpec options[] = {
{ 0, "symlink", NO_PARAMETER },
{ 1, "index-dir", HAS_PARAMETER },
{ 2, "pack-dir", HAS_PARAMETER },
};
ParsedOption opt;
auto args_copy = args;
Expand All @@ -634,6 +659,9 @@ static bool parse_pack_arg(vector<string>& args, PackFlags& flags) {
case 1:
flags.index_dirs.push_back(opt.value);
break;
case 2:
flags.pack_dir = opt.value;
break;
default:
DEBUG_ASSERT(0 && "Unknown pack option");
}
Expand Down

0 comments on commit 17e0471

Please sign in to comment.