Skip to content

Commit

Permalink
Merge pull request #6 from obsidiansystems/fetchgit
Browse files Browse the repository at this point in the history
Make fetchGit/fetchTree use Git file ingestion
  • Loading branch information
Ericson2314 committed Jun 4, 2020
2 parents 775ab98 + 3e59168 commit 5b7aed7
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 40 deletions.
107 changes: 79 additions & 28 deletions src/libfetchers/git.cc
Expand Up @@ -3,6 +3,7 @@
#include "globals.hh"
#include "tarfile.hh"
#include "store-api.hh"
#include "git.hh"

#include <sys/time.h>

Expand All @@ -27,6 +28,7 @@ struct GitInput : Input
ParsedURL url;
std::optional<std::string> ref;
std::optional<Hash> rev;
std::optional<Hash> treeHash;
bool shallow = false;
bool submodules = false;

Expand All @@ -42,12 +44,13 @@ struct GitInput : Input
other2
&& url == other2->url
&& rev == other2->rev
&& treeHash == other2->treeHash
&& ref == other2->ref;
}

/* An input is immutable when it pins specific content: a commit
   `rev`, a Git `treeHash`, or a `narHash` (declared outside this
   view; presumably inherited from `Input`).

   The previous text carried two consecutive `return` statements, so
   the `treeHash` check was unreachable; they are merged into one.
   All operands are contextually converted to bool by `||`, so no
   explicit cast is needed. */
bool isImmutable() const override
{
    return rev || treeHash || narHash;
}

/* Accessor for the optional `ref` member; yields an empty optional
   when no ref has been set on this input. */
std::optional<std::string> getRef() const override
{
    return ref;
}
Expand All @@ -59,6 +62,7 @@ struct GitInput : Input
ParsedURL url2(url);
if (url2.scheme != "git") url2.scheme = "git+" + url2.scheme;
if (rev) url2.query.insert_or_assign("rev", rev->gitRev());
if (treeHash) url2.query.insert_or_assign("treeHash", treeHash->gitRev());
if (ref) url2.query.insert_or_assign("ref", *ref);
if (shallow) url2.query.insert_or_assign("shallow", "1");
return url2;
Expand All @@ -72,6 +76,8 @@ struct GitInput : Input
attrs.emplace("ref", *ref);
if (rev)
attrs.emplace("rev", rev->gitRev());
if (treeHash)
attrs.emplace("treeHash", treeHash->gitRev());
if (shallow)
attrs.emplace("shallow", true);
if (submodules)
Expand All @@ -96,25 +102,33 @@ struct GitInput : Input
auto input = std::make_shared<GitInput>(*this);

assert(!rev || rev->type == HashType::SHA1);
assert(!treeHash || treeHash->type == HashType::SHA1);

auto ingestionMethod = treeHash ? FileIngestionMethod::Git : FileIngestionMethod::Recursive;

std::string cacheType = "git";
if (shallow) cacheType += "-shallow";
if (submodules) cacheType += "-submodules";

auto getImmutableAttrs = [&]()
{
return Attrs({
Attrs attrs({
{"type", cacheType},
{"name", name},
{"rev", input->rev->gitRev()},
});
if (input->treeHash)
attrs.insert_or_assign("treeHash", input->treeHash->gitRev());
else
attrs.insert_or_assign("rev", input->rev->gitRev());
return attrs;
};

auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath)
-> std::pair<Tree, std::shared_ptr<const Input>>
{
assert(input->rev);
assert(input->rev || input->treeHash);
assert(!rev || rev == input->rev);
assert(!treeHash || treeHash == input->treeHash);
return {
Tree {
.actualPath = store->toRealPath(storePath),
Expand All @@ -138,7 +152,7 @@ struct GitInput : Input

// If this is a local directory and no ref or revision is
// given, then allow the use of an unclean working tree.
if (!input->ref && !input->rev && isLocal) {
if (!input->ref && !input->rev && !input->treeHash && isLocal) {
bool clean = false;

/* Check whether this repo has any commits. There are
Expand Down Expand Up @@ -195,7 +209,7 @@ struct GitInput : Input
return files.count(file);
};

auto storePath = store->addToStore("source", actualUrl, FileIngestionMethod::Recursive, HashType::SHA256, filter);
auto storePath = store->addToStore("source", actualUrl, ingestionMethod, HashType::SHA256, filter);

auto tree = Tree {
.actualPath = store->printStorePath(storePath),
Expand Down Expand Up @@ -224,7 +238,7 @@ struct GitInput : Input

if (isLocal) {

if (!input->rev)
if (!input->rev && !input->treeHash)
input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), HashType::SHA1);

repoDir = actualUrl;
Expand All @@ -233,12 +247,20 @@ struct GitInput : Input

if (auto res = getCache()->lookup(store, mutableAttrs)) {
auto rev2 = Hash(getStrAttr(res->first, "rev"), HashType::SHA1);
if (!rev || rev == rev2) {
if (!input->rev || rev == rev2) {
input->rev = rev2;
return makeResult(res->first, std::move(res->second));
}
}

if (auto res = getCache()->lookup(store, mutableAttrs)) {
auto treeHash2 = Hash(getStrAttr(res->first, "treeHash"), HashType::SHA1);
if (!input->treeHash || treeHash == treeHash2) {
input->treeHash = treeHash2;
return makeResult(res->first, std::move(res->second));
}
}

Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(HashType::SHA256, actualUrl).to_string(Base::Base32, false);
repoDir = cacheDir;

Expand All @@ -255,11 +277,12 @@ struct GitInput : Input
bool doFetch;
time_t now = time(0);

/* If a rev was specified, we need to fetch if it's not in the
repo. */
if (input->rev) {
/* If a rev or treeHash is specified, we need to fetch if
it's not in the repo. */
if (input->rev || input->treeHash) {
try {
runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() });
auto gitHash = input->treeHash ? input->treeHash : input->rev;
runProgram("git", true, { "-C", repoDir, "cat-file", "-e", gitHash->gitRev() });
doFetch = false;
} catch (ExecError & e) {
if (WIFEXITED(e.status)) {
Expand Down Expand Up @@ -300,18 +323,27 @@ struct GitInput : Input
utimes(localRefFile.c_str(), times);
}

if (!input->rev)
if (!input->rev && !input->treeHash)
input->rev = Hash(chomp(readFile(localRefFile)), HashType::SHA1);
}

if (input->treeHash) {
auto type = chomp(runProgram("git", true, { "-C", repoDir, "cat-file", "-t", input->treeHash->gitRev() }));
if (type != "tree")
throw Error(format("Need a tree object, found '%s' object in %s") % type % input->treeHash->gitRev());
}

bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true";

if (isShallow && !shallow)
throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl);

// FIXME: check whether rev is an ancestor of ref.

printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl);
if (input->rev)
printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl);
else if (input->treeHash)
printTalkative("using tree %s of repo '%s'", input->treeHash->gitRev(), actualUrl);

/* Now that we know the ref, check again whether we have it in
the store. */
Expand All @@ -322,6 +354,9 @@ struct GitInput : Input
AutoDelete delTmpDir(tmpDir, true);
PathFilter filter = defaultPathFilter;

if (submodules && treeHash)
throw Error("Cannot combine tree hashes with git submodules");

if (submodules) {
Path tmpGitDir = createTempDir();
AutoDelete delTmpGitDir(tmpGitDir, true);
Expand All @@ -333,7 +368,7 @@ struct GitInput : Input
runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force",
"--update-head-ok", "--", repoDir, "refs/*:refs/*" });

runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->rev->gitRev() });
runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->treeHash ? input->treeHash->gitRev() : input->rev->gitRev() });
runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl });
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" });

Expand All @@ -342,28 +377,41 @@ struct GitInput : Input
// FIXME: should pipe this, or find some better way to extract a
// revision.
auto source = sinkToSource([&](Sink & sink) {
RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() });
RunOptions gitOptions("git", { "-C", repoDir, "archive", input->treeHash ? input->treeHash->gitRev() : input->rev->gitRev() });
gitOptions.standardOut = &sink;
runProgram2(gitOptions);
});

unpackTarfile(*source, tmpDir);
}

auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, HashType::SHA256, filter);
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, ingestionMethod == FileIngestionMethod::Git ? HashType::SHA1 : HashType::SHA256, filter);

auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() }));
// verify treeHash is what we actually obtained in the nix store
if (input->treeHash) {
auto path = store->toRealPath(store->printStorePath(storePath));
auto gotHash = dumpGitHash(HashType::SHA1, path);
if (gotHash != input->treeHash)
throw Error("Git hash mismatch in input '%s' (%s), expected '%s', got '%s'",
to_string(), path, input->treeHash->gitRev(), gotHash.gitRev());
}

Attrs infoAttrs({
{"rev", input->rev->gitRev()},
{"lastModified", lastModified},
});
Attrs infoAttrs({});
if (input->treeHash) {
infoAttrs.insert_or_assign("treeHash", input->treeHash->gitRev());
infoAttrs.insert_or_assign("revCount", 0);
infoAttrs.insert_or_assign("lastModified", 0);
} else {
auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() }));
infoAttrs.insert_or_assign("lastModified", lastModified);
infoAttrs.insert_or_assign("rev", input->rev->gitRev());

if (!shallow)
infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() })));
if (!shallow)
infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() })));
}

if (!this->rev)
if (!this->rev && !this->treeHash)
getCache()->add(
store,
mutableAttrs,
Expand Down Expand Up @@ -400,7 +448,7 @@ struct GitInputScheme : InputScheme
attrs.emplace("type", "git");

for (auto &[name, value] : url.query) {
if (name == "rev" || name == "ref")
if (name == "rev" || name == "ref" || name == "treeHash")
attrs.emplace(name, value);
else
url2.query.emplace(name, value);
Expand All @@ -416,7 +464,7 @@ struct GitInputScheme : InputScheme
if (maybeGetStrAttr(attrs, "type") != "git") return {};

for (auto & [name, value] : attrs)
if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules")
if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "treeHash")
throw Error("unsupported Git input attribute '%s'", name);

auto input = std::make_unique<GitInput>(parseURL(getStrAttr(attrs, "url")));
Expand All @@ -428,6 +476,9 @@ struct GitInputScheme : InputScheme
if (auto rev = maybeGetStrAttr(attrs, "rev"))
input->rev = Hash(*rev, HashType::SHA1);

if (auto treeHash = maybeGetStrAttr(attrs, "treeHash"))
input->treeHash = Hash(*treeHash, HashType::SHA1);

input->shallow = maybeGetBoolAttr(attrs, "shallow").value_or(false);

input->submodules = maybeGetBoolAttr(attrs, "submodules").value_or(false);
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/local-store.cc
Expand Up @@ -1143,7 +1143,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
throw SysError(format("getting attributes of path '%1%'") % srcPath);
if (S_ISDIR(st.st_mode))
for (auto & i : readDirectory(srcPath))
addToStore(i.name, srcPath + "/" + i.name, method, hashAlgo, filter, repair);
addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair);

dumpGit(hashAlgo, srcPath, sink, filter);
break;
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/remote-store.cc
Expand Up @@ -501,7 +501,7 @@ StorePath RemoteStore::addToStore(const string & name, const Path & _srcPath,
throw SysError(format("getting attributes of path '%1%'") % srcPath);
if (S_ISDIR(st.st_mode))
for (auto & i : readDirectory(srcPath))
addToStore(i.name, srcPath + "/" + i.name, method, hashAlgo, filter, repair);
addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair);
}

auto conn(getConnection());
Expand Down
5 changes: 4 additions & 1 deletion src/libutil/fs-sink.hh
Expand Up @@ -101,7 +101,10 @@ struct RestoreSink : ParseSink
if (lstat(entry.c_str(), &st))
throw SysError(format("getting attributes of path '%1%'") % entry);
if (S_ISREG(st.st_mode)) {
createRegularFile(destination + "/" + i.name);
if (st.st_mode & S_IXUSR)
createExecutableFile(destination + "/" + i.name);
else
createRegularFile(destination + "/" + i.name);
copyFile(entry);
} else if (S_ISDIR(st.st_mode))
copyDirectory(entry, destination + "/" + i.name);
Expand Down
37 changes: 28 additions & 9 deletions src/libutil/git.cc
Expand Up @@ -112,7 +112,7 @@ static void parse(ParseSink & sink, Source & source, const Path & path, const Pa
Hash hash(HashType::SHA1);
std::copy(hashs.begin(), hashs.end(), hash.hash);

string entryName = getStoreEntry(storeDir, hash, name);
string entryName = getStoreEntry(storeDir, hash, "git");
Path entry = absPath(realStoreDir + "/" + entryName);

struct stat st;
Expand Down Expand Up @@ -154,22 +154,32 @@ GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink)

/* Serialise `entries` as a Git tree object and write it to `sink`
   in a single call.

   Layout written: the header "tree <payload size>" followed by a NUL
   byte, then for every entry "<mode> <name>", a NUL byte, and the
   first 20 raw bytes of the entry's hash (the size of a SHA-1
   digest).

   Directory keys are expected to carry a trailing '/' (added by the
   caller so that the map iterates in Git's tree order); that slash is
   stripped again before the name is written.

   The bytes are assembled in byte vectors rather than through a
   format string, because both the NUL separators and the raw hash
   bytes must survive verbatim.

   Always returns GitMode::Directory. */
GitMode dumpGitTree(const GitTree & entries, Sink & sink)
{
    // Payload: the concatenated entries, without the object header.
    vector<uint8_t> payload;

    for (auto & i : entries) {
        unsigned int mode;
        switch (i.second.first) {
        case GitMode::Directory: mode = 40000; break;
        case GitMode::Executable: mode = 100755; break;
        case GitMode::Regular: mode = 100644; break;
        }

        auto name = i.first;
        // Only strip the ordering slash if it is actually there, so a
        // key without it (or an empty key) is never truncated.
        if (i.second.first == GitMode::Directory && !name.empty() && name.back() == '/')
            name.pop_back();

        auto entryHeader = (format("%d %s") % mode % name).str();
        std::copy(entryHeader.begin(), entryHeader.end(), std::back_inserter(payload));
        payload.push_back(0);
        std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(payload));
    }

    // Object header, then the payload, emitted as one buffer.
    vector<uint8_t> object;
    auto objectHeader = (format("tree %d"s) % payload.size()).str();
    std::copy(objectHeader.begin(), objectHeader.end(), std::back_inserter(object));
    object.push_back(0);
    std::copy(payload.begin(), payload.end(), std::back_inserter(object));

    sink(object.data(), object.size());

    return GitMode::Directory;
}

Expand All @@ -187,8 +197,17 @@ static GitMode dumpGitInternal(HashType ht, const Path & path, Sink & sink, Path
else if (S_ISDIR(st.st_mode)) {
GitTree entries;
for (auto & i : readDirectory(path))
if (filter(path + "/" + i.name))
entries[i.name] = dumpGitHashInternal(ht, path + "/" + i.name, filter);
if (filter(path + "/" + i.name)) {
auto result = dumpGitHashInternal(ht, path + "/" + i.name, filter);

// correctly observe git order, see
// https://github.com/mirage/irmin/issues/352
auto name = i.name;
if (result.first == GitMode::Directory)
name += "/";

entries[name] = result;
}
perm = dumpGitTree(entries, sink);
} else throw Error(format("file '%1%' has an unsupported type") % path);

Expand Down

0 comments on commit 5b7aed7

Please sign in to comment.