Skip to content

Commit

Permalink
Tarball fetcher: Use the content-addressed Git cache
Browse files Browse the repository at this point in the history
Backported from the lazy-trees branch.
  • Loading branch information
edolstra committed Feb 20, 2024
1 parent 930b9c8 commit cabee98
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 93 deletions.
6 changes: 4 additions & 2 deletions src/libcmd/common-eval-args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "store-api.hh"
#include "command.hh"
#include "tarball.hh"
#include "fetch-to-store.hh"

namespace nix {

Expand Down Expand Up @@ -167,8 +168,9 @@ Bindings * MixEvalArgs::getAutoArgs(EvalState & state)
SourcePath lookupFileArg(EvalState & state, std::string_view s, const Path * baseDir)
{
if (EvalSettings::isPseudoUrl(s)) {
auto storePath = fetchers::downloadTarball(
state.store, EvalSettings::resolvePseudoUrl(s), "source", false).storePath;
auto accessor = fetchers::downloadTarball(
EvalSettings::resolvePseudoUrl(s)).accessor;
auto storePath = fetchToStore(*state.store, SourcePath(accessor), FetchMode::Copy);
return state.rootPath(CanonPath(state.store->toRealPath(storePath)));
}

Expand Down
7 changes: 4 additions & 3 deletions src/libexpr/eval.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2794,10 +2794,11 @@ std::optional<std::string> EvalState::resolveSearchPathPath(const SearchPath::Pa

if (EvalSettings::isPseudoUrl(value)) {
try {
auto storePath = fetchers::downloadTarball(
store, EvalSettings::resolvePseudoUrl(value), "source", false).storePath;
auto accessor = fetchers::downloadTarball(
EvalSettings::resolvePseudoUrl(value)).accessor;
auto storePath = fetchToStore(*store, SourcePath(accessor), FetchMode::Copy);
res = { store->toRealPath(storePath) };
} catch (FileTransferError & e) {
} catch (Error & e) {
logWarning({
.msg = HintFmt("Nix search path entry '%1%' cannot be downloaded, ignoring", value)
});
Expand Down
3 changes: 2 additions & 1 deletion src/libexpr/primops/fetchTree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "tarball.hh"
#include "url.hh"
#include "value-to-json.hh"
#include "fetch-to-store.hh"

#include <ctime>
#include <iomanip>
Expand Down Expand Up @@ -473,7 +474,7 @@ static void fetch(EvalState & state, const PosIdx pos, Value * * args, Value & v
// https://github.com/NixOS/nix/issues/4313
auto storePath =
unpack
? fetchers::downloadTarball(state.store, *url, name, (bool) expectedHash).storePath
? fetchToStore(*state.store, fetchers::downloadTarball(*url).accessor, FetchMode::Copy, name)
: fetchers::downloadFile(state.store, *url, name, (bool) expectedHash).storePath;

if (expectedHash) {
Expand Down
16 changes: 16 additions & 0 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,22 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
else
throw Error("Commit signature verification on commit %s failed: %s", rev.gitRev(), output);
}

Hash treeHashToNarHash(const Hash & treeHash) override
{
auto accessor = getAccessor(treeHash, false);

fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}});

if (auto res = fetchers::getCache()->lookup(cacheKey))
return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256);

auto narHash = accessor->hashPath(CanonPath::root);

fetchers::getCache()->upsert(cacheKey, fetchers::Attrs({{"narHash", narHash.to_string(HashFormat::SRI, true)}}));

return narHash;
}
};

ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create, bool bare)
Expand Down
6 changes: 6 additions & 0 deletions src/libfetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ struct GitRepo
virtual void verifyCommit(
const Hash & rev,
const std::vector<fetchers::PublicKey> & publicKeys) = 0;

/**
* Given a Git tree hash, compute the hash of its NAR
* serialisation. This is memoised on-disk.
*/
virtual Hash treeHashToNarHash(const Hash & treeHash) = 0;
};

ref<GitRepo> getTarballCache();
Expand Down
164 changes: 94 additions & 70 deletions src/libfetchers/tarball.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "types.hh"
#include "split.hh"
#include "posix-source-accessor.hh"
#include "fs-input-accessor.hh"
#include "store-api.hh"
#include "git-utils.hh"

namespace nix::fetchers {

Expand Down Expand Up @@ -57,10 +60,8 @@ DownloadFileResult downloadFile(
throw;
}

// FIXME: write to temporary file.
Attrs infoAttrs({
{"etag", res.etag},
{"url", res.effectiveUri},
});

if (res.immutableUrl)
Expand Down Expand Up @@ -91,96 +92,98 @@ DownloadFileResult downloadFile(
storePath = std::move(info.path);
}

getCache()->add(
*store,
inAttrs,
infoAttrs,
*storePath,
locked);

if (url != res.effectiveUri)
/* Cache metadata for all URLs in the redirect chain. */
for (auto & url : res.urls) {
inAttrs.insert_or_assign("url", url);
infoAttrs.insert_or_assign("url", *res.urls.rbegin());
getCache()->add(
*store,
{
{"type", "file"},
{"url", res.effectiveUri},
{"name", name},
},
inAttrs,
infoAttrs,
*storePath,
locked);
}

return {
.storePath = std::move(*storePath),
.etag = res.etag,
.effectiveUrl = res.effectiveUri,
.effectiveUrl = *res.urls.rbegin(),
.immutableUrl = res.immutableUrl,
};
}

DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,
bool locked,
const Headers & headers)
{
Attrs inAttrs({
{"type", "tarball"},
{"_what", "tarballCache"},
{"url", url},
{"name", name},
});

auto cached = getCache()->lookupExpired(*store, inAttrs);
auto cached = getCache()->lookupExpired(inAttrs);

if (cached && !cached->expired)
return {
.storePath = std::move(cached->storePath),
.lastModified = (time_t) getIntAttr(cached->infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"),
auto attrsToResult = [&](const Attrs & infoAttrs)
{
auto treeHash = getRevAttr(infoAttrs, "treeHash");
return DownloadTarballResult {
.treeHash = treeHash,
.lastModified = (time_t) getIntAttr(infoAttrs, "lastModified"),
.immutableUrl = maybeGetStrAttr(infoAttrs, "immutableUrl"),
.accessor = getTarballCache()->getAccessor(treeHash, false),
};
};

auto res = downloadFile(store, url, name, locked, headers);
if (cached && !getTarballCache()->hasObject(getRevAttr(cached->infoAttrs, "treeHash")))
cached.reset();

std::optional<StorePath> unpackedStorePath;
time_t lastModified;
if (cached && !cached->expired)
return attrsToResult(cached->infoAttrs);

if (cached && res.etag != "" && getStrAttr(cached->infoAttrs, "etag") == res.etag) {
unpackedStorePath = std::move(cached->storePath);
lastModified = getIntAttr(cached->infoAttrs, "lastModified");
} else {
Path tmpDir = createTempDir();
AutoDelete autoDelete(tmpDir, true);
unpackTarfile(store->toRealPath(res.storePath), tmpDir);
auto members = readDirectory(tmpDir);
if (members.size() != 1)
throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url);
auto topDir = tmpDir + "/" + members.begin()->name;
lastModified = lstat(topDir).st_mtime;
PosixSourceAccessor accessor;
unpackedStorePath = store->addToStore(name, accessor, CanonPath { topDir }, FileIngestionMethod::Recursive, HashAlgorithm::SHA256, {}, defaultPathFilter, NoRepair);
}
auto _res = std::make_shared<Sync<FileTransferResult>>();

Attrs infoAttrs({
{"lastModified", uint64_t(lastModified)},
{"etag", res.etag},
auto source = sinkToSource([&](Sink & sink) {
FileTransferRequest req(url);
req.expectedETag = cached ? getStrAttr(cached->infoAttrs, "etag") : "";
getFileTransfer()->download(std::move(req), sink,
[_res](FileTransferResult r)
{
*_res->lock() = r;
});
});

if (res.immutableUrl)
infoAttrs.emplace("immutableUrl", *res.immutableUrl);
// TODO: fall back to cached value if download fails.

getCache()->add(
*store,
inAttrs,
infoAttrs,
*unpackedStorePath,
locked);
/* Note: if the download is cached, `importTarball()` will receive
no data, which causes it to import an empty tarball. */
TarArchive archive { *source };
auto parseSink = getTarballCache()->getFileSystemObjectSink();
auto lastModified = unpackTarfileToSink(archive, *parseSink);

return {
.storePath = std::move(*unpackedStorePath),
.lastModified = lastModified,
.immutableUrl = res.immutableUrl,
};
auto res(_res->lock());

Attrs infoAttrs;

if (res->cached) {
infoAttrs = cached->infoAttrs;
} else {
infoAttrs.insert_or_assign("etag", res->etag);
infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev());
infoAttrs.insert_or_assign("lastModified", uint64_t(lastModified));
if (res->immutableUrl)
infoAttrs.insert_or_assign("immutableUrl", *res->immutableUrl);
}

/* Insert a cache entry for every URL in the redirect chain. */
for (auto & url : res->urls) {
inAttrs.insert_or_assign("url", url);
getCache()->upsert(inAttrs, infoAttrs);
}

// FIXME: add a cache entry for immutableUrl? That could allow
// cache poisoning.

return attrsToResult(infoAttrs);
}

// An input scheme corresponding to a curl-downloadable resource.
Expand All @@ -198,6 +201,8 @@ struct CurlInputScheme : InputScheme

virtual bool isValidURL(const ParsedURL & url, bool requireTree) const = 0;

static const std::set<std::string> specialParams;

std::optional<Input> inputFromURL(const ParsedURL & _url, bool requireTree) const override
{
if (!isValidURL(_url, requireTree))
Expand All @@ -220,8 +225,12 @@ struct CurlInputScheme : InputScheme
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("revCount", *n);

url.query.erase("rev");
url.query.erase("revCount");
if (auto i = get(url.query, "lastModified"))
if (auto n = string2Int<uint64_t>(*i))
input.attrs.insert_or_assign("lastModified", *n);

for (auto & param : allowedAttrs())
url.query.erase(param);

input.attrs.insert_or_assign("type", std::string { schemeName() });
input.attrs.insert_or_assign("url", url.to_string());
Expand Down Expand Up @@ -275,10 +284,20 @@ struct FileInputScheme : CurlInputScheme
: (!requireTree && !hasTarballExtension(url.path)));
}

std::pair<StorePath, Input> fetch(ref<Store> store, const Input & input) override
std::pair<ref<InputAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
{
auto input(_input);

auto file = downloadFile(store, getStrAttr(input.attrs, "url"), input.getName(), false);
return {std::move(file.storePath), input};

auto narHash = store->queryPathInfo(file.storePath)->narHash;
input.attrs.insert_or_assign("narHash", narHash.to_string(HashFormat::SRI, true));

auto accessor = makeStorePathAccessor(store, file.storePath);

accessor->setPathDisplay("«" + input.to_string() + "»");

return {accessor, input};
}
};

Expand All @@ -296,11 +315,13 @@ struct TarballInputScheme : CurlInputScheme
: (requireTree || hasTarballExtension(url.path)));
}

std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
std::pair<ref<InputAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
{
Input input(_input);
auto url = getStrAttr(input.attrs, "url");
auto result = downloadTarball(store, url, input.getName(), false);
auto input(_input);

auto result = downloadTarball(getStrAttr(input.attrs, "url"), {});

result.accessor->setPathDisplay("«" + input.to_string() + "»");

if (result.immutableUrl) {
auto immutableInput = Input::fromURL(*result.immutableUrl);
Expand All @@ -314,7 +335,10 @@ struct TarballInputScheme : CurlInputScheme
if (result.lastModified && !input.attrs.contains("lastModified"))
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));

return {result.storePath, std::move(input)};
input.attrs.insert_or_assign("narHash",
getTarballCache()->treeHashToNarHash(result.treeHash).to_string(HashFormat::SRI, true));

return {result.accessor, input};
}
};

Expand Down
12 changes: 8 additions & 4 deletions src/libfetchers/tarball.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

#include "types.hh"
#include "path.hh"
#include "hash.hh"

#include <optional>

namespace nix {
class Store;
struct InputAccessor;
}

namespace nix::fetchers {
Expand All @@ -28,16 +30,18 @@ DownloadFileResult downloadFile(

struct DownloadTarballResult
{
StorePath storePath;
Hash treeHash;
time_t lastModified;
std::optional<std::string> immutableUrl;
ref<InputAccessor> accessor;
};

/**
* Download and import a tarball into the Git cache. The result is the
* Git tree hash of the root directory.
*/
DownloadTarballResult downloadTarball(
ref<Store> store,
const std::string & url,
const std::string & name,
bool locked,
const Headers & headers = {});

}
Loading

0 comments on commit cabee98

Please sign in to comment.