/
Sources.jl
311 lines (278 loc) · 12 KB
/
Sources.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
using LibGit2
using ProgressMeter
const update! = ProgressMeter.update!
export ArchiveSource, FileSource, GitSource, DirectorySource
"""
An `AbstractSource` is something used as a source to build the package. Sources
are installed to `\${WORKSPACE}/srcdir` in the build environment.

Concrete subtypes of `AbstractSource` are:

* [`ArchiveSource`](@ref): a remote archive to download from the Internet;
* [`FileSource`](@ref): a remote file to download from the Internet;
* [`GitSource`](@ref): a remote Git repository to clone;
* [`DirectorySource`](@ref): a local directory to mount.
"""
abstract type AbstractSource end
"""
    check_github_archive(url::String)

Reject URLs that point to a source archive automatically generated by GitHub.

Throw an `ArgumentError` when `url` ends with
`github.com/<owner>/<repo>/archive/<name>.tar.gz` (or `.zip`), optionally with a
`refs/tags/` component before `<name>`.  Return `nothing` for every other URL.
"""
function check_github_archive(url::String)
    # See
    # * https://github.blog/changelog/2023-01-30-git-archive-checksums-may-change/
    # * https://github.com/bazel-contrib/SIG-rules-authors/issues/11
    # * https://github.com/spack/spack/issues/35250
    # Note: according to
    # <https://github.com/bazel-contrib/SIG-rules-authors/issues/11#issuecomment-1029861300>
    # the `/archive/refs/tags` should be more stable, but that didn't happen in the incident
    # on 2023-01-30, so it's unclear whether we can trust them.
    #
    # The dot of the host name is escaped: an unescaped `.` matches any character, which
    # would also reject unrelated URLs such as `.../githubXcom/a/b/archive/v1.tar.gz`.
    if contains(url, r"github\.com/[^/]+/[^/]+/archive/(refs/tags/)?[^/]+\.(tar\.gz|zip)$")
        throw(ArgumentError("""
        The archive automatically generated by GitHub
        $(url)
        may not have a stable checksum in the future, thus cannot be used as a reliable source, see
        <https://github.blog/2023-02-21-update-on-the-future-stability-of-source-code-archives-and-hashes/>.
        Use a different source, for example a `GitSource`, or an official release artifact uploaded
        by the maintainers of the package (*not* the automatic archive produced by GitHub).
        """))
    end
    return nothing
end
"""
    ArchiveSource(url::String, hash::String; unpack_target::String = "")

Describe a remote archive, in one of the supported archive formats (e.g., TAR or
ZIP balls), that is downloaded from the Internet at `url`.  `hash` is the
64-character SHA256 checksum of the file.

Inside the builder environment the archive is unpacked automatically into
`\${WORKSPACE}/srcdir`, or into the subdirectory of it named by the optional
keyword `unpack_target` when one is given.
"""
struct ArchiveSource <: AbstractSource
    url::String
    hash::String
    unpack_target::String

    # The URL is validated before the object is built, so an `ArchiveSource`
    # pointing at an auto-generated GitHub archive cannot be constructed.
    ArchiveSource(url::String, hash::String, unpack_target::String) = begin
        check_github_archive(url)
        new(url, hash, unpack_target)
    end
end

# Convenience constructor exposing `unpack_target` as a keyword argument.
function ArchiveSource(url::String, hash::String; unpack_target::String = "")
    return ArchiveSource(url, hash, unpack_target)
end
# File suffixes of the (optionally compressed) TAR archives we know how to handle
const tar_extensions = [
    ".tar", ".tar.gz", ".tgz", ".tar.bz", ".tar.bz2",
    ".tar.xz", ".tar.Z", ".txz", ".tar.zst",
]
# Every archive suffix we know about: the TAR family followed by the other formats
const archive_extensions = [tar_extensions; ".zip"; ".conda"]
"""
    FileSource(url::String, hash::String; filename::String = basename(url))

Describe a remote file that is downloaded from the Internet at `url`.  `hash` is
the 64-character SHA256 checksum of the file.

Inside the builder environment the file is stored under `\${WORKSPACE}/srcdir`,
named after the basename of the originating URL unless the keyword argument
`filename` is specified.
"""
struct FileSource <: AbstractSource
    url::String
    hash::String
    filename::String

    # The URL is validated before the object is built, so a `FileSource`
    # pointing at an auto-generated GitHub archive cannot be constructed.
    FileSource(url::String, hash::String, filename::String) = begin
        check_github_archive(url)
        new(url, hash, filename)
    end
end

# Convenience constructor exposing `filename` as a keyword argument.
function FileSource(url::String, hash::String; filename::String = basename(url))
    return FileSource(url, hash, filename)
end
"""
    GitSource(url::String, hash::String; unpack_target::String = "")

Describe a remote Git repository to clone from `url`.  `hash` is the
40-character SHA1 revision to check out after cloning.

The repository is cloned into `\${WORKSPACE}/srcdir`, or into the subdirectory
of it named by the optional keyword `unpack_target` when one is given.
"""
struct GitSource <: AbstractSource
    url::String
    hash::String
    unpack_target::String
end

# Convenience constructor exposing `unpack_target` as a keyword argument.
function GitSource(url::String, hash::String; unpack_target::String = "")
    return GitSource(url, hash, unpack_target)
end
"""
    DirectorySource(path::String; target::String = "", follow_symlinks::Bool = false)

Describe a local directory, located at `path`, to mount.

The content of the directory is mounted in `\${WORKSPACE}/srcdir`, or in the
subdirectory of it named by the optional keyword `target` when one is given.
When `follow_symlinks` is `true`, symbolic links are replaced by a copy of
their target.
"""
struct DirectorySource <: AbstractSource
    path::String
    target::String
    follow_symlinks::Bool
end

# Symlinks are not followed by default when the source is set up.  Following
# them is nevertheless needed in some cases, for example when symlinked
# patchsets are shared across multiple versions of GCC, etc...
function DirectorySource(path::String; target::String = "", follow_symlinks::Bool = false)
    return DirectorySource(path, target, follow_symlinks)
end
# Heuristic guess: a URL is taken to be a Git repository when it uses the
# `git://` scheme or ends in `.git`.
function isgitrepo(url::AbstractString)
    return startswith(url, "git://") || endswith(url, ".git")
end
# Internal description of a source once it is available locally.  It is only
# used to set up the source in the workspace and is not meant to appear in
# `build_tarballs.jl` scripts.  The type parameter records which kind of
# `AbstractSource` it came from.
struct SetupSource{T<:AbstractSource}
    path::String
    hash::String
    target::String
    follow_symlinks::Bool
end

# Only `DirectorySource` makes use of `follow_symlinks`; this method lets every
# other kind of source leave it out (it is then `false`).
function SetupSource{T}(path::String, hash::String, target::String) where {T}
    return SetupSource{T}(path, hash, target, false)
end
# Used by wizard/obtain_source.jl to guess the type parameter of `SetupSource`:
# the Git check looks at `url`, the archive check looks at the suffix of `path`,
# and anything else is treated as a plain file.
function SetupSource(url::String, path::String, hash::String, target::String)
    isgitrepo(url) && return SetupSource{GitSource}(path, hash, target)
    if any(ext -> endswith(path, ext), archive_extensions)
        return SetupSource{ArchiveSource}(path, hash, target)
    end
    return SetupSource{FileSource}(path, hash, target)
end
# A patch identified by a name; both fields are stored as plain strings.
# NOTE(review): presumably `name` is the patch's file name and `patch` its
# textual content -- confirm against the code that constructs `PatchSource`.
struct PatchSource
name::String
patch::String
end
# Obtain an `ArchiveSource` or `FileSource` and describe the result as a
# `SetupSource{T}`.  When `source.url` is an existing local file it is only
# verified against `source.hash`; otherwise it is downloaded into
# `downloads_dir` and verified.  `verbose` is accepted but not used here.
function download_source(source::T; verbose::Bool = false, downloads_dir = storage_dir("downloads")) where {T<:Union{ArchiveSource,FileSource}}
    # Where the source ends up in the workspace depends on its kind
    target_of(s::ArchiveSource) = s.unpack_target
    target_of(s::FileSource) = s.filename

    local_path = if isfile(source.url)
        # Make the path absolute right away so that a source given with a
        # relative path is not lost track of
        given_path = abspath(source.url)
        # A locally-sourced tarball only needs to be verified
        verify(given_path, source.hash) || error("Verification failed")
        given_path
    else
        # Anything else is downloaded to a hash-prefixed file name and verified
        cached_path = joinpath(downloads_dir, string(source.hash, "-", basename(source.url)))
        download_verify(source.url, source.hash, cached_path)
        cached_path
    end
    return SetupSource{T}(local_path, source.hash, target_of(source))
end
# Progress counters handed to the `transfer_progress` callback, which reads
# them from a raw pointer with `unsafe_load`.  The field order and C types are
# therefore part of the memory layout and must not be changed.
# NOTE(review): the fields appear to follow libgit2's `git_indexer_progress`
# (formerly `git_transfer_progress`) C struct -- confirm against the libgit2
# version in use.
struct GitTransferProgress
total_objects::Cuint
indexed_objects::Cuint
received_objects::Cuint
local_objects::Cuint
total_deltas::Cuint
indexed_deltas::Cuint
received_bytes::Csize_t
end
# Transfer-progress callback: reads the counters behind `progress` and forwards
# them to the progress meter stored under `:transfer_progress` in `payloads`.
# Always returns `Cint(0)`.
function transfer_progress(progress::Ptr{GitTransferProgress}, payloads::Dict)
    meter = payloads[:transfer_progress]
    stats = unsafe_load(progress)
    meter.n = Int(stats.total_objects)
    if stats.total_deltas == 0
        # No deltas reported: track the number of received objects
        update!(meter, Int(max(1, stats.received_objects)))
    else
        # Deltas are reported: relabel the meter and track indexed deltas instead
        meter.desc = "Resolving Deltas: "
        meter.n = Int(stats.total_deltas)
        update!(meter, Int(max(1, stats.indexed_deltas)))
    end
    return Cint(0)
end
# Return the path of a bare clone of `url` kept under `downloads_dir/clones`.
# The directory name is the basename of `url` followed by the SHA256 of `url`.
# When the directory is missing the repository is cloned; when it exists it is
# reused and fetched only if needed (see below).
function cached_git_clone(url::String;
                          hash_to_check::Union{Nothing, String} = nothing,
                          downloads_dir::String = storage_dir("downloads"),
                          verbose::Bool = false,
                          progressbar::Bool = false,
                          )
    clone_name = string(basename(url), "-", bytes2hex(sha256(url)))
    repo_path = joinpath(downloads_dir, "clones", clone_name)

    if !isdir(repo_path)
        # There is no repo_path yet: clone it down into a bare repository
        verbose && @info("Cloning git repository", url, repo_path)
        callbacks = LibGit2.Callbacks()
        p = Progress(0, dt=1, desc="Cloning: ")
        if progressbar
            progress_cb = @cfunction(
                transfer_progress,
                Cint,
                (Ptr{GitTransferProgress}, Any)
            )
            callbacks[:transfer_progress] = (progress_cb, p)
        end
        LibGit2.clone(url, repo_path; isbare=true, callbacks)
        return repo_path
    end

    # The cached git repo is still there, so use it!
    verbose && @info("Using cached git repository", url, repo_path)
    LibGit2.with(LibGit2.GitRepo(repo_path)) do repo
        # When the wanted hash is known, fetch() only if this repository does
        # not contain it yet.  This is not only faster, it avoids race
        # conditions when multiple builders on the same machine all fetch at
        # once.
        needs_fetch = hash_to_check === nothing || !LibGit2.iscommit(hash_to_check, repo)
        needs_fetch && LibGit2.fetch(repo)
    end
    return repo_path
end
# Obtain a `GitSource` through the clone cache.  All keyword arguments are
# forwarded to `cached_git_clone`, with `source.hash` as the hash to look for.
function download_source(source::GitSource; kwargs...)
    clone_dir = cached_git_clone(source.url; hash_to_check = source.hash, kwargs...)
    return SetupSource{GitSource}(clone_dir, source.hash, source.unpack_target)
end
# Nothing is downloaded for a `DirectorySource`: the directory must already
# exist (an error is raised otherwise).  The returned `SetupSource` carries the
# absolute path and an empty hash.
function download_source(source::DirectorySource; verbose::Bool = false)
    isdir(source.path) || error("Could not find directory \"$(source.path)\".")
    verbose && @info "Directory \"$(source.path)\" found"
    return SetupSource{DirectorySource}(abspath(source.path), "", source.target, source.follow_symlinks)
end
"""
    download_source(source::AbstractSource; verbose::Bool = false)

Download the given `source`.  All downloads are cached within the
BinaryBuilder `downloads` storage directory.

Return a `SetupSource` describing where the source is available locally.  The
methods for `ArchiveSource`, `FileSource` and `GitSource` additionally accept a
`downloads_dir` keyword to use a different cache directory; for a
`DirectorySource` nothing is downloaded, the directory is only checked for
existence.
"""
download_source
# JSON serialization of sources: each source is lowered to a dictionary holding
# a "type" tag together with whatever `extract_fields` yields for it.
function JSON.lower(src::ArchiveSource)
    return Dict("type" => "archive", extract_fields(src)...)
end

function JSON.lower(src::FileSource)
    return Dict("type" => "file", extract_fields(src)...)
end

function JSON.lower(src::GitSource)
    return Dict("type" => "git", extract_fields(src)...)
end

function JSON.lower(src::DirectorySource)
    return Dict("type" => "directory", extract_fields(src)...)
end
# When deserializing the JSON file, the sources are in the form of
# dictionaries.  This function turns such a dictionary back into the matching
# `AbstractSource`, selected by its "type" entry.
# Only "path" (directory) or "url" and "hash" (git, file, archive) are read
# from the dictionary; every other field takes the constructor's default.
function sourcify(d::Dict)
    kind = d["type"]
    kind == "directory" && return DirectorySource(d["path"])
    kind == "git" && return GitSource(d["url"], d["hash"])
    kind == "file" && return FileSource(d["url"], d["hash"])
    kind == "archive" && return ArchiveSource(d["url"], d["hash"])
    error("Cannot convert to source")
end
# XXX: compatibility functions.  These are needed for as long as the old-style
# Pair/String source specifications are supported.

# A proper source needs no conversion.
function coerce_source(source::AbstractSource)
    return source
end

# Old style: a bare string is taken to be the path of a local directory.
function coerce_source(source::AbstractString)
    @warn "Using a string as source is deprecated, use DirectorySource instead"
    return DirectorySource(source)
end
# Old style: a `url => hash` pair.  The kind of source is guessed from the
# suffix of the URL: `.git` gives a `GitSource`, a known archive extension an
# `ArchiveSource`, anything else a `FileSource`.
function coerce_source(source::Pair)
    url, checksum = first(source), last(source)
    if endswith(url, ".git")
        @warn "Using a pair as source is deprecated, use GitSource instead"
        return GitSource(url, checksum)
    end
    if any(ext -> endswith(url, ext), archive_extensions)
        @warn "Using a pair as source is deprecated, use ArchiveSource instead"
        return ArchiveSource(url, checksum)
    end
    @warn "Using a pair as source is deprecated, use FileSource instead"
    return FileSource(url, checksum)
end