This repository has been archived by the owner on Mar 1, 2023. It is now read-only.
/
ArtifactWrappers.jl
109 lines (92 loc) · 3.02 KB
/
ArtifactWrappers.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
module ArtifactWrappers
using Pkg.Artifacts
using DocStringExtensions: FIELDS
export ArtifactWrapper, ArtifactFile, get_data_folder
"""
ArtifactFile
A single data file to be downloaded, containing both the url and the
name to use locally.
# Fields
$(FIELDS)
"""
Base.@kwdef struct ArtifactFile
"URL pointing to data to be downloaded"
url::AbstractString = ""
"Local name used for downloaded online data"
filename::AbstractString = ""
end
"""
ArtifactWrapper
A set of data files to be downloaded, grouped by `data_name`. Example:
```
dataset = ArtifactWrapper(
"Artifacts.toml",
"MyDataSet",
ArtifactFile[
ArtifactFile(
url="https://..../SomeNetCDF1.nc",
filename="experiment1.nc",
),
ArtifactFile(
url="https://..../SomeNetCDF2.nc",
filename="experiment2.nc",
),
]
)
```
# Fields
$(FIELDS)
"""
struct ArtifactWrapper
"Path to the used Artifacts.toml"
artifact_toml::AbstractString
"Unique name of dataset"
data_name::AbstractString
"Array of `ArtifactFile`'s, grouped by this dataset"
artifact_files::Vector{ArtifactFile}
end
"""
get_data_folder(art_wrap::ArtifactWrapper)
Get local folder of dataset defined in `art_wrap`.
Example:
```
dataset_path = get_data_folder(dataset)
```
"""
function get_data_folder(art_wrap::ArtifactWrapper)
# Query the `Artifacts.toml` file for the hash bound to the name
# data_name (returns `nothing` if no such binding exists)
data_hash = artifact_hash(art_wrap.data_name, art_wrap.artifact_toml)
# If the name was not bound, or the hash it was bound to does not
# exist, create it!
if data_hash == nothing || !artifact_exists(data_hash)
# create_artifact() returns the content-hash of the artifact
# directory once we're finished creating it
data_hash = create_artifact() do artifact_dir
# We create the artifact by simply downloading a few files
# into the new artifact directory
filenames = [af.filename for af in art_wrap.artifact_files]
urls = [af.url for af in art_wrap.artifact_files]
for (url, filename) in zip(urls, filenames)
download("$(url)", joinpath(artifact_dir, filename))
end
end
# Now bind that hash within our `Artifacts.toml`. `force = true`
# means that if it already exists, just overwrite with the new
# content-hash. Unless the source files change, we do not expect
# the content hash to change, so this should not cause
# unnecessary version control churn.
bind_artifact!(
art_wrap.artifact_toml,
art_wrap.data_name,
data_hash,
force = true,
)
end
# Get the path of the dataset, either newly created or
# previously generated. This should be something like:
# `~/.julia/artifacts/dbd04e28be047a54fbe9bf67e934be5b5e0d357a`
dataset_path = artifact_path(data_hash)
return dataset_path
end
end # module