/
filesystem.jl
320 lines (274 loc) · 9.99 KB
/
filesystem.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
"""
Root storage object for trees which are rooted in the file system (in git
terminology, there exists a "working copy")
## Metadata spec
For File:
```
[datasets.storage]
driver="FileSystem"
type="File"
path=\$(path_to_file)
```
For FileTree:
```
[datasets.storage]
driver="FileSystem"
type="FileTree"
path=\$(path_to_directory)
```
"""
mutable struct FileSystemRoot
# Location of the root on the local filesystem. The keyword constructor in
# this file normalizes it with `abspath`; `setindex!` rewrites it after a
# temporary tree has been moved to its destination.
path::String
# Whether mutation through this root is permitted (reported by `iswriteable`).
write::Bool
# When true, the finalizer installed by the keyword constructor removes
# `path` recursively. `setindex!` clears this flag once ownership of the
# data has been transferred, which is why the struct is mutable.
cleanup::Bool
end
# Keyword constructor: build a root at the absolute form of `path`.
#
# With `cleanup=true` a finalizer is attached which deletes the path
# (recursively) when the root is garbage collected. The flag is re-read at
# finalization time, so clearing `root.cleanup` later disables the deletion.
function FileSystemRoot(path::AbstractString; write=false, cleanup=false)
    root = FileSystemRoot(abspath(path), write, cleanup)
    # Nothing more to do unless the caller asked for automatic cleanup.
    cleanup || return root
    finalizer(root) do r
        r.cleanup && rm(r.path, recursive=true, force=true)
    end
    return root
end
# The functions sys_abspath and sys_joinpath generate/join OS-specific
# _local filesystem paths_ out of logical paths. They should be defined only
# for trees which are rooted in the actual filesystem.
# Join the components of a logical relative path with the OS path separator.
# An empty path maps to the empty string.
function sys_joinpath(path::RelPath)
    parts = path.components
    return isempty(parts) ? "" : joinpath(parts...)
end
# Filesystem path of the root itself.
function sys_abspath(root::FileSystemRoot)
    return root.path
end

# Filesystem path of the logical `path` below `root`. An empty `path`
# resolves to the root's own path, with no trailing separator appended.
function sys_abspath(root::FileSystemRoot, path::RelPath)
    base = sys_abspath(root)
    isempty(path.components) && return base
    return joinpath(base, sys_joinpath(path))
end
# Convenience methods: resolve anything that carries a `root` and a relative
# `path` by delegating to the two-argument form.
function sys_abspath(path::AbsPath)
    return sys_abspath(path.root, path.path)
end

function sys_abspath(tree::FileTree)
    return sys_abspath(tree.root, tree.path)
end

function sys_abspath(file::File)
    return sys_abspath(file.root, file.path)
end
# Report whether the root was created with write access.
function iswriteable(root::FileSystemRoot)
    return root.write
end
#--------------------------------------------------
# Storage data interface for trees
#
# TODO: Formalize this interface!
## 1. Query
# TODO: would it be better to express the following dispatch in terms of
# AbsPath{<:FileSystemRoot} rather than using double dispatch?
# Each read-only query resolves `(root, path)` to a local filesystem path and
# forwards to the same-named function on that path string.
for query in (:isdir, :isfile, :ispath, :filesize, :readdir)
    @eval Base.$query(root::FileSystemRoot, path::RelPath) =
        $query(sys_abspath(root, path))
end

# A root is summarized by its filesystem path.
function Base.summary(io::IO, root::FileSystemRoot)
    return print(io, sys_abspath(root))
end
## 2. Mutation
#
# TODO: Likely requires rework!
# Create the directory `path` below `root` and return it as a `FileTree`.
#
# Keyword arguments (for example `mode`) are forwarded to the filesystem
# `mkdir`. Throws if `root` is read-only.
function Base.mkdir(root::FileSystemRoot, path::RelPath; kws...)
    if !iswriteable(root)
        error("Cannot make directory in read-only tree")
    end
    # Forward the keywords we accepted. The previous body splatted an
    # undefined `args`, so every call on a writeable root raised
    # UndefVarError.
    mkdir(sys_abspath(root, path); kws...)
    return FileTree(root, path)
end
# Remove `path` below `root`, forwarding all keywords (`force`, `recursive`,
# ...) to the filesystem `rm`.
#
# NOTE(review): unlike `mkdir` and `delete!` in this file, no `iswriteable`
# check is made here. Confirm whether that is intentional.
function Base.rm(root::FileSystemRoot, path::RelPath; kws...)
    target = sys_abspath(root, path)
    return rm(target; kws...)
end
# Recursively delete `path` below `root`. Throws if `root` is read-only.
function Base.delete!(root::FileSystemRoot, path::RelPath)
    iswriteable(root) || error("Cannot delete from read-only tree $root")
    return rm(sys_abspath(root, path); recursive=true)
end
#--------------------------------------------------
# Storage data interface for File
# TODO: Make this the generic implementation for AbstractDataStorage
# Function-first form of `open` (enables `open(IO, root, path) do io ... end`).
# Opens `path` under `root` as `as_type` via the three-argument `open` method
# and passes the result to `f`; keywords are forwarded unchanged.
function Base.open(f::Function, as_type::Type{IO},
root::FileSystemRoot, path; kws...)
# NOTE(review): `@context` and `@!` are not defined in this file (presumably
# from ResourceContexts.jl, confirm). They appear to tie the lifetime of the
# opened resource to this call.
@context f(@! open(as_type, root, path; kws...))
end
# Open the file at `path` below `root` as an `IO` stream.
#
# `write` defaults to false and `read` defaults to `!write`; remaining
# keywords are forwarded to the filesystem `open`. Requesting `write=true`
# on a read-only root is an error.
# NOTE(review): the `@!` macro is defined outside this file; the final `@!`
# call is kept exactly as in the original form.
@! function Base.open(::Type{IO}, root::FileSystemRoot, path;
                      write=false, read=!write, kws...)
    readonly_root = !iswriteable(root)
    if readonly_root && write
        error("Error writing file at read-only path $path")
    end
    @! open(sys_abspath(root, path); read=read, write=write, kws...)
end
# Read the file at `path` below `root` and return its content as a `T`.
function Base.read(root::FileSystemRoot, path::RelPath, ::Type{T}) where {T}
    return read(sys_abspath(root, path), T)
end

# Read the whole file at `path` below `root` using the default `read` of a
# path string.
function Base.read(root::FileSystemRoot, path::RelPath)
    return read(sys_abspath(root, path))
end
#-------------------------------------------------------------------------------
# Mutation via newdir/newfile
# Root for temporary data: writeable, and removed from disk by the root's
# finalizer unless ownership is transferred first.
function _temp_root(path)
    return FileSystemRoot(path, write=true, cleanup=true)
end
"""
    newdir()

Create a new `FileTree` in the local temporary directory. If not moved to a
permanent location (for example, with `some_tree["name"] = newdir()`) the
temporary tree will be cleaned up during garbage collection.
"""
function newdir()
    # Ask `mktempdir` not to register its own at-exit cleanup: removal is
    # handled by the finalizer attached to the temporary root instead.
    tmp = mktempdir(cleanup=false)
    return FileTree(_temp_root(tmp))
end
# Create the directory `path` below `root` in place, including any missing
# parents. With `overwrite=true` whatever currently exists at that location
# is removed first. Returns the result of `mkpath`.
function newdir(root::FileSystemRoot, path::RelPath; overwrite=false)
    target = sys_abspath(root, path)
    overwrite && rm(target, force=true, recursive=true)
    return mkpath(target)
end
# Create a standalone temporary file and return it as a `File` on a
# temporary root.
#
# If `func` is given it is called as `func(io)` with the open stream so the
# caller can populate the file. If `func` throws, the temporary file is
# removed and the exception propagates.
function newfile(func=nothing)
    path, io = mktemp(cleanup=false)
    populated = false
    try
        if func !== nothing
            func(io)
        end
        populated = true
    finally
        # Close before removing: deleting a file that is still open can fail
        # on some platforms, and that secondary error would hide the one
        # thrown by `func`. `force=true` tolerates `func` having already
        # removed the file.
        close(io)
        populated || rm(path, force=true)
    end
    return File(_temp_root(path))
end
# Create (or truncate) the file `path` below `root`, creating parent
# directories as needed, and call `f(io)` on the stream opened for writing.
# Returns whatever `f` returns.
#
# NOTE(review): `kws` is accepted but not used by this method.
function newfile(f::Function, root::FileSystemRoot, path::RelPath; kws...)
    target = sys_abspath(root, path)
    mkpath(dirname(target))
    return open(f, target, write=true)
end
# Create an empty file at `path` below `root` by delegating to the
# function-first method with a writer that does nothing.
function newfile(root::FileSystemRoot, path::RelPath; kws...)
    return newfile(root, path; kws...) do io
        nothing
    end
end
#-------------------------------------------------------------------------------
# Move srcpath to destpath, making all attempts to preserve the original
# content of `destpath` if anything goes wrong. We assume that `srcpath` is
# temporary content which doesn't need to be protected.
# Move `srcpath` onto `destpath`, replacing anything already there.
#
# Existing content at `destpath` is first parked in a fresh holding directory
# created inside `tempdir_parent`. If the move of `srcpath` then fails, the
# parked content is moved back and the original exception is rethrown. If
# even the rollback fails, an error naming the location of the preserved
# data is raised instead. On every path where the holding directory has been
# emptied it is removed again.
function mv_force_with_dest_rollback(srcpath, destpath, tempdir_parent)
    holding_area = nothing
    held_path = nothing
    if ispath(destpath)
        # If the destination path exists, improve the atomic nature of the
        # update by first moving existing data to a temporary directory.
        holding_area = mktempdir(tempdir_parent, prefix="jl_to_remove_", cleanup=false)
        held_path = joinpath(holding_area, basename(destpath))
        mv(destpath, held_path)
    end
    try
        mv(srcpath, destpath)
    catch
        if !isnothing(holding_area)
            try
                # Attempt to put things back as they were!
                mv(held_path, destpath)
            catch
                # At this point we've tried our best to preserve the user's
                # data but something has gone wrong, likely at the OS level.
                # The user will have to clean up manually if possible.
                error("""
                    Something went wrong while moving data to path $destpath.
                    We tried restoring the original data to $destpath, but were
                    met with another error. The original data is preserved in
                    $held_path
                    See the catch stack for the root cause.
                    """)
            end
            # Rollback succeeded, so the holding area is now empty. Remove it
            # rather than leaving a stray `jl_to_remove_*` directory in the
            # user's tree. The non-recursive `rm` can only delete an empty
            # directory, so it can never destroy data; a failure here must
            # not mask the root cause.
            try
                rm(holding_area, force=true)
            catch
                @warn "Could not remove temporary holding area" holding_area
            end
        end
        rethrow()
    end
    if !isnothing(holding_area)
        # If we get to here, it's safe to remove the holding area
        rm(holding_area, recursive=true)
    end
end
# `tree[path] = tmpdata`: move temporary data (as produced by `newdir()` or
# `newfile()`) into `tree` at `path`, replacing any existing content there.
#
# Requirements, each reported with `error` when violated:
#   * `tree` must be rooted in a writeable root;
#   * `tmpdata` must still be temporary (its root has `cleanup` set);
#   * `tmpdata` must be the whole temporary tree or file, not a sub-path.
#
# After the move, `tmpdata`'s root is re-pointed at the destination and its
# cleanup flag is cleared so the finalizer no longer deletes the data.
# Returns `tree`.
function Base.setindex!(tree::FileTree{FileSystemRoot},
                        tmpdata::Union{File{FileSystemRoot},FileTree{FileSystemRoot}},
                        path::AbstractString)
    if !iswriteable(tree.root)
        error("Attempt to move to a read-only tree $tree")
    end
    if !tmpdata.root.cleanup
        type = isdir(tmpdata) ? "directory" : "file"
        error("Attempted to move $type which is already rooted in $(tmpdata.root)")
    end
    # The restriction applies to the temporary data, not to the destination.
    # The root's path is rewritten below, which is only meaningful when
    # `tmpdata` refers to the root itself. (The previous check inspected
    # `tree.path`, which wrongly rejected assignment into subtrees and let
    # partial temporary trees through.)
    if !isempty(tmpdata.path)
        # Eh, the number of ways the user can misuse this isn't really funny :-/
        error("Temporary trees must be moved in full. The tree had non-empty path $(tmpdata.path)")
    end
    destpath = sys_abspath(joinpath(tree, RelPath(path)))
    srcpath = sys_abspath(tmpdata)
    # Holding area for any displaced destination data is created inside the
    # destination tree's own directory.
    tempdir_parent = sys_abspath(tree)
    mkpath(dirname(destpath))
    mv_force_with_dest_rollback(srcpath, destpath, tempdir_parent)
    # Transfer ownership of the data to `tree`.
    tmpdata.root.cleanup = false
    tmpdata.root.path = destpath
    return tree
end
#--------------------------------------------------
# Filesystem storage driver
# Storage driver entry point for `driver="FileSystem"`.
#
# Reads `path` and `type` from `config`, validates that the path matches the
# declared type, builds the corresponding `File` or `FileTree` (narrowed to
# the dataspec fragment of `dataset` when one is present) and calls
# `f(storage)`, returning its result.
function connect_filesystem(f, config, dataset)
    path = config["path"]
    type = config["type"]
    storage = if type in ("File", "Blob")
        isfile(path) || throw(ArgumentError("$(repr(path)) should be a file"))
        File(FileSystemRoot(path))
    elseif type in ("FileTree", "BlobTree")
        isdir(path) || throw(ArgumentError("$(repr(path)) should be a directory"))
        whole_tree = FileTree(FileSystemRoot(path))
        fragment = dataspec_fragment_as_path(dataset)
        isnothing(fragment) ? whole_tree : whole_tree[fragment]
    else
        throw(ArgumentError("DataSet type $type not supported on the filesystem"))
    end
    return f(storage)
end

# Register the driver under the name used in `[datasets.storage]` metadata.
add_storage_driver("FileSystem"=>connect_filesystem)
#-------------------------------------------------------------------------------
# Deprecations
# Deprecated: interpret `relpath` relative to the current working directory,
# returning an `AbsPath` on a writeable root at `pwd()`.
function Base.abspath(relpath::RelPath)
    Base.depwarn("""
        `abspath(::RelPath)` defaults to using `pwd()` as the root of the path
        but this leads to fragile code so will be removed in the future""",
        :abspath)
    cwd_root = FileSystemRoot(pwd(); write=true)
    return AbsPath(cwd_root, relpath)
end
# Deprecated newdir() and newfile() variants
# Deprecated: create a temporary directory inside `ctx`'s directory and
# return it as a `FileTree` on a temporary root.
function newdir(ctx::FileSystemRoot)
    Base.depwarn("""
        `newdir(ctx::FileSystemRoot)` is deprecated. Use the in-place
        version `newdir(::FileTree, path)` instead.
        """, :newdir)
    parent = sys_abspath(ctx)
    tmp = mktempdir(parent, cleanup=false)
    return FileTree(_temp_root(tmp))
end
# Deprecated: create an empty temporary file inside `ctx`'s directory and
# return it as a `File` on a temporary root.
function newfile(ctx::FileSystemRoot)
    Base.depwarn("""
        `newfile(ctx::FileSystemRoot)` is deprecated. Use the in-place
        version `newfile(::FileTree, path)` instead.
        """, :newfile)
    parent = sys_abspath(ctx)
    tmp, stream = mktemp(parent, cleanup=false)
    # Only the file itself is wanted; release the handle immediately.
    close(stream)
    return File(_temp_root(tmp))
end
# Deprecated: create a temporary file inside `root`'s directory, populate it
# with `f(io)` and return it as a `File` on a temporary root. If `f` throws,
# the temporary file is removed and the exception propagates.
function newfile(f::Function, root::FileSystemRoot)
    Base.depwarn("""
        `newfile(f::Function, ctx::FileSystemRoot)` is deprecated.
        Use newfile() or the in-place version `newfile(::FileTree, path)` instead.
        """, :newfile)
    path, io = mktemp(sys_abspath(root), cleanup=false)
    populated = false
    try
        f(io)
        populated = true
    finally
        # Close before removing: deleting a file that is still open can fail
        # on some platforms, and that secondary error would hide the one
        # thrown by `f`. `force=true` tolerates `f` having already removed
        # the file.
        close(io)
        populated || rm(path, force=true)
    end
    return File(_temp_root(path))
end