Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] Julia0.6 support #64

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ The short version is:
1. Stick your data anywhere with an open HTTP link. (Skip this if it is already online.)
2. Write a DataDep registration block.
3. Refer to the data using `datadep"Dataname/file.csv"` etc. as if it were a file path, and DataDeps.jl will sort out getting it onto your system.
4. For CI purposes set the `DATADEPS_ALWAY_ACCEPT` environment variable.
4. For CI purposes set the `DATADEPS_ALWAYS_ACCEPT` environment variable.

#### Where can I store my data online?
Wherever you want, so long as it gives an Open HTTP(/s) link to download it. **
Expand Down Expand Up @@ -381,7 +381,7 @@ You can set these in the `.juliarc` file using the `ENV` dictionary if you don't
However, most people shouldn't need to.
DataDeps.jl tries to have very sensible defaults.

- `DATADEPS_ALWAY_ACCEPT` -- bypasses the confirmation before downloading data. Set to `true` (or similar string)
- `DATADEPS_ALWAYS_ACCEPT` -- bypasses the confirmation before downloading data. Set to `true` (or similar string)
- This is provided for scripting (in particular CI) use
- Note that it remains your responsibility to understand and read any terms of the data use (this remains true even if you don't turn on this bypass)
- default `false`
Expand All @@ -391,12 +391,14 @@ DataDeps.jl tries to have very sensible defaults.
- `DATADEPS_DISABLE_DOWNLOAD` -- causes any action that would result in the download being triggered to throw an exception.
- useful e.g. if you are in an environment with metered data, where your datasets should have already been downloaded earlier, and if they were not you want to respond to the situation rather than let DataDeps download them for you.
- default `false`
- `DATADEPS_DISABLE_ERROR_CLEANUP` -- By default DataDeps.jl will clean up the directory the datadep was being downloaded to if there is an error during the resolution (in any of the `fetch`, `checksum`, or `post_fetch` steps). For debugging purposes you may wish to disable this cleanup step so you can interrogate the files by hand.


## Extending DataDeps.jl for Contributors
Feel free (encouraged even) to open issues and make PRs.

### Internal Docstrings
As well as the usual all the publicly facing methods having docistrings,
As well as all the publicly facing methods having docstrings (as usual),
most of the internal methods do also.
You can view them in the source; or via the julia REPL etc.
Hopefully the internal docstrings make it clear how each method is used.
Expand Down
2 changes: 2 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ install:
# Run installer silently, output to C:\projects\julia
- C:\projects\julia-binary.exe /S /D=C:\projects\julia

build: off # Disable MSBuild

build_script:
# Need to convert from shallow to complete for Pkg.clone to work
- IF EXIST .git\shallow (git fetch --unshallow)
Expand Down
27 changes: 18 additions & 9 deletions src/resolution_automatic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,21 @@ function Base.download(

accept_terms(datadep, localdir, remotepath, i_accept_the_terms_of_use)

local fetched_path
while true
fetched_path = run_fetch(datadep.fetch_method, remotepath, localdir)
if skip_checksum || checksum_pass(datadep.hash, fetched_path)
break
mkpath(localdir)
try
local fetched_path
while true # this is a Do-While loop
fetched_path = run_fetch(datadep.fetch_method, remotepath, localdir)
if skip_checksum || checksum_pass(datadep.hash, fetched_path)
break
end
end
end

run_post_fetch(datadep.post_fetch_method, fetched_path)
run_post_fetch(datadep.post_fetch_method, fetched_path)
catch err
env_bool("DATADEPS_DISABLE_ERROR_CLEANUP") || rm(localdir, force=true, recursive=true)
rethrow(err)
end
end

"""
Expand All @@ -64,7 +70,6 @@ into the local directory and local paths.
Performs in (async) parallel if multiple paths are given
"""
function run_fetch(fetch_method, remotepath, localdir)
    # Create the destination directory (and any missing parents) before fetching.
    mkpath(localdir)
    # Whatever `fetch_method` returns is handed straight back to the caller.
    return fetch_method(remotepath, localdir)
end
Expand Down Expand Up @@ -129,7 +134,11 @@ end
Ensures the user accepts the terms of use; otherwise errors out.
"""
function accept_terms(datadep::DataDep, localpath, remotepath, ::Void)
if !env_bool("DATADEPS_ALWAY_ACCEPT")
if haskey(ENV, "DATADEPS_ALWAY_ACCEPT")
warn("Environment variable \$DATADEPS_ALWAY_ACCEPT is deprecated. " *
"Please use \$DATADEPS_ALWAYS_ACCEPT instead.")
end
if !(env_bool("DATADEPS_ALWAYS_ACCEPT") || env_bool("DATADEPS_ALWAY_ACCEPT"))
response = check_if_accept_terms(datadep, localpath, remotepath)
accept_terms(datadep, localpath, remotepath, response)
else
Expand Down
31 changes: 1 addition & 30 deletions test/examples.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using Base.Test
using DataDeps

ENV["DATADEPS_ALWAY_ACCEPT"]=true
ENV["DATADEPS_ALWAYS_ACCEPT"]=true

@testset "Pi" begin
register(DataDep(
Expand Down Expand Up @@ -203,38 +203,9 @@ end

@test length(collect(eachline(datadep"UCI Adult/adult.names"))) == 110


end


@testset "Data.Gov Babynames" begin
    # Human-readable description shown for this data dependency.
    description = """
    Dataset: Baby Names from Social Security Card Applications-National Level Data
    Website: https://catalog.data.gov/dataset/baby-names-from-social-security-card-applications-national-level-data
    License: CC0

    The data (name, year of birth, sex and number) are from a 100 percent sample of Social Security card applications after 1879.
    """

    # Remote sources: the zipped names data plus a metadata record.
    # Interestingly the metadata file fails on Windows to resolve to a filename to save to
    # (see warnings); the `mv` in the post-fetch steps below is the work-around.
    remote_urls = [
        "https://www.ssa.gov/oact/babynames/names.zip",
        "https://catalog.data.gov/harvest/object/f8ab4d49-b6b4-47d8-b1bb-b18187094f35"
    ]

    # Post-fetch steps: unpack the archive, and rename the metadata file to a fixed name.
    # NOTE(review): presumably these are applied pairwise to `remote_urls` -- confirm.
    post_steps = [unpack, fetched -> mv(fetched, "metadata551randstuff.json")]

    # `Any` as the checksum: test that there is no warning about checksum.
    # This data is updated annually.
    #TODO : Automate this test with new 0.7 test_warn stuff
    register(DataDep("Baby Names", description, remote_urls, Any;
                     post_fetch_method = post_steps))

    # No zip file may remain in the resolved directory.
    @test !any(fname -> endswith(fname, "zip"), readdir(datadep"Baby Names"))

    # Spot-check the first record of one of the extracted files.
    yob_file = joinpath(datadep"Baby Names", "yob2016.txt")
    @test first(eachline(yob_file)) == "Emma,F,19471"

    # The renamed metadata file must exist.
    metadata_file = joinpath(datadep"Baby Names", "metadata551randstuff.json")
    @test filemode(metadata_file) > 0
end



@testset "FastText gzipped" begin
register(DataDep("FastText fr",
"""
Expand Down
23 changes: 21 additions & 2 deletions test/main.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ using ExpectationStubs
# HACK: todo, work out how ExpectationStubs should be changed to make this make sense
Base.open(stub::Stub, t::Any, ::AbstractString) = stub(t)

withenv("DATADEPS_ALWAY_ACCEPT"=>"true") do
withenv("DATADEPS_ALWAYS_ACCEPT"=>"true") do
@testset "automatic download" begin
@stub dummyhash
@expect(dummyhash(::Any) = [0x12, 0x34])
Expand Down Expand Up @@ -35,5 +35,24 @@ withenv("DATADEPS_ALWAY_ACCEPT"=>"true") do
macroexpand(:(@datadep_str var)) # this line would throw an error if the varibles were being handle wrong
@test true
end



@testset "Ensure when errors occur the datadep will still retrydownloading" begin
    @testset "error in fetch" begin
        # Counts how many times the fetch method has been invoked.
        attempts = Ref(0)

        # A fetch method that records the call and then always fails.
        function failing_fetch(remote, destination)
            attempts[] += 1
            error("no download for you")
        end

        register(DataDep(
            "TestErrorFetch",
            "dummy message",
            "http://example.void",
            Any;
            fetch_method = failing_fetch
        ))

        # First resolution attempt: the fetch runs once and its error propagates.
        @test_throws Exception datadep"TestErrorFetch"
        @test attempts[] == 1

        # Second resolution attempt: it should have tried to download again.
        @test_throws Exception datadep"TestErrorFetch"
        @test attempts[] == 2
    end
end
end