Iterfix #30

Merged
merged 5 commits on Apr 8, 2020
16 changes: 1 addition & 15 deletions README.md
@@ -602,27 +602,13 @@ siter = ShuffleIterator(copy(data), 2, MersenneTwister(123))
@test size(first(siter)) == size(first(biter))
@test first(siter) != first(biter)

# Flip data along a dimension with a certain probability
probability = 1.0
dimension = 1
fiter = FlipIterator(biter, probability, dimension)
@test first(fiter) == reverse(first(biter), dims=dimension)

# Randomly shift the data while cropping and padding to keep the same size
maxshiftdim1 = 2
maxshiftdim2 = 0
siter = ShiftIterator(biter, maxshiftdim1,maxshiftdim2; rng = MersenneTwister(12))
sdata = first(siter)
@test sdata[1:1,:] == zeros(1,2)
@test sdata[2:4,:] == first(biter)[1:3,:]

# Apply a function to each batch
miter = MapIterator(x -> 2 .* x, biter)
@test first(miter) == 2 .* first(biter)

# Move data to gpu
giter = GpuIterator(miter)
@test first(giter) == first(miter)
@test first(giter) == first(miter) |> gpu

labels = collect(0:5)

2 changes: 1 addition & 1 deletion src/NaiveGAflux.jl
@@ -30,7 +30,7 @@ export evolvemodel, AbstractCandidate, CandidateModel, HostCandidate, CacheCandi
export evolve!, AbstractEvolution, NoOpEvolution, AfterEvolution, ResetAfterEvolution, EliteSelection, SusSelection, TournamentSelection, CombinedEvolution, EvolveCandidates

# misc types
export Probability, MutationShield, ApplyIf, RemoveIfSingleInput, RepeatPartitionIterator, MapIterator, GpuIterator, BatchIterator, FlipIterator, ShiftIterator, ShuffleIterator, PersistentArray
export Probability, MutationShield, ApplyIf, RemoveIfSingleInput, RepeatPartitionIterator, SeedIterator, MapIterator, GpuIterator, BatchIterator, FlipIterator, ShiftIterator, ShuffleIterator, PersistentArray

# Persistence
export persist, savemodels
8 changes: 6 additions & 2 deletions src/app/imageclassification/ImageClassification.jl
@@ -90,7 +90,7 @@ function AutoFlux.fit(c::ImageClassifier, fit_iter, fitnessgen, evostrategy::Abs
Random.seed!(NaiveGAflux.rng_default, c.seed)
@info "Start training with baseseed: $(c.seed)"

insize, outsize = size(fit_iter)
insize, outsize = datasize(fit_iter)

population = initial_models(c.popsize, mdir, c.newpop, fitnessgen, insize, outsize[1])

@@ -102,6 +102,10 @@ function AutoFlux.fit(c::ImageClassifier, fit_iter, fitnessgen, evostrategy::Abs
return evolutionloop(population, evostrategy, fit_iter, cb)
end

datasize(itr) = datasize(first(itr))
datasize(t::Tuple) = datasize.(t)
datasize(a::AbstractArray) = size(a)
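
A quick illustrative check, not part of the diff: `datasize` recurses through `first(itr)`, so an iterator yielding `(input, label)` tuples of arrays reports the size of each array. The shapes below are arbitrary example values.
example_batch = (ones(Float32, 32, 32, 3, 8), ones(Float32, 10, 8))  # hypothetical (input, label) batch
datasize([example_batch]) == ((32, 32, 3, 8), (10, 8))  # broadcasts size over the tuple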

function evolutionloop(population, evostrategy, trainingiter, cb)
for (gen, iter) in enumerate(trainingiter)
@info "Begin generation $gen"
@@ -131,6 +135,6 @@ function initial_models(nr, mdir, newpop, fitnessgen, insize, outsize)
as = initial_archspace(insize[1:2], outsize)
return PersistentArray(mdir, nr, i -> create_model(join(["model", i]), as, iv(i), fitnessgen))
end
create_model(name, as, in, fg) = CacheCandidate(HostCandidate(CandidateModel(CompGraph(in, as(name, in)), newopt(newlr(0.01)), Flux.logitcrossentropy, fg())))
create_model(name, as, in, fg) = CacheCandidate(HostCandidate(CandidateModel(CompGraph(in, as(name, in)), newopt(rand() * 0.099 + 0.01), Flux.logitcrossentropy, fg())))
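
As a rough illustration (not from the diff) of what the new initial learning rate expression samples: `rand()` is uniform on [0, 1), so each candidate starts with a rate in [0.01, 0.109).
lrs = [rand() * 0.099 + 0.01 for _ in 1:10_000]  # illustrative sampling only
@assert all(lr -> 0.01 <= lr < 0.109, lrs)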

end
4 changes: 2 additions & 2 deletions src/app/imageclassification/strategy.jl
@@ -167,8 +167,8 @@ end
TrainStrategy(;nepochs=200, batchsize=32, nbatches_per_gen=400, seed=123, dataaug=identity) = TrainStrategy(nepochs, batchsize, nbatches_per_gen, seed, dataaug)
function trainiter(s::TrainStrategy, x, y)
baseiter = dataiter(x, y, s.batchsize, s.seed, s.dataaug)
epochiter = Iterators.cycle(baseiter, s.nepochs)
return RepeatPartitionIterator(GpuIterator(epochiter), s.nbatches_per_gen)
partiter = RepeatPartitionIterator(GpuIterator(baseiter), s.nbatches_per_gen)
return Iterators.cycle(partiter, s.nepochs)
end

batch(x, batchsize, seed) = ShuffleIterator(x, batchsize, MersenneTwister(seed))
125 changes: 54 additions & 71 deletions src/iterators.jl
@@ -29,27 +29,23 @@ end
RepeatPartitionIterator(base, nrep) = RepeatPartitionIterator(Iterators.Stateful(base), nrep)
RepeatPartitionIterator(base::Iterators.Stateful, nrep) = RepeatPartitionIterator(base, nrep)

function Base.iterate(itr::RepeatPartitionIterator, state=nothing)
function Base.iterate(itr::RepeatPartitionIterator, reset=true)
if reset
Iterators.reset!(itr.base, itr.base.itr)
end
length(itr) == 0 && return nothing
return Iterators.take(RepeatStatefulIterator(itr.base), itr.ntake), nothing
return Iterators.take(RepeatStatefulIterator(itr.base), itr.ntake), false
end

Base.length(itr::RepeatPartitionIterator) = ceil(Int, length(itr.base) / itr.ntake)
Base.eltype(itr::RepeatPartitionIterator{T}) where T = T
Base.size(itr::RepeatPartitionIterator) = size(itr.base.itr)


Base.IteratorSize(itr::RepeatPartitionIterator) = Base.IteratorSize(itr.base)
Base.IteratorSize(itr::RepeatPartitionIterator) = Base.IteratorSize(itr.base.itr)
Base.IteratorEltype(itr::RepeatPartitionIterator) = Base.HasEltype()
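
A minimal consumption sketch (illustrative only, not from the diff): each element yielded by a `RepeatPartitionIterator` is itself an iterator over the next `ntake` items, and it can be replayed, e.g. once per candidate in a generation.
rpiter = RepeatPartitionIterator(1:6, 2)  # three partitions of two items each
for part in rpiter
    @assert collect(part) == collect(part)  # the same two items can be read again before moving on
end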


"""
cycle(itr, nreps)

An iterator that cycles through `itr` `nreps` times.
"""
Base.Iterators.cycle(itr, nreps) = Iterators.take(Iterators.cycle(itr), nreps * length(itr))

struct RepeatStatefulIterator{T, VS}
base::Iterators.Stateful{T, VS}
start::VS
@@ -71,9 +67,56 @@ Base.length(itr::RepeatStatefulIterator) = length(itr.base.itr) - itr.taken
Base.eltype(itr::RepeatStatefulIterator) = eltype(itr.base)
Base.size(itr::RepeatStatefulIterator) = size(itr.base.itr)

Base.IteratorSize(itr::RepeatStatefulIterator) = Base.IteratorSize(itr.base)
Base.IteratorSize(itr::RepeatStatefulIterator) = Base.IteratorSize(itr.base.itr)
Base.IteratorEltype(itr::RepeatStatefulIterator) = Base.HasEltype()

"""
cycle(itr, nreps)

An iterator that cycles through `itr` `nreps` times.
"""
Base.Iterators.cycle(itr, nreps) = Iterators.take(Iterators.cycle(itr), nreps * length(itr))
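
A one-line illustration of the two-argument `cycle` defined above:
@assert collect(Iterators.cycle(1:3, 2)) == [1, 2, 3, 1, 2, 3]  # repeats the finite iterator twice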

"""
SeedIterator
SeedIterator(base; rng=rng_default, seed=rand(rng, UInt32))

Iterator which has the random seed of an `AbstractRNG` as state.

Calls `Random.seed!(rng, seed)` every iteration so that wrapped iterators which depend on `rng` will produce the same sequence.

Useful in conjunction with [`RepeatPartitionIterator`](@ref) and random data augmentation so that all candidates in a generation are trained with identical augmentation.
"""
struct SeedIterator{R <: AbstractRNG,T}
rng::R
seed::UInt32
base::T
end
SeedIterator(base; rng=rng_default, seed=rand(rng, UInt32)) = SeedIterator(rng, UInt32(seed), base)

function Base.iterate(itr::SeedIterator)
Random.seed!(itr.rng, itr.seed)
valstate = iterate(itr.base)
valstate === nothing && return nothing
val, state = valstate
return val, (itr.seed+1, state)
end

function Base.iterate(itr::SeedIterator, state)
seed,basestate = state
Random.seed!(itr.rng, seed)
valstate = iterate(itr.base, basestate)
valstate === nothing && return nothing
val, state = valstate
return val, (seed+1, state)
end

Base.length(itr::SeedIterator) = length(itr.base)
Base.eltype(itr::SeedIterator) = eltype(itr.base)
Base.size(itr::SeedIterator) = size(itr.base)

Base.IteratorSize(itr::SeedIterator) = Base.IteratorSize(itr.base)
Base.IteratorEltype(itr::SeedIterator) = Base.IteratorEltype(itr.base)
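
A minimal usage sketch mirroring the docstring, assuming `NaiveGAflux` and `Random` are loaded; the noise added through `MapIterator` merely stands in for random data augmentation and is not code from this PR:
rng = MersenneTwister(0)
augiter = MapIterator(x -> x .+ randn(rng, size(x)...), BatchIterator(ones(4, 10), 2))
seediter = SeedIterator(augiter; rng=rng, seed=42)
@assert collect(seediter) == collect(seediter)  # re-seeding on every pass makes the "augmented" batches repeat exactly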

"""
MapIterator{F, T}
@@ -167,66 +210,6 @@ Base.print(io::IO, itr::BatchIterator) = print(io, "BatchIterator(size=$(size(it

Flux.onehotbatch(itr::BatchIterator, labels) = MapIterator(x -> Flux.onehotbatch(x, labels), itr)

"""
FlipIterator{T}
FlipIterator(base, p::Real=0.5, dim::Int=1)

Flips data from `base` along dimension `dim` with probability `p`.
"""
struct FlipIterator{T}
p::Probability
dim::Int
base::T
end
FlipIterator(base, p::Real=0.5, dim::Int=1) = FlipIterator(Probability(p), dim, base)

Base.length(itr::FlipIterator) = length(itr.base)
Base.size(itr::FlipIterator) = size(itr.base)

Base.IteratorSize(itr::FlipIterator) = Base.IteratorSize(itr.base)
Base.IteratorEltype(itr::FlipIterator) = Base.IteratorEltype(itr.base)

Base.iterate(itr::FlipIterator) = flip(itr, iterate(itr.base))
Base.iterate(itr::FlipIterator, state) = flip(itr, iterate(itr.base, state))

flip(itr::FlipIterator, valstate) = apply(itr.p) ? flip(itr.dim, valstate) : valstate
flip(dim::Integer, ::Nothing) = nothing
flip(dim::Integer, (data,state)::Tuple) = reverse(data, dims=dim), state

"""
ShiftIterator{T, S<:AbstractParSpace, R<:AbstractRNG}
ShiftIterator(base;rng=rng_default)
ShiftIterator(base, cs::Integer...;rng=rng_default)

Randomly shifts data from `base` in the interval `0:cs` pixels while keeping the original size by cropping and padding.
"""
struct ShiftIterator{T, S<:AbstractParSpace, R<:AbstractRNG}
shift::S
rng::R
base::T
end
ShiftIterator(base;rng=rng_default) = ShiftIterator(base, 4,4,0,0,rng=rng)
ShiftIterator(base, cs::Integer...;rng=rng_default) = ShiftIterator(ParSpace(UnitRange.(0, cs)), rng, base)

Base.length(itr::ShiftIterator) = length(itr.base)
Base.size(itr::ShiftIterator) = size(itr.base)

Base.IteratorSize(itr::ShiftIterator) = Base.IteratorSize(itr.base)
Base.IteratorEltype(itr::ShiftIterator) = Base.IteratorEltype(itr.base)

Base.iterate(itr::ShiftIterator) = shift(itr, iterate(itr.base))
Base.iterate(itr::ShiftIterator, state) = shift(itr, iterate(itr.base, state))

shift(itr::ShiftIterator, ::Nothing) = nothing
function shift(itr::ShiftIterator, (data,state)::Tuple)
s = itr.shift(itr.rng)
sdata = circshift(data, s)
for (dim, sdim) in enumerate(s)
selectdim(sdata, dim, 1:sdim) .= 0
end
return sdata, state
end

"""
ShuffleIterator{T<:AbstractArray, R<:AbstractRNG}
ShuffleIterator(data, batchsize, rng=rng_default)
1 change: 1 addition & 0 deletions test/app/autoflux.jl
@@ -56,6 +56,7 @@

@test sleepreti(0.01) == 0.01
@test sleepreti(0.02) == 0.02
fitness(ff, x -> [1 0; 0 1]) # Avoid compiler delays?
@test fitness(ff, x -> [1 0; 0 1]) == 0.501 #SizeFitness gives 0.001 extra

sleepreti(0.4)
14 changes: 0 additions & 14 deletions test/examples.jl
@@ -462,20 +462,6 @@ end
@test size(first(siter)) == size(first(biter))
@test first(siter) != first(biter)

# Flip data along a dimension with a certain probability
probability = 1.0
dimension = 1
fiter = FlipIterator(biter, probability, dimension)
@test first(fiter) == reverse(first(biter), dims=dimension)

# Randomly shift the data while cropping and padding to keep the same size
maxshiftdim1 = 2
maxshiftdim2 = 0
siter = ShiftIterator(biter, maxshiftdim1,maxshiftdim2; rng = MersenneTwister(12))
sdata = first(siter)
@test sdata[1:1,:] == zeros(1,2)
@test sdata[2:4,:] == first(biter)[1:3,:]

# Apply a function to each batch
miter = MapIterator(x -> 2 .* x, biter)
@test first(miter) == 2 .* first(biter)
28 changes: 10 additions & 18 deletions test/iterators.jl
@@ -35,6 +35,16 @@
end
end

@testset "SeedIterator" begin
rng = MersenneTwister(123)
testitr = SeedIterator(MapIterator(x -> x * rand(rng, Int), ones(10)); rng=rng, seed=12)
@test collect(testitr) == collect(testitr)

rng = MersenneTwister(1234)
nesteditr = SeedIterator(MapIterator(x -> x * rand(rng, Int), testitr); rng=rng, seed=1)
@test collect(nesteditr) == collect(nesteditr)
end

@testset "MapIterator" begin
itr = MapIterator(x -> 2x, [1,2,3,4,5])
@test collect(itr) == [2,4,6,8,10]
@@ -58,24 +68,6 @@ end
@test "biter: $itr" == "biter: BatchIterator(size=(2, 3, 4, 5), batchsize=2)"
end

@testset "FlipIterator" begin
itr = FlipIterator([[1 2 3 4; 5 6 7 8], [1 2; 3 4]], 1.0, 2)
for (act, exp) in zip(itr, [[4 3 2 1; 8 7 6 5], [2 1; 4 3]])
@test act == exp
end

itr = FlipIterator([[1 2 3 4; 5 6 7 8]], 0.0, 2)
@test first(itr) == [1 2 3 4; 5 6 7 8]
end

@testset "ShiftIterator" begin
itr = ShiftIterator([[1 2 3 4; 5 6 7 8], [5 6 7 8; 1 2 3 4]], 0, 2, rng=SeqRng(0))

for (act, exp) in zip(itr, [[0 1 2 3; 0 5 6 7], [0 5 6 7; 0 1 2 3]])
@test act == exp
end
end

@testset "ShuffleIterator ndims $(length(dims))" for dims in ((5), (3,4), (2,3,4), (2,3,4,5), (2,3,4,5,6), (2,3,4,5,6,7))
sitr = ShuffleIterator(collect(reshape(1:prod(dims),dims...)), 2, MersenneTwister(123))
bitr = BatchIterator(collect(reshape(1:prod(dims),dims...)), 2)
3 changes: 3 additions & 0 deletions test/runtests.jl
@@ -26,6 +26,9 @@ using Test
return vec[rng.ind]
end

NaiveNASflux.agg(m::NaiveNASflux.Ewma, x, y) = m.α .* cpu(x) .+ (1 - m.α) .* cpu(y)
NaiveNASflux.agg(m::NaiveNASflux.Ewma, ::Missing, y) = cpu(y)

@info "Testing util"
include("util.jl")
