Skip to content

Commit

Permalink
Merge pull request #30 from DrChainsaw/iterfix
Browse files Browse the repository at this point in the history
Iterfix
  • Loading branch information
DrChainsaw committed Apr 8, 2020
2 parents 8320731 + 4751a9a commit bdedc8c
Show file tree
Hide file tree
Showing 9 changed files with 78 additions and 123 deletions.
16 changes: 1 addition & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -602,27 +602,13 @@ siter = ShuffleIterator(copy(data), 2, MersenneTwister(123))
@test size(first(siter)) == size(first(biter))
@test first(siter) != first(biter)

# Flip data along a dimension with a certain probability
probability = 1.0
dimension = 1
fiter = FlipIterator(biter, probability, dimension)
@test first(fiter) == reverse(first(biter), dims=dimension)

# Randomly shift the data while cropping and padding to keep the same size
maxshiftdim1 = 2
maxshiftdim2 = 0
siter = ShiftIterator(biter, maxshiftdim1,maxshiftdim2; rng = MersenneTwister(12))
sdata = first(siter)
@test sdata[1:1,:] == zeros(1,2)
@test sdata[2:4,:] == first(biter)[1:3,:]

# Apply a function to each batch
miter = MapIterator(x -> 2 .* x, biter)
@test first(miter) == 2 .* first(biter)

# Move data to gpu
giter = GpuIterator(miter)
@test first(giter) == first(miter)
@test first(giter) == first(miter) |> gpu

labels = collect(0:5)

Expand Down
2 changes: 1 addition & 1 deletion src/NaiveGAflux.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export evolvemodel, AbstractCandidate, CandidateModel, HostCandidate, CacheCandi
export evolve!, AbstractEvolution, NoOpEvolution, AfterEvolution, ResetAfterEvolution, EliteSelection, SusSelection, TournamentSelection, CombinedEvolution, EvolveCandidates

# misc types
export Probability, MutationShield, ApplyIf, RemoveIfSingleInput, RepeatPartitionIterator, MapIterator, GpuIterator, BatchIterator, FlipIterator, ShiftIterator, ShuffleIterator, PersistentArray
export Probability, MutationShield, ApplyIf, RemoveIfSingleInput, RepeatPartitionIterator, SeedIterator, MapIterator, GpuIterator, BatchIterator, FlipIterator, ShiftIterator, ShuffleIterator, PersistentArray

# Persistence
export persist, savemodels
Expand Down
8 changes: 6 additions & 2 deletions src/app/imageclassification/ImageClassification.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ function AutoFlux.fit(c::ImageClassifier, fit_iter, fitnessgen, evostrategy::Abs
Random.seed!(NaiveGAflux.rng_default, c.seed)
@info "Start training with baseseed: $(c.seed)"

insize, outsize = size(fit_iter)
insize, outsize = datasize(fit_iter)

population = initial_models(c.popsize, mdir, c.newpop, fitnessgen, insize, outsize[1])

Expand All @@ -102,6 +102,10 @@ function AutoFlux.fit(c::ImageClassifier, fit_iter, fitnessgen, evostrategy::Abs
return evolutionloop(population, evostrategy, fit_iter, cb)
end

"""
    datasize(itr)

Return the array size(s) of the first element produced by `itr`.

Arrays report their own `size`, tuples are handled element-wise (giving a tuple of
sizes) and any other argument is treated as an iterator whose first element is inspected.
"""
function datasize(itr)
    firstitem = first(itr)
    return datasize(firstitem)
end
datasize(t::Tuple) = map(datasize, t)
function datasize(a::AbstractArray)
    return size(a)
end

function evolutionloop(population, evostrategy, trainingiter, cb)
for (gen, iter) in enumerate(trainingiter)
@info "Begin generation $gen"
Expand Down Expand Up @@ -131,6 +135,6 @@ function initial_models(nr, mdir, newpop, fitnessgen, insize, outsize)
as = initial_archspace(insize[1:2], outsize)
return PersistentArray(mdir, nr, i -> create_model(join(["model", i]), as, iv(i), fitnessgen))
end
create_model(name, as, in, fg) = CacheCandidate(HostCandidate(CandidateModel(CompGraph(in, as(name, in)), newopt(newlr(0.01)), Flux.logitcrossentropy, fg())))
# Build a candidate: a `CompGraph` from `in` to `as(name, in)` wrapped as
# CandidateModel -> HostCandidate -> CacheCandidate, with loss `Flux.logitcrossentropy`
# and a fitness object obtained by calling `fg()`.
# The value passed to `newopt` is drawn uniformly from [0.01, 0.109).
# NOTE(review): `rand()` draws from the global default RNG, not from
# `NaiveGAflux.rng_default` — confirm this is intended if runs should be reproducible.
create_model(name, as, in, fg) = CacheCandidate(HostCandidate(CandidateModel(CompGraph(in, as(name, in)), newopt(rand() * 0.099 + 0.01), Flux.logitcrossentropy, fg())))

end
4 changes: 2 additions & 2 deletions src/app/imageclassification/strategy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ end
# Keyword constructor: supplies defaults and forwards to the positional constructor.
function TrainStrategy(; nepochs=200, batchsize=32, nbatches_per_gen=400, seed=123, dataaug=identity)
    return TrainStrategy(nepochs, batchsize, nbatches_per_gen, seed, dataaug)
end
function trainiter(s::TrainStrategy, x, y)
baseiter = dataiter(x, y, s.batchsize, s.seed, s.dataaug)
epochiter = Iterators.cycle(baseiter, s.nepochs)
return RepeatPartitionIterator(GpuIterator(epochiter), s.nbatches_per_gen)
partiter = RepeatPartitionIterator(GpuIterator(baseiter), s.nbatches_per_gen)
return Iterators.cycle(partiter, s.nepochs)
end

# Batches of `x` from a `ShuffleIterator` driven by a fresh `MersenneTwister` seeded with `seed`.
function batch(x, batchsize, seed)
    shufflerng = MersenneTwister(seed)
    return ShuffleIterator(x, batchsize, shufflerng)
end
Expand Down
125 changes: 54 additions & 71 deletions src/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,23 @@ end
# Convenience constructor: wrap an arbitrary iterator in `Iterators.Stateful` before
# constructing the `RepeatPartitionIterator`.
RepeatPartitionIterator(base, nrep) = RepeatPartitionIterator(Iterators.Stateful(base), nrep)
# NOTE(review): this method forwards its arguments unchanged, so it only terminates when a
# more specific constructor (presumably the struct's default one, declared outside this
# view) takes over — verify it cannot recurse for the `nrep` types callers pass.
RepeatPartitionIterator(base::Iterators.Stateful, nrep) = RepeatPartitionIterator(base, nrep)

function Base.iterate(itr::RepeatPartitionIterator, state=nothing)
function Base.iterate(itr::RepeatPartitionIterator, reset=true)
if reset
Iterators.reset!(itr.base, itr.base.itr)
end
length(itr) == 0 && return nothing
return Iterators.take(RepeatStatefulIterator(itr.base), itr.ntake), nothing
return Iterators.take(RepeatStatefulIterator(itr.base), itr.ntake), false
end

# Number of partitions: the length of the wrapped stateful iterator divided by the
# partition size `ntake`, rounded up.
function Base.length(itr::RepeatPartitionIterator)
    nbase = length(itr.base)
    return ceil(Int, nbase / itr.ntake)
end
# The element type is reported as the type parameter `T`.
Base.eltype(::RepeatPartitionIterator{T}) where T = T
# Size is that of the iterator wrapped by the `Iterators.Stateful`.
function Base.size(itr::RepeatPartitionIterator)
    return size(itr.base.itr)
end


Base.IteratorSize(itr::RepeatPartitionIterator) = Base.IteratorSize(itr.base)
Base.IteratorSize(itr::RepeatPartitionIterator) = Base.IteratorSize(itr.base.itr)
Base.IteratorEltype(itr::RepeatPartitionIterator) = Base.HasEltype()


"""
cycle(itr, nreps)
An iterator that cycles through `itr nreps` times.
"""
Base.Iterators.cycle(itr, nreps) = Iterators.take(Iterators.cycle(itr), nreps * length(itr))

struct RepeatStatefulIterator{T, VS}
base::Iterators.Stateful{T, VS}
start::VS
Expand All @@ -71,9 +67,56 @@ Base.length(itr::RepeatStatefulIterator) = length(itr.base.itr) - itr.taken
Base.eltype(itr::RepeatStatefulIterator) = eltype(itr.base)
Base.size(itr::RepeatStatefulIterator) = size(itr.base.itr)

Base.IteratorSize(itr::RepeatStatefulIterator) = Base.IteratorSize(itr.base)
Base.IteratorSize(itr::RepeatStatefulIterator) = Base.IteratorSize(itr.base.itr)
Base.IteratorEltype(itr::RepeatStatefulIterator) = Base.HasEltype()

"""
    cycle(itr, nreps)

Return an iterator that cycles through `itr` `nreps` times.

The endless `Iterators.cycle(itr)` is truncated after `nreps * length(itr)` elements,
so `itr` must support `length`.
"""
function Base.Iterators.cycle(itr, nreps)
    # NOTE(review): this extends a Base function on untyped arguments; check for overlap
    # with any two-argument `cycle` provided by the Julia version in use.
    nelements = nreps * length(itr)
    return Iterators.take(Iterators.cycle(itr), nelements)
end

"""
    SeedIterator
    SeedIterator(base; rng=rng_default, seed=rand(rng, UInt32))

Iterator which has the random seed of an `AbstractRNG` as state.

Calls `Random.seed!(rng, seed)` before every element is fetched from `base`, starting at `seed` and increasing the seed by one per element, so that wrapped iterators which depend on `rng` will produce the same sequence each time the `SeedIterator` is iterated.

Useful in conjunction with [`RepeatPartitionIterator`](@ref) and random data augmentation so that all candidates in a generation are trained with identical augmentation.
"""
struct SeedIterator{R <: AbstractRNG,T}
# RNG which is reseeded while iterating
rng::R
# Seed used for the first element
seed::UInt32
# Wrapped iterator
base::T
end
# `seed` may be any value accepted by `UInt32(seed)`; by default it is drawn from `rng`.
SeedIterator(base; rng=rng_default, seed=rand(rng, UInt32)) = SeedIterator(rng, UInt32(seed), base)

# First step: seed `rng` with the stored seed, then start iterating the wrapped iterator.
function Base.iterate(itr::SeedIterator)
    Random.seed!(itr.rng, itr.seed)
    return _withnextseed(iterate(itr.base), itr.seed)
end

# Later steps: the state is `(seed, basestate)`. `rng` is reseeded with `seed` before the
# wrapped iterator is advanced from `basestate`.
function Base.iterate(itr::SeedIterator, state)
    seed, basestate = state
    Random.seed!(itr.rng, seed)
    return _withnextseed(iterate(itr.base, basestate), seed)
end

# Pair the result of the wrapped iterator with the seed to use for the next element.
_withnextseed(::Nothing, seed) = nothing
_withnextseed((val, basestate)::Tuple, seed) = (val, (seed + 1, basestate))

# Length, element type, size and iterator traits are all forwarded to the wrapped iterator.
for fn in (:length, :eltype, :size, :IteratorSize, :IteratorEltype)
    @eval Base.$fn(itr::SeedIterator) = Base.$fn(itr.base)
end

"""
MapIterator{F, T}
Expand Down Expand Up @@ -167,66 +210,6 @@ Base.print(io::IO, itr::BatchIterator) = print(io, "BatchIterator(size=$(size(it

Flux.onehotbatch(itr::BatchIterator, labels) = MapIterator(x -> Flux.onehotbatch(x, labels), itr)

"""
    FlipIterator{T}
    FlipIterator(base, p::Real=0.5, dim::Int=1)

Flips data from `base` along dimension `dim` with probability `p`.

Each element produced by `base` is either passed through unchanged or reversed along `dim`.
"""
struct FlipIterator{T}
# Probability of flipping an element
p::Probability
# Dimension to reverse along
dim::Int
# Wrapped iterator
base::T
end
# Wraps the real number `p` in a `Probability`.
FlipIterator(base, p::Real=0.5, dim::Int=1) = FlipIterator(Probability(p), dim, base)

# Length, size and iterator traits are those of the wrapped iterator.
for fn in (:length, :size, :IteratorSize, :IteratorEltype)
    @eval Base.$fn(itr::FlipIterator) = Base.$fn(itr.base)
end

# Advance the wrapped iterator and hand its result to `flip`.
function Base.iterate(itr::FlipIterator)
    return flip(itr, iterate(itr.base))
end
function Base.iterate(itr::FlipIterator, state)
    return flip(itr, iterate(itr.base, state))
end

# `apply(itr.p)` is evaluated on every call, also when `valstate` is `nothing`.
function flip(itr::FlipIterator, valstate)
    doflip = apply(itr.p)
    return doflip ? flip(itr.dim, valstate) : valstate
end
# Nothing to flip when the wrapped iterator is exhausted.
flip(::Integer, ::Nothing) = nothing
# Reverse the data along `dim` and keep the state of the wrapped iterator.
function flip(dim::Integer, (data, state)::Tuple)
    return reverse(data, dims=dim), state
end

"""
    ShiftIterator{T, S<:AbstractParSpace, R<:AbstractRNG}
    ShiftIterator(base;rng=rng_default)
    ShiftIterator(base, cs::Integer...;rng=rng_default)

Randomly shifts data from `base` in the interval `0:cs` pixels while keeping the original size by cropping and padding.

One maximum shift is given per dimension. When no `cs` is given, the maximum shifts are `4,4,0,0`.
"""
struct ShiftIterator{T, S<:AbstractParSpace, R<:AbstractRNG}
# Called with `rng` to obtain the shift for each element
shift::S
# RNG passed to `shift`
rng::R
# Wrapped iterator
base::T
end
ShiftIterator(base;rng=rng_default) = ShiftIterator(base, 4,4,0,0,rng=rng)
# Builds a `ParSpace` from the ranges `0:c` for each `c` in `cs`.
ShiftIterator(base, cs::Integer...;rng=rng_default) = ShiftIterator(ParSpace(UnitRange.(0, cs)), rng, base)

# Length, size and iterator traits are those of the wrapped iterator.
for fn in (:length, :size, :IteratorSize, :IteratorEltype)
    @eval Base.$fn(itr::ShiftIterator) = Base.$fn(itr.base)
end

# Advance the wrapped iterator and hand its result to `shift`.
function Base.iterate(itr::ShiftIterator)
    return shift(itr, iterate(itr.base))
end
function Base.iterate(itr::ShiftIterator, state)
    return shift(itr, iterate(itr.base, state))
end

# Nothing to shift when the wrapped iterator is exhausted.
shift(::ShiftIterator, ::Nothing) = nothing

# Shift `data` circularly by the sampled offsets, then zero the leading entries which
# wrapped around so the result has the same size as `data`.
function shift(itr::ShiftIterator, (data, state)::Tuple)
    offsets = itr.shift(itr.rng)
    shifted = circshift(data, offsets)
    foreach(enumerate(offsets)) do (dim, nshift)
        selectdim(shifted, dim, 1:nshift) .= 0
    end
    return shifted, state
end

"""
ShuffleIterator{T<:AbstractArray, R<:AbstractRNG}
ShuffleIterator(data, batchsize, rng=rng_default)
Expand Down
1 change: 1 addition & 0 deletions test/app/autoflux.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

@test sleepreti(0.01) == 0.01
@test sleepreti(0.02) == 0.02
fitness(ff, x -> [1 0; 0 1]) # Avoid compiler delays?
@test fitness(ff, x -> [1 0; 0 1]) == 0.501 #SizeFitness gives 0.001 extra

sleepreti(0.4)
Expand Down
14 changes: 0 additions & 14 deletions test/examples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -462,20 +462,6 @@ end
@test size(first(siter)) == size(first(biter))
@test first(siter) != first(biter)

# Flip data along a dimension with a certain probability
probability = 1.0
dimension = 1
fiter = FlipIterator(biter, probability, dimension)
@test first(fiter) == reverse(first(biter), dims=dimension)

# Randomly shift the data while cropping and padding to keep the same size
maxshiftdim1 = 2
maxshiftdim2 = 0
siter = ShiftIterator(biter, maxshiftdim1,maxshiftdim2; rng = MersenneTwister(12))
sdata = first(siter)
@test sdata[1:1,:] == zeros(1,2)
@test sdata[2:4,:] == first(biter)[1:3,:]

# Apply a function to each batch
miter = MapIterator(x -> 2 .* x, biter)
@test first(miter) == 2 .* first(biter)
Expand Down
28 changes: 10 additions & 18 deletions test/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@
end
end

@testset "SeedIterator" begin
# The mapped function draws from `rng`, so two passes over `testitr` can only be equal
# if `rng` is put in the same state for each pass.
rng = MersenneTwister(123)
testitr = SeedIterator(MapIterator(x -> x * rand(rng, Int), ones(10)); rng=rng, seed=12)
@test collect(testitr) == collect(testitr)

# Nested case: a second SeedIterator with its own RNG and seed wraps a map over `testitr`.
# NOTE(review): `rng` is rebound here and the closure created above captures the variable,
# not its value — confirm which RNG the inner map function ends up drawing from.
rng = MersenneTwister(1234)
nesteditr = SeedIterator(MapIterator(x -> x * rand(rng, Int), testitr); rng=rng, seed=1)
@test collect(nesteditr) == collect(nesteditr)
end

@testset "MapIterator" begin
itr = MapIterator(x -> 2x, [1,2,3,4,5])
@test collect(itr) == [2,4,6,8,10]
Expand All @@ -58,24 +68,6 @@ end
@test "biter: $itr" == "biter: BatchIterator(size=(2, 3, 4, 5), batchsize=2)"
end

@testset "FlipIterator" begin
# p = 1.0 and dim = 2: every element is expected to come out reversed along dimension 2.
itr = FlipIterator([[1 2 3 4; 5 6 7 8], [1 2; 3 4]], 1.0, 2)
for (act, exp) in zip(itr, [[4 3 2 1; 8 7 6 5], [2 1; 4 3]])
@test act == exp
end

# p = 0.0: the element is expected to pass through unchanged.
itr = FlipIterator([[1 2 3 4; 5 6 7 8]], 0.0, 2)
@test first(itr) == [1 2 3 4; 5 6 7 8]
end

@testset "ShiftIterator" begin
# Max shift 0 along dimension 1 and 2 along dimension 2.
# `SeqRng` is a test helper defined elsewhere — presumably it makes the sampled shift deterministic.
itr = ShiftIterator([[1 2 3 4; 5 6 7 8], [5 6 7 8; 1 2 3 4]], 0, 2, rng=SeqRng(0))

# Expected: each matrix shifted one step along dimension 2 with the first column zeroed.
for (act, exp) in zip(itr, [[0 1 2 3; 0 5 6 7], [0 5 6 7; 0 1 2 3]])
@test act == exp
end
end

@testset "ShuffleIterator ndims $(length(dims))" for dims in ((5), (3,4), (2,3,4), (2,3,4,5), (2,3,4,5,6), (2,3,4,5,6,7))
sitr = ShuffleIterator(collect(reshape(1:prod(dims),dims...)), 2, MersenneTwister(123))
bitr = BatchIterator(collect(reshape(1:prod(dims),dims...)), 2)
Expand Down
3 changes: 3 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ using Test
return vec[rng.ind]
end

# Test-suite methods for `NaiveNASflux.agg` on `Ewma` which pass both operands through
# `cpu` before combining them with weights `α` and `1 - α`.
function NaiveNASflux.agg(m::NaiveNASflux.Ewma, x, y)
    α = m.α
    return α .* cpu(x) .+ (1 - α) .* cpu(y)
end
# When the first operand is `missing`, the result is just `y` passed through `cpu`.
function NaiveNASflux.agg(m::NaiveNASflux.Ewma, ::Missing, y)
    return cpu(y)
end

@info "Testing util"
include("util.jl")

Expand Down

0 comments on commit bdedc8c

Please sign in to comment.