diff --git a/src/Population.jl b/src/Population.jl index 7591a3c0f..9cba1718e 100644 --- a/src/Population.jl +++ b/src/Population.jl @@ -1,3 +1,4 @@ +using Random using FromFile @from "Core.jl" import Options, Dataset, RecordType, stringTree @from "EquationUtils.jl" import countNodes @@ -41,30 +42,30 @@ Population(X::AbstractMatrix{T}, y::AbstractVector{T}, baseline::T; # Sample 10 random members of the population, and make a new one function samplePop(pop::Population, options::Options)::Population - idx = rand(1:pop.n, options.ns) + idx = randperm(pop.n)[1:options.ns] return Population(pop.members[idx]) end # Sample the population, and get the best member from that sample function bestOfSample(pop::Population, options::Options)::PopMember sample = samplePop(pop, options) - if options.probPickFirst == 1.0 - best_idx = argmin([sample.members[member].score for member=1:options.ns]) - return sample.members[best_idx] + + scores = [sample.members[member].score for member=1:options.ns] + p = options.probPickFirst + + if p == 1.0 + chosen_idx = argmin(scores) else - sort_idx = sortperm([sample.members[member].score for member=1:options.ns]) - # Lowest comes first - k = range(0.0, stop=options.ns-1, step=1.0) |> collect - p = options.probPickFirst + sort_idx = sortperm(scores) + # scores[sort_idx] would put smallest first. - # Weighted choice: + k = collect(0:(options.ns - 1)) prob_each = p * (1 - p) .^ k prob_each /= sum(prob_each) cumprob = cumsum(prob_each) - chosen_idx = findfirst(cumprob .> rand(Float32)) - - return sample.members[chosen_idx] + chosen_idx = sort_idx[findfirst(cumprob .> rand())] end + return sample.members[chosen_idx] end function finalizeScores(dataset::Dataset{T}, diff --git a/src/SymbolicRegression.jl b/src/SymbolicRegression.jl index 7e536c43c..a26a4f440 100644 --- a/src/SymbolicRegression.jl +++ b/src/SymbolicRegression.jl @@ -68,7 +68,7 @@ using Reexport @from "MutationFunctions.jl" import genRandomTree, genRandomTreeFixedSize @from "LossFunctions.jl" import EvalLoss, Loss, scoreFunc @from "PopMember.jl" import PopMember, copyPopMember -@from "Population.jl" import Population, bestSubPop, record_population +@from "Population.jl" import Population, bestSubPop, record_population, bestOfSample @from "HallOfFame.jl" import HallOfFame, calculateParetoFrontier, string_dominating_pareto_curve @from "SingleIteration.jl" import SRCycle, OptimizeAndSimplifyPopulation @from "InterfaceSymbolicUtils.jl" import node_to_symbolic, symbolic_to_node diff --git a/test/test_prob_pick_first.jl b/test/test_prob_pick_first.jl new file mode 100644 index 000000000..97a3be105 --- /dev/null +++ b/test/test_prob_pick_first.jl @@ -0,0 +1,35 @@ +using SymbolicRegression, Test + +n = 10 + +options = Options( + binary_operators=(+, -, *, /), + unary_operators=(cos, sin), + probPickFirst=0.999, +) + +for reverse in [false, true] + members = PopMember{Float32}[] + + # Generate members with scores from 0 to 1: + for i=1:n + tree = Node("x1") * 3.2f0 + score = Float32(i-1)/(n-1) + if reverse + score = 1 - score + end + push!(members, PopMember(tree, score)) + end + + pop = Population(members, n) + + best_pop_member = [ + SymbolicRegression.bestOfSample(pop, options).score + for j=1:100 + ] + + mean_value = sum(best_pop_member)/length(best_pop_member) + + # Make sure average score is small + @test mean_value < 0.1 +end \ No newline at end of file diff --git a/test/unittest.jl b/test/unittest.jl index 43b87b87e..1d8ae42c3 100644 --- a/test/unittest.jl +++ b/test/unittest.jl @@ -227,4 +227,9 @@ for fnc in [ zero_tolerance = 1e-6 @test all(abs.(test_y .- true_y)/N .< zero_tolerance) -end \ No newline at end of file +end + + +println("Testing whether probPickFirst works.") +include("test_prob_pick_first.jl") +println("Passed.") \ No newline at end of file