From 10843ebce4dbd3d68dcf08aad614b5963f0a80ca Mon Sep 17 00:00:00 2001 From: David Hanak Date: Mon, 28 Nov 2022 18:27:49 +0100 Subject: [PATCH] Use seed! to put every copy of rng into a unique state Using `rand(_rng, i)` didn't really put all copies of `rng` into a unique state; the states were still interlocked (all the generators produced the same sequence of random numbers with some offset). Calling `seed!` with a deterministic, pseudo-random seed for each tree produces much better results, which is also visible in the classification and regression accuracies produced by the tests. --- src/classification/main.jl | 6 ++---- src/regression/main.jl | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/classification/main.jl b/src/classification/main.jl index 146f3d06..9bbf82b6 100644 --- a/src/classification/main.jl +++ b/src/classification/main.jl @@ -370,12 +370,10 @@ function build_forest( loss = (ns, n) -> util.entropy(ns, n, entropy_terms) if rng isa Random.AbstractRNG + shared_seed = rand(rng, UInt) Threads.@threads for i in 1:n_trees # The Mersenne Twister (Julia's default) is not thread-safe. - _rng = copy(rng) - # Take some elements from the ring to have different states for each tree. This - # is the only way given that only a `copy` can be expected to exist for RNGs. - rand(_rng, i) + _rng = Random.seed!(copy(rng), shared_seed + i) inds = rand(_rng, 1:t_samples, n_samples) forest[i] = build_tree( labels[inds], diff --git a/src/regression/main.jl b/src/regression/main.jl index 1c58637f..77231c4a 100644 --- a/src/regression/main.jl +++ b/src/regression/main.jl @@ -95,12 +95,10 @@ function build_forest( forest = impurity_importance ? Vector{Root{S, T}}(undef, n_trees) : Vector{LeafOrNode{S, T}}(undef, n_trees) if rng isa Random.AbstractRNG + shared_seed = rand(rng, UInt) Threads.@threads for i in 1:n_trees # The Mersenne Twister (Julia's default) is not thread-safe. 
- _rng = copy(rng) - # Take some elements from the ring to have different states for each tree. - # This is the only way given that only a `copy` can be expected to exist for RNGs. - rand(_rng, i) + _rng = Random.seed!(copy(rng), shared_seed + i) inds = rand(_rng, 1:t_samples, n_samples) forest[i] = build_tree( labels[inds],