Add a new env var PERL_RAND_SEED

This env var can be used to trigger a repeatable run of a script which calls C<srand()> with no arguments, either explicitly or implicitly via use of C<rand()> prior to calling srand(). This is implemented in such a way that calling C<srand()> with no arguments in forks or subthreads (again explicitly or implicitly) will receive their own seed but the seeds they receive will be repeatable. This is intended for debugging and perl development performance testing, and for running the test suite consistently. It is documented that the exact seeds used to initialize the random state are unspecified, and that they may change between releases or even builds. The only guarantee provided is that the same perl executable will produce the same results twice all other things being equal. In practice and in core testing we do expect consistency, but adding the tightest set of restrictions on use seemed sensible. The env var is ignored when perl is run setuid or setgid similarly to the C<PERL_INTERNAL_RAND_SEED> env var.
Perl · Aug 6, 2022 · 42f55d4 · 42f55d4
1 parent c93c14a
commit 42f55d4
Show file tree

Hide file tree

Showing 19 changed files with 277 additions and 27 deletions.
diff --git a/INSTALL b/INSTALL
@@ -2746,6 +2746,12 @@ X<PERL_INTERNAL_RAND_SEED>
 If you configure perl with C<-Accflags=-DNO_PERL_INTERNAL_RAND_SEED>,
 perl will ignore the C<PERL_INTERNAL_RAND_SEED> environment variable.
 
+=head2 C<-DNO_PERL_RAND_SEED>
+X<PERL_RAND_SEED>
+
+If you configure perl with C<-Accflags=-DNO_PERL_RAND_SEED>,
+perl will ignore the C<PERL_RAND_SEED> environment variable.
+
 =head1 DOCUMENTATION
 
 Read the manual entries before running perl.  The main documentation

diff --git a/MANIFEST b/MANIFEST
@@ -6175,7 +6175,8 @@ t/run/fresh_perl.t		Tests that require a fresh perl.
 t/run/locale.t		Tests related to locale handling
 t/run/noswitch.t		Test aliasing ARGV for other switch tests
 t/run/runenv.t			Test if perl honors its environment variables.
-t/run/runenv_hashseed.t	Test if perl honors PERL_HASH_SEED.
+t/run/runenv_hashseed.t		Test if perl honors PERL_HASH_SEED.
+t/run/runenv_randseed.t		Test if perl honors PERL_RAND_SEED.
 t/run/script.t			See if script invocation works
 t/run/switch0.t			Test the -0 switch
 t/run/switcha.t			Test the -a switch

diff --git a/dist/threads/t/thread.t b/dist/threads/t/thread.t
@@ -11,6 +11,7 @@ BEGIN {
 }
 
 use ExtUtils::testlib;
+use Data::Dumper;
 
 use threads;
 
@@ -156,7 +157,8 @@ package main;
     rand(10);
     threads->create( sub { $rand{int(rand(10000000000))}++ } ) foreach 1..25;
     $_->join foreach threads->list;
-    ok((keys %rand >= 23), "Check that rand() is randomized in new threads");
+    ok((keys %rand >= 23), "Check that rand() is randomized in new threads")
+        or diag Dumper(\%rand);
 }
 
 # bugid #24165

diff --git a/dist/threads/t/version.t b/dist/threads/t/version.t
@@ -1,8 +1,17 @@
 use strict;
 use warnings;
-use threads;
 use Test::More;
 
+BEGIN {
+    use Config;
+    if (! $Config{'useithreads'}) {
+        print("1..0 # SKIP Perl not compiled with 'useithreads'\n");
+        exit(0);
+    }
+}
+
+use threads;
+
 # test that the version documented in threads.pm pod matches
 # that of the code.
 

diff --git a/dist/threads/threads.xs b/dist/threads/threads.xs
@@ -807,6 +807,7 @@ S_ithread_create(
     thread->gimme = gimme;
     thread->state = exit_opt;
 
+
     /* "Clone" our interpreter into the thread's interpreter.
      * This gives thread access to "static data" and code.
      */
@@ -1171,6 +1172,7 @@ ithread_create(...)
         if (! thread) {
             XSRETURN_UNDEF;     /* Mutex already unlocked */
         }
+        PERL_SRAND_OVERRIDE_NEXT_PARENT();
         ST(0) = sv_2mortal(S_ithread_to_SV(aTHX_ Nullsv, thread, classname, FALSE));
 
         /* Let thread run. */
@@ -1179,7 +1181,6 @@ ithread_create(...)
         /* warning: releasing mutex 'thread->mutex' that was not held [-Wthread-safety-analysis] */
         MUTEX_UNLOCK(&thread->mutex);
         CLANG_DIAG_RESTORE_STMT;
-
         /* XSRETURN(1); - implied */
 
 

diff --git a/embedvar.h b/embedvar.h
diff --git a/handy.h b/handy.h
@@ -2967,21 +2967,43 @@ last-inclusive range.
  * are very useful when you want an integer to "dance" in a random way,
  * but you also never want it to become 0 and thus false.
  *
- * Obviously they leave x unchanged if it starts out as 0. */
+ * Obviously they leave x unchanged if it starts out as 0. 
+ *
+ * We have two variants just because that can be helpful in certain
+ * places. There is no advantage to either, they are equally bad as each
+ * other as far RNG's go.  The main point is they produce different 
+ * sequences.
+ *
+ * */
+
+#define PERL_XORSHIFT32_A(x)    \
+STMT_START {                    \
+    (x) ^= ((x) << 13);         \
+    (x) ^= ((x) >> 17);         \
+    (x) ^= ((x) << 5);          \
+} STMT_END
 
-#define PERL_XORSHIFT64(x)      \
+/* 32 bit version */
+#define PERL_XORSHIFT64_A(x)    \
 STMT_START {                    \
-    (x) ^= (x) << 13;           \
-    (x) ^= (x) >> 17;           \
-    (x) ^= (x) << 5;            \
+    (x) ^= ((x) << 13);         \
+    (x) ^= ((x) >> 7);          \
+    (x) ^= ((x) << 17);         \
 } STMT_END
 
 /* 32 bit version */
-#define PERL_XORSHIFT32(x)           \
+#define PERL_XORSHIFT32_B(x)    \
+STMT_START {                    \
+    (x) ^= ((x) << 5);          \
+    (x) ^= ((x) >> 27);         \
+    (x) ^= ((x) << 8);          \
+} STMT_END
+
+#define PERL_XORSHIFT64_B(x)    \
 STMT_START {                    \
-    (x) ^= (x) << 13;           \
-    (x) ^= (x) >> 7;            \
-    (x) ^= (x) << 17;           \
+    (x) ^= ((x) << 15);         \
+    (x) ^= ((x) >> 49);         \
+    (x) ^= ((x) << 26);         \
 } STMT_END
 
 

diff --git a/hv.c b/hv.c
@@ -56,10 +56,10 @@ static const char S_strtab_error[]
  */
 #if IVSIZE == 8
 /* 64 bit version */
-#define XORSHIFT_RAND_BITS(x)   PERL_XORSHIFT64(x)
+#define XORSHIFT_RAND_BITS(x)   PERL_XORSHIFT64_A(x)
 #else
 /* 32 bit version */
-#define XORSHIFT_RAND_BITS(x)   PERL_XORSHIFT32(x)
+#define XORSHIFT_RAND_BITS(x)   PERL_XORSHIFT32_A(x)
 #endif
 
 #define UPDATE_HASH_RAND_BITS_KEY(key,klen)                             \

diff --git a/intrpvar.h b/intrpvar.h
@@ -808,7 +808,9 @@ PERLVARI(I, perl_destruct_level, signed char,	0)
 
 PERLVAR(I, pad_reset_pending, bool)	/* reset pad on next attempted alloc */
 
-PERLVAR(I, srand_called, bool)
+PERLVARI(I, srand_called, bool, false)      /* has random_state been initialized yet? */
+PERLVARI(I, srand_override, U32, 0)         /* Should we use a deterministic sequence? */
+PERLVARI(I, srand_override_next, U32, 0)    /* Next item in the sequence */
 
 #ifdef USE_LOCALE_NUMERIC
 

diff --git a/perl.c b/perl.c
@@ -266,20 +266,43 @@ perl_construct(pTHXx)
 
     init_stacks();
 
-/* The PERL_INTERNAL_RAND_SEED set-up must be after init_stacks because it calls
+#if !defined(NO_PERL_RAND_SEED) || !defined(NO_PERL_INTERNAL_HASH_SEED)
+    bool sensitive_env_vars_allowed =
+            (PerlProc_getuid() == PerlProc_geteuid() &&
+             PerlProc_getgid() == PerlProc_getegid()) ? TRUE : FALSE;
+#endif
+
+/* The seed set-up must be after init_stacks because it calls
  * things that may put SVs on the stack.
  */
+#ifndef NO_PERL_RAND_SEED
+    if (sensitive_env_vars_allowed) {
+        UV seed= 0;
+        const char *env_pv;
+        if ((env_pv = PerlEnv_getenv("PERL_RAND_SEED")) &&
+            grok_number(env_pv, strlen(env_pv), &seed) == IS_NUMBER_IN_UV)
+        {
+
+            PL_srand_override_next = seed;
+            PERL_SRAND_OVERRIDE_NEXT_INIT();
+        }
+    }
+#endif
 
+    /* This is NOT the state used for C<rand()>, this is only
+     * used in internal functionality */
 #ifdef NO_PERL_INTERNAL_RAND_SEED
     Perl_drand48_init_r(&PL_internal_random_state, seed());
 #else
     {
         UV seed;
         const char *env_pv;
-        if (PerlProc_getuid() != PerlProc_geteuid() ||
-            PerlProc_getgid() != PerlProc_getegid() ||
+        if (
+            !sensitive_env_vars_allowed ||
             !(env_pv = PerlEnv_getenv("PERL_INTERNAL_RAND_SEED")) ||
-            grok_number(env_pv, strlen(env_pv), &seed) != IS_NUMBER_IN_UV) {
+            grok_number(env_pv, strlen(env_pv), &seed) != IS_NUMBER_IN_UV)
+        {
+            /* use a randomly generated seed */
             seed = seed();
         }
         Perl_drand48_init_r(&PL_internal_random_state, (U32)seed);
@@ -2051,6 +2074,12 @@ S_Internals_V(pTHX_ CV *cv)
 #  endif
 #  ifdef USE_THREAD_SAFE_LOCALE
                              " USE_THREAD_SAFE_LOCALE"
+#  endif
+#  ifdef NO_PERL_RAND_SEED
+                             " NO_PERL_RAND_SEED"
+#  endif
+#  ifdef NO_PERL_INTERNAL_RAND_SEED
+                             " NO_PERL_INTERNAL_RAND_SEED"
 #  endif
         ;
     PERL_UNUSED_ARG(cv);

diff --git a/perl.h b/perl.h
@@ -8794,8 +8794,31 @@ END_EXTERN_C
 #  endif
 #endif
 
-#endif /* DOUBLE_HAS_NAN */
+/* these are used to faciliate the env var PERL_RAND_SEED,
+ * which allows consistent behavior from code that calls
+ * srand() with no arguments, either explicitly or implicitly.
+ */
+#define PERL_SRAND_OVERRIDE_NEXT() PERL_XORSHIFT32_A(PL_srand_override_next);
+
+#define PERL_SRAND_OVERRIDE_NEXT_INIT() STMT_START {    \
+    PL_srand_override = PL_srand_override_next;         \
+    PERL_SRAND_OVERRIDE_NEXT();                         \
+} STMT_END
+
+#define PERL_SRAND_OVERRIDE_GET(into) STMT_START {      \
+    into= PL_srand_override;                            \
+    PERL_SRAND_OVERRIDE_NEXT_INIT();                    \
+} STMT_END
 
+#define PERL_SRAND_OVERRIDE_NEXT_CHILD() STMT_START {   \
+    PERL_XORSHIFT32_B(PL_srand_override_next);          \
+    PERL_SRAND_OVERRIDE_NEXT_INIT();                    \
+} STMT_END                                              
+
+#define PERL_SRAND_OVERRIDE_NEXT_PARENT() \
+    PERL_SRAND_OVERRIDE_NEXT()
+
+#endif /* DOUBLE_HAS_NAN */
 
 /*
 

diff --git a/pod/perldelta.pod b/pod/perldelta.pod
@@ -27,6 +27,17 @@ here, but most should go in the L</Performance Enhancements> section.
 
 [ List each enhancement as a =head2 entry ]
 
+=head2 PERL_RAND_SEED
+
+Added a new environment variable C<PERL_RAND_SEED> which can be used to
+cause a perl program which uses C<rand> without using C<srand()>
+explicitly or which uses C<srand()> with no arguments to be repeatable.
+See L<perlrun>. This feature can be disabled at compile time by passing
+
+    -Accflags=-DNO_PERL_RAND_SEED
+
+to F<Configure> during the build process.
+
 =head1 Security
 
 XXX Any security-related notices go here.  In particular, any security

diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
@@ -8452,7 +8452,7 @@ The point of the function is to "seed" the L<C<rand>|/rand EXPR>
 function so that L<C<rand>|/rand EXPR> can produce a different sequence
 each time you run your program.  When called with a parameter,
 L<C<srand>|/srand EXPR> uses that for the seed; otherwise it
-(semi-)randomly chooses a seed.  In either case, starting with Perl 5.14,
+(semi-)randomly chooses a seed (see below).  In either case, starting with Perl 5.14,
 it returns the seed.  To signal that your code will work I<only> on Perls
 of a recent vintage:
 
@@ -8484,6 +8484,16 @@ combinations to test comprehensively in the time available to it each run.  It
 can test a random subset each time, and should there be a failure, log the seed
 used for that run so that it can later be used to reproduce the same results.
 
+The C<PERL_RAND_SEED> environment var may be used to cause C<srand()> called
+without arguments to return a consistent seed each time it is called (whether
+called explicitly or implicitly). The exact seeding is unspecified, but
+using different values for C<PERL_RAND_SEED> should produce different
+results. This is intended for debugging and is only guaranteed to produce
+consistent results between invocations of the same perl executable. Note
+that this env variable is read once during process startup, changing it
+during the program flow will not affect the currently running process.
+See L<perlrun> for more details.
+
 B<L<C<rand>|/rand EXPR> is not cryptographically secure.  You should not rely
 on it in security-sensitive situations.>  As of this writing, a
 number of third-party CPAN modules offer random number generators

diff --git a/pod/perlrun.pod b/pod/perlrun.pod
@@ -1405,6 +1405,32 @@ with tainting enabled.
 
 Perl may be built to ignore this variable.
 
+=item PERL_RAND_SEED
+X<PERL_RAND_SEED>
+
+When set to an integer value this value will be used to seed the perl
+internal random number generator used for C<rand()> when it is used
+without an explicit C<srand()> call or for when an explicit no-argument
+C<srand()> call is made.
+
+Normally calling C<rand()> prior to calling C<srand()> or calling
+C<srand()> explicitly with no arguments should result in the random
+number generator using "best efforts" to seed the generator state with a
+relatively high quality random seed. When this environment variable is
+set then the seeds used will be deterministically computed from the
+value provided in the env var in such a way that the application process
+and any forks or threads should continue to have their own unique seed but
+that the program may be run twice with identical results as far as
+C<rand()> goes (assuming all else is equal).
+
+PERL_RAND_SEED is intended for performance measurements and debugging
+and is explicitly NOT intended for stable testing. The only guarantee is
+that a specific perl executable will produce the same results twice in a
+row, there is no guarantee that the results will be the same between
+perl releases or on different architectures.
+
+Ignored if perl is run setuid or setgid.
+
 =back
 
 Perl also has environment variables that control how Perl handles data

diff --git a/pp.c b/pp.c
@@ -2920,7 +2920,17 @@ PP(pp_sin)
 PP(pp_rand)
 {
     if (!PL_srand_called) {
-        (void)seedDrand01((Rand_seed_t)seed());
+        Rand_seed_t s;
+        if (PL_srand_override) {
+            /* env var PERL_RAND_SEED has been set so the user wants
+             * consistent srand() initialization. */
+            PERL_SRAND_OVERRIDE_GET(s);
+        } else {
+            /* Pseudo random initialization from context state and possible
+             * random devices */
+            s= (Rand_seed_t)seed();
+        }
+        (void)seedDrand01(s);
         PL_srand_called = TRUE;
     }
     {
@@ -2979,7 +2989,13 @@ PP(pp_srand)
         }
     }
     else {
-        anum = seed();
+        if (PL_srand_override) {
+            /* env var PERL_RAND_SEED has been set so the user wants
+             * consistent srand() initialization. */
+            PERL_SRAND_OVERRIDE_GET(anum);
+        } else {
+            anum = seed();
+        }
     }
 
     (void)seedDrand01((Rand_seed_t)anum);