In [6]:
from scipy.stats import norm
from pyspark.sql import SparkSession
import numpy as np
from random import choice
import string

def prepare_simulation(seed, sample, n_simulations):
    """Resample ``sample`` using a generator seeded with ``seed``.

    A private ``numpy.random.RandomState`` is created for every call, so the
    result depends only on the arguments and NumPy's global random state is
    neither read nor advanced.

    Parameters
    ----------
    seed
        Seed handed to ``numpy.random.RandomState``.
    sample
        Population to draw from; forwarded unchanged to
        ``RandomState.choice``.
    n_simulations
        Passed as ``size`` to ``RandomState.choice``, i.e. how many values
        to draw.

    Returns
    -------
    The values produced by ``RandomState.choice(sample, size=n_simulations)``.
    """
    return np.random.RandomState(seed).choice(sample, size=n_simulations)
        

In [7]:

# Reference population: 10000 draws from a standard normal, sorted ascending.
norm_1 = norm(loc=0, scale=1)
sample = np.sort(norm_1.rvs(10000))

n_trials = 10
n_simulations = 5

# One integer seed per trial. Each seed is four random lowercase ASCII letters
# interpreted as a big-endian integer; four single-byte characters keep the
# value within 32 bits.
seeds = [
    int.from_bytes(
        bytearray(''.join(choice(string.ascii_lowercase) for _ in range(4)), encoding='utf-8'),
        'big',
    )
    for _ in range(n_trials)
]

# Row i holds the n_simulations values resampled with seeds[i].
simulation_matrix = np.array(
    [prepare_simulation(seed, sample, n_simulations) for seed in seeds]
)
print(simulation_matrix)
    

[[-1.12627422  0.96469566 -0.77522994 -0.76223404 -0.9925766 ]
 [-0.24958348 -0.86782805  1.89363683 -1.55530756  0.27694666]
 [ 0.15247171 -1.1773507   0.01342889 -0.96620304 -0.13391546]
 [ 0.48232482 -1.54693747 -0.18135229  1.41051844 -0.70507641]
 [-1.0909062  -0.03505605  0.16251799 -1.80723937  0.66648901]
 [ 1.12878487 -0.58353758  1.44818437  0.51148496  1.23070015]
 [ 1.2077587  -1.38653538  0.6653577   0.91239942  1.06451205]
 [-0.25550831 -0.49556759  2.99207254  1.45429851  0.43599191]
 [ 0.92323727 -0.28250646  0.15811125 -0.24718835  1.0097103 ]
 [ 0.56231774  1.77002369 -1.38774394  1.08059853 -1.62036334]]


In [8]:
# Run the same per-seed resampling on a local Spark cluster (one worker thread
# per available core) and gather the results back into a NumPy matrix.
spark_session = SparkSession.builder.master('local[*]').getOrCreate()

try:
    sc = spark_session.sparkContext

    # NOTE: this rebinds `simulation_matrix` from the NumPy array built in the
    # previous cell to an RDD. flatMap flattens each per-seed array into
    # individual values, so the RDD holds n_trials * n_simulations scalars.
    simulation_matrix = sc.parallelize(seeds).flatMap(lambda x: prepare_simulation(x, sample, n_simulations))

    # collect() brings the values back to the driver; the reshape restores
    # one row per seed (assumes collect() preserves the order of `seeds`).
    combined_simulation = np.array(simulation_matrix.collect()).reshape((n_trials, n_simulations))
finally:
    # Always release the Spark session, even if the job above fails, so a
    # failed run does not leave a live session behind in the kernel.
    spark_session.stop()

combined_simulation

array([[-1.12627422,  0.96469566, -0.77522994, -0.76223404, -0.9925766 ],
       [-0.24958348, -0.86782805,  1.89363683, -1.55530756,  0.27694666],
       [ 0.15247171, -1.1773507 ,  0.01342889, -0.96620304, -0.13391546],
       [ 0.48232482, -1.54693747, -0.18135229,  1.41051844, -0.70507641],
       [-1.0909062 , -0.03505605,  0.16251799, -1.80723937,  0.66648901],
       [ 1.12878487, -0.58353758,  1.44818437,  0.51148496,  1.23070015],
       [ 1.2077587 , -1.38653538,  0.6653577 ,  0.91239942,  1.06451205],
       [-0.25550831, -0.49556759,  2.99207254,  1.45429851,  0.43599191],
       [ 0.92323727, -0.28250646,  0.15811125, -0.24718835,  1.0097103 ],
       [ 0.56231774,  1.77002369, -1.38774394,  1.08059853, -1.62036334]])