In [1]:
import datetime

In [2]:
val_seed = 5532
num_darts = [10**2,10**3,10**4,10**5,10**6,10**7]
val_max = max(num_darts)

<img src='./MonteCarloApproach.png'>
From UC Berkeley

# Using Numpy

In [3]:
import numpy as np
from operator import add

In [4]:
def get_pi(val_size,val_max,val_seed=val_seed):
    if(val_size <= val_max):
        time_start = datetime.datetime.now()
        np.random.seed(val_seed)
        x = np.random.random_sample(size=val_size)
        y = np.random.random_sample(size=val_size)
        x_2 = [i**2 for i in x]
        y_2 = [j**2 for j in y]
        line_len = list(map(add,x_2,y_2))
        less_than_1 = len([1 for i in line_len if i < 1])
        tmp_pi = 4*(less_than_1/val_size)
        time_end = datetime.datetime.now()
        total_s = (time_end-time_start).total_seconds()
        print('Calc. value pi = '+str(tmp_pi)+'; sample size = '+str(val_size)+' ('+str(100*round(val_size/val_max,5))+'% of total); took '+str(total_s)+' secs.')
    return(tmp_pi)

In [5]:
%%time
pi_from_numpy = [get_pi(val_size=x,val_max=val_max) for x in num_darts]
print(pi_from_numpy)

Calc. value pi = 3.16; sample size = 100 (0.001% of total); took 0.000228 secs.
Calc. value pi = 3.164; sample size = 1000 (0.01% of total); took 0.002421 secs.
Calc. value pi = 3.1436; sample size = 10000 (0.1% of total); took 0.01886 secs.
Calc. value pi = 3.1508; sample size = 100000 (1.0% of total); took 0.105436 secs.
Calc. value pi = 3.14234; sample size = 1000000 (10.0% of total); took 1.327553 secs.
Calc. value pi = 3.14151; sample size = 10000000 (100.0% of total); took 11.458188 secs.
[3.16, 3.164, 3.1436, 3.1508, 3.14234, 3.14151]
CPU times: user 12.5 s, sys: 886 ms, total: 13.4 s
Wall time: 13.6 s


# Using Pandas

In [6]:
import pandas as pd

In [7]:
%%time
np.random.seed(val_seed)
x = np.random.random_sample(size=val_max)
y = np.random.random_sample(size=val_max)
df = pd.DataFrame({'x':x,'y':y})
df['x_2'] = df['x']**2
df['y_2'] = df['y']**2
df['line_len'] = df['x_2']+df['y_2']
df['less_than_1'] = [1 if x < 1 else 0 for x in df['line_len']]

CPU times: user 3.67 s, sys: 679 ms, total: 4.35 s
Wall time: 3.64 s


In [8]:
%%time
pi_from_df = []
for i in num_darts:
    if(i <= val_max):
        time_start = datetime.datetime.now()
        np.random.seed(val_seed)
        df_sample = df['less_than_1'].sample(n=i,replace=False)
        tmp_pi = 4*(sum(df_sample)/i)
        time_end = datetime.datetime.now()
        total_s = (time_end-time_start).total_seconds()
        print('Calc. value pi = '+str(tmp_pi)+'; sample size = '+str(i)+' ('+str(100*round(i/val_max,5))+'% of total); took '+str(total_s)+' secs.')
        pi_from_df.append(tmp_pi)

Calc. value pi = 3.12; sample size = 100 (0.001% of total); took 0.481703 secs.
Calc. value pi = 3.188; sample size = 1000 (0.01% of total); took 0.457466 secs.
Calc. value pi = 3.1536; sample size = 10000 (0.1% of total); took 0.495009 secs.
Calc. value pi = 3.14416; sample size = 100000 (1.0% of total); took 0.499252 secs.
Calc. value pi = 3.14194; sample size = 1000000 (10.0% of total); took 0.54777 secs.
Calc. value pi = 3.14151; sample size = 10000000 (100.0% of total); took 1.564854 secs.
CPU times: user 3.84 s, sys: 145 ms, total: 3.99 s
Wall time: 4.05 s


# Using Dask

In [None]:
from dask.distributed import Client, progress
from dask import dataframe as dd 

In [None]:
client = Client(processes=False, threads_per_worker=4, n_workers=1, memory_limit='2GB')
client

In [None]:
ddf = dd.from_pandas(df, npartitions=4)

In [None]:
type(ddf)

In [None]:
%%time
pi_from_ddf = []
for i in num_darts:
    if(i <= val_max):
        time_start = datetime.datetime.now()
        np.random.seed(val_seed)
        ddf_sample = ddf['less_than_1'].sample(n=i,replace=False)
        tmp_pi = 4*(sum(ddf_sample)/i)
        time_end = datetime.datetime.now()
        total_s = (time_end-time_start).total_seconds()
        print('Calc. value pi = '+str(tmp_pi)+'; sample size = '+str(i)+' ('+str(100*round(i/val_max,5))+'% of total); took '+str(total_s)+' secs.')
        pi_from_ddf.append(tmp_pi)