# Exercise 1

- Run the same calculate_pi function for array sizes 1GB-5GB.

- Time the NumPy-only version against the Dask version.

- Notice anything?

- Modify the code below and submit it to the queue.

In [None]:
%%writefile calculate_pi.py

import dask
from dask.distributed import Client
import numpy as np

import time

def calculate_pi(size_in_bytes):
    """Estimate pi by Monte Carlo sampling and print a short report.

    Uniform random (x, y) points are drawn from the unit square; the share
    of points falling inside the unit circle, multiplied by four, is the
    estimate of pi. The number of points is chosen so that the coordinate
    array takes up roughly ``size_in_bytes`` bytes, assuming 8 bytes per
    value and 2 values per point.

    Parameters
    ----------
    size_in_bytes : int or float
        Target size in bytes of the random coordinate array.

    Returns
    -------
    float
        The estimate of pi (a NumPy floating-point scalar).
    """
    # One row per sample point, two columns (x, y).
    n_points = int(size_in_bytes / 8 / 2)
    points = np.random.uniform(low=0.0, high=1.0, size=(n_points, 2))

    # A point is inside the unit circle when x^2 + y^2 < 1.
    in_circle = (points ** 2).sum(axis=1) < 1

    # Fraction of hits times four approximates pi.
    hits = in_circle.sum()
    total = in_circle.size
    pi = 4 * hits / total

    sampled_gb = points.nbytes / 1e9
    error = abs(pi - np.pi)
    print(f"\nfrom {sampled_gb} GB randomly chosen positions")
    print(f"   pi estimate: {pi}")
    print(f"   pi error: {error}\n")

    return pi

# In a script, the driver code must sit inside a __main__ guard for dask to work correctly.
if __name__ == '__main__':
    # Exercise scaffold: the TODO markers and the `??` placeholder below must
    # be filled in before this script can run.

    # --- NumPy-only timing: wall-clock seconds elapsed between t0 and t1 ---
    t0 = time.time()
    # TODO: Perform numpy version on 1GB, 2GB, 3GB, 4GB and 5GB
    t1 = time.time()
    print(f"time taken for numpy is {t1-t0}\n\n")


    # NOTE(review): this prints two newlines followed by a literal
    # backslash-n ("\\n" is an escaped backslash) -- presumably a plain
    # newline was intended; confirm before changing.
    print(f"\n\n\\n")

    # TODO: replace `??` with the arguments for the dask.distributed Client.
    client = Client(??)


    # --- Dask timing: wall-clock seconds elapsed between t0 and t1 ---
    t0 = time.time()
    # TODO: Perform dask version in parallel for 1GB, 2GB, 3GB, 4GB and 5GB
    t1 = time.time()
    print(f"time taken for dask is {t1-t0}\n\n")

    # Close the client once the timings have been reported.
    client.close()


In [None]:
%%writefile calculate_pi.slurm
#!/bin/bash
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH -A course
#SBATCH --job-name=calpi
#SBATCH -p CourseDevQ
#SBATCH --reservation=CourseMay

# Batch job for the calculate_pi exercise: one node, 10-minute time limit,
# charged to the "course" account on the CourseDevQ partition.

# Start from a clean module environment, load conda, and record the loaded
# modules in the job log.
module purge
module load conda
module list

source activate /ichec/home/users/course00/conda_HPC

# Run from the directory the job was submitted from. Quote the path so
# spaces survive, and stop if the directory cannot be entered rather than
# running python somewhere unexpected.
cd "$SLURM_SUBMIT_DIR" || exit 1

# -u keeps stdout/stderr unbuffered so prints reach the job log immediately.
python -u calculate_pi.py

# Hand python's exit status back to Slurm so a failed run is not reported
# as a successful job.
exit $?