In [1]:
from pyspark import SparkContext
import time
sc.stop()


In [2]:
def initialize_spark_context(workers):
    """
    Initializes and returns a Spark context with a number of local workers.
    """
    return SparkContext(master=f"local[{workers}]")

In [3]:
def calculate_integral(w, n, a=1, b=20):
    """
    Calculates the integral estimation of the function f(x) = (10*x^2 - 2) using a Spark RDD.
    
    Args:
        a (int): The lower limit of the range.
        b (int): The upper limit of the range.
        n (int): The number of intervals.

    Returns:
        float: The estimated value of the integral.
    """
    sc = initialize_spark_context(w) #Initializes and returns a Spark context with w number of local workers
    time.sleep(1) # Letting the spark context initilizing process time so it wont influance the time mesurment
    start_time = time.time() # Starting the calculation time mesurment
    k_list = list(range(1, n)) # Creating a list of k's from 1 to n-1 for the sigma calculation
    k_rdd = sc.parallelize(k_list) # creating a rdd from the k list and parallelizing (spliting) it between the workers
    mapped_rdd = k_rdd.map(lambda k: 10*(a+k/n*(b-a))**2-2) # creating a mapped rdd to calculate the value in the sigma for each k value 
    integral_sum = mapped_rdd.reduce(lambda x, y: x + y) # Summing the mapped rdd to get the sigma value for all k
    end_time = time.time() # ending the calculation time mesurment
    runtime = end_time - start_time # calculating the time mesurment
    sc.stop()  # Important to stop the Spark context to free resources
    return (b-a)/n*((10*a**2-2+10*b**2-2)/2+integral_sum), runtime # Calculated and returens the estimated value of the integral and the rumtime

In [4]:
def main(w): # Main function to calculate and print the integral and runtime with a specified number of workers.
    for n in (100, 1000, 10000, 1000000): # Running the calculation and prints on all these number of subdivisions
        integral, runtime = calculate_integral(w, n, a=1, b=20) # Calling the function for the calculation
        print(f'Calculated integral value with {w} workers and {n} intervals: {integral}')
        print(f'The error is: {26625.3333-integral}')
        print(f'Runtime of the script is {runtime:.2f} seconds')
        

In [7]:
main(w=2)

                                                                                

Calculated integral value with 2 workers and 100 intervals: 26626.4765
The error is: -1.1432000000022526
Runtime of the script is 0.88 seconds


                                                                                

Calculated integral value with 2 workers and 1000 intervals: 26625.344764999994
The error is: -0.011464999995951075
Runtime of the script is 0.92 seconds


                                                                                

Calculated integral value with 2 workers and 10000 intervals: 26625.33344765002
The error is: -0.00014765002197236754
Runtime of the script is 0.94 seconds


24/12/13 16:13:24 WARN TaskSetManager: Stage 0 contains a task of very large size (2332 KiB). The maximum recommended task size is 1000 KiB.
                                                                                

Calculated integral value with 2 workers and 1000000 intervals: 26625.333333344046
The error is: -3.334404755150899e-05
Runtime of the script is 1.57 seconds


In [8]:
main(w=4)

                                                                                

Calculated integral value with 4 workers and 100 intervals: 26626.476499999997
The error is: -1.1431999999986147
Runtime of the script is 0.92 seconds


                                                                                

Calculated integral value with 4 workers and 1000 intervals: 26625.344764999994
The error is: -0.011464999995951075
Runtime of the script is 0.99 seconds


                                                                                

Calculated integral value with 4 workers and 10000 intervals: 26625.33344765001
The error is: -0.00014765001105843112
Runtime of the script is 0.91 seconds


24/12/13 16:13:40 WARN TaskSetManager: Stage 0 contains a task of very large size (1105 KiB). The maximum recommended task size is 1000 KiB.
                                                                                

Calculated integral value with 4 workers and 1000000 intervals: 26625.333333344683
The error is: -3.3344684197800234e-05
Runtime of the script is 1.33 seconds
