# Computing an Integral by the Trapezoid method using SparkSQL 

*First import libraries*

In [149]:
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.types import FloatType, DoubleType
import math

*Initialize SparkSession and SQLContext*

In [150]:
# Obtain the notebook's SparkSession (getOrCreate returns an existing one if present).
spark = (
    SparkSession.builder
    .appName("TrapezoidMethod")
    .getOrCreate()
)
# SQLContext handle built on top of the session's SparkContext.
sqlContext = SQLContext(spark.sparkContext)

*Define the integration limits, the number of subintervals (trapezoids), and the UDFs*

In [151]:
# Integration domain [a, b] and the resolution of the trapezoid rule.
a, b = 0.01, 0.1    # lower and upper limits of integration
k = 100000          # number of subintervals (one trapezoid each), not Spark partitions
h = (b - a) / k     # width of a single subinterval

def interval(n):
    """Return the n-th grid point: the lower limit `a` shifted by `n` steps of width `h`."""
    offset = n * h
    return a + offset

def f(x):
    """Integrand: cos(1/x). Raises ZeroDivisionError when x is 0."""
    reciprocal = 1 / x
    return math.cos(reciprocal)

def A(x0, x1):
    """Area of one trapezoid of width `h` whose parallel sides are f(x0) and f(x1)."""
    half_width = h / 2
    side_sum = f(x0) + f(x1)
    return half_width * side_sum

# Make the Python helpers callable from Spark SQL under the same names;
# each one is declared as returning a double.
spark.udf.register("interval", interval, returnType=DoubleType())
spark.udf.register("f", f, returnType=DoubleType())
spark.udf.register("A", A, returnType=DoubleType())

<function __main__.A(x0, x1)>

*In the next code block we build a DataFrame with the function values at every grid point and the area of every trapezoid*

In [152]:
## Define the id column: one row per trapezoid, with id running from 1 to k.
## createOrReplaceTempView returns None, so keep the DataFrame in `df` and register it separately.
df = spark.range(start=1, end=k + 1)
df.createOrReplaceTempView("TRAPEZOID_IDS")
## Use the 'interval' UDF to create two columns containing the end points of each subinterval.
## Every stage is registered under its own view name: replacing a view with a query that reads
## from that same view is rejected by some Spark releases ("Recursive view detected").
spark.sql("SELECT id, interval(id-1) as x0, interval(id) as x1 FROM TRAPEZOID_IDS").createOrReplaceTempView("TRAPEZOID_POINTS")
## Add the f(x0), f(x1) and area columns; the finished table is published as TRAPEZOID.
spark.sql("SELECT *, f(x0), f(x1), A(x0,x1) as area FROM TRAPEZOID_POINTS").createOrReplaceTempView("TRAPEZOID")
## Show the top 10 rows of the finished view.
spark.sql("SELECT * FROM TRAPEZOID").show(10)
## Sum the areas of all trapezoids, which gives the value of the definite integral.
spark.sql("SELECT sum(area) FROM TRAPEZOID").show()


+---+--------------------+--------------------+------------------+------------------+--------------------+
| id|                  x0|                  x1|             f(x0)|             f(x1)|                area|
+---+--------------------+--------------------+------------------+------------------+--------------------+
|  1|                0.01|           0.0100009|0.8623188722876839|0.8577271357502991|7.740207036170925E-7|
|  2|           0.0100009|           0.0100018|0.8577271357502991|0.8530667813794596|7.698572627083914E-7|
|  3|           0.0100018|           0.0100027|0.8530667813794596|0.8483382236202023|7.656322522498478E-7|
|  4|           0.0100027|           0.0100036|0.8483382236202023|0.8435418820760986|7.613460475633354E-7|
|  5|           0.0100036|           0.0100045|0.8435418820760986| 0.838678181468593|7.569990285951113E-7|
|  6|           0.0100045|           0.0100054| 0.838678181468593| 0.833747551596085|7.525915798791051E-7|
|  7|           0.0100054|0.010006300

*If you are not interested in the calculation tables, you can use just the block below, which returns only the integral value*

In [153]:
## One row per trapezoid (id = 1..k). createOrReplaceTempView returns None, so the
## DataFrame is kept in `df` and registered as a view in a separate statement.
df = spark.range(start=1, end=k + 1)
df.createOrReplaceTempView("TRAPEZOID_IDS")
## Build TRAPEZOID from a differently named source view: replacing a view with a query that
## reads from that same view is rejected by some Spark releases ("Recursive view detected").
spark.sql("SELECT id, interval(id-1) as x0, interval(id) as x1 FROM TRAPEZOID_IDS").createOrReplaceTempView("TRAPEZOID")
## Sum the trapezoid areas and pull the single resulting value back to the driver.
spark.sql("SELECT sum(A(x0,x1)) as area FROM TRAPEZOID").collect()[0][0]


0.0035917860374208837