In [1]:
# Locate the local Spark installation and make `pyspark` importable.
# This has to run before the `pyspark` import in the next cell.
import findspark
findspark.init()

In [None]:
from pyspark.sql import SparkSession

# Create (or reuse) a session named 'Higher Order Functions' that runs in
# local mode with three worker threads.
spark = SparkSession.builder.appName('Higher Order Functions').master('local[3]').getOrCreate()

# Bare last expression so the notebook renders the session summary.
spark

In [4]:
# Two sample rows; each row carries a single array of integer Celsius readings.
temperature_list = [
    [[35, 36, 32, 30, 40, 42, 38]],
    [[31, 32, 34, 55, 56]],
]

# DDL-style schema: one column `celsius` of type array<int>.
schema = 'celsius array<int>'

df = spark.createDataFrame(data=temperature_list, schema=schema)

In [5]:
# Print the rows without truncating the array column.
df.show(truncate=False)

                                                                                

+----------------------------+
|celsius                     |
+----------------------------+
|[35, 36, 32, 30, 40, 42, 38]|
|[31, 32, 34, 55, 56]        |
+----------------------------+



#### Convert each value from Celsius to Fahrenheit
###### F = (C * 9) / 5 + 32

In [6]:
# Register the DataFrame as the temp view `t_c` so it can be queried via spark.sql.
df.createOrReplaceTempView('t_c')

In [10]:
# Sanity check: query the `t_c` view and print every row untruncated.
spark.sql('select * from t_c').show(truncate=False)

+----------------------------+
|celsius                     |
+----------------------------+
|[35, 36, 32, 30, 40, 42, 38]|
|[31, 32, 34, 55, 56]        |
+----------------------------+



In [11]:
# transform() applies the lambda to every element of the array and returns a
# new array; here each Celsius reading c is mapped to (c*9)/5 + 32.
spark.sql(
    """
        select celsius, transform(celsius, c -> (c*9)/5 + 32) as farenheit
        from t_c
    """
).show(truncate=False)

+----------------------------+---------------------------------------------+
|celsius                     |farenheit                                    |
+----------------------------+---------------------------------------------+
|[35, 36, 32, 30, 40, 42, 38]|[95.0, 96.8, 89.6, 86.0, 104.0, 107.6, 100.4]|
|[31, 32, 34, 55, 56]        |[87.8, 89.6, 93.2, 131.0, 132.8]             |
+----------------------------+---------------------------------------------+



#### Filter temperatures > 38 Celsius

In [13]:
# filter() keeps only the array elements for which the lambda is true,
# i.e. the readings strictly greater than 38.
spark.sql(
    """
        select celsius, filter(celsius, c -> c > 38) as high from t_c
    """
).show(truncate=False)

+----------------------------+--------+
|celsius                     |high    |
+----------------------------+--------+
|[35, 36, 32, 30, 40, 42, 38]|[40, 42]|
|[31, 32, 34, 55, 56]        |[55, 56]|
+----------------------------+--------+



#### Check, for each record, whether its list contains 38 Celsius

In [15]:
# exists() is evaluated per row and yields true when at least one element of
# the array satisfies the lambda (here: a reading equal to 38).
# NOTE(review): the alias `exists` reuses the function's own name; a distinct
# alias may read more clearly. Confirm before renaming.
spark.sql(
    """
        select celsius, exists(celsius, c -> c = 38) as exists from t_c
    """
).show(truncate=False)

+----------------------------+------+
|celsius                     |exists|
+----------------------------+------+
|[35, 36, 32, 30, 40, 42, 38]|true  |
|[31, 32, 34, 55, 56]        |false |
+----------------------------+------+



#### Calculate the average temperature of each record and convert it to Fahrenheit

In [18]:
# reduce(array, start, merge, finish):
#   - `merge` receives the running accumulator FIRST and the current element
#     SECOND, so its parameters are named (acc, c) to match that order.
#   - The sum starts at 0 and `finish` divides it by the array size to get the
#     mean Celsius value, then converts that mean to Fahrenheit.
spark.sql(
    """
        select celsius,
        reduce(
            celsius,
            0,
            (acc, c) -> acc + c,
            acc -> (acc / size(celsius)) * 9 / 5 + 32
        ) as avg_farenheit
        from t_c
    """
).show(truncate=False)

+----------------------------+------------------+
|celsius                     |avg_farenheit     |
+----------------------------+------------------+
|[35, 36, 32, 30, 40, 42, 38]|97.05714285714286 |
|[31, 32, 34, 55, 56]        |106.88000000000001|
+----------------------------+------------------+

