In [1]:
from pyspark.sql import functions as F
from pyspark.sql import Window

In [2]:
# Obtain the foundry manager. `get_foundry_manager` is neither defined nor
# imported in the cells shown here (presumably injected by the hosting
# environment -- TODO confirm).
fm = get_foundry_manager()
# NOTE: despite the conventional meaning of the name `sc`, this variable holds
# the manager's `_sql_context` attribute; later cells use it to call
# `createDataFrame`. The leading underscore marks the attribute as private to
# the manager, so it may change without notice (NOTE(review): check whether a
# public accessor exists).
sc = fm._sql_context



In [3]:
## Sample DataFrame

In [4]:
# Small demo table: twelve rows of (ID, Phase, FF_1).
# IDs run 1..12 and each Phase value groups a contiguous run of IDs.
a = sc.createDataFrame(
    [
        (1, 1, 16),
        (2, 1, 100),
        (3, 2, 150),
        (4, 2, 150),
        (5, 2, 6),
        (6, 3, 99),
        (7, 3, 101),
        (8, 4, 103),
        (9, 4, 12),
        (10, 4, 18),
        (11, 4, 47),
        (12, 4, 44),
    ],
    ['ID', 'Phase', 'FF_1'],
)

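There are two ways of using windows:

* Define the window together with its frame (the range of rows, e.g. with `rowsBetween`) and apply an aggregate over it, or
* Define the window without a frame and use `pyspark.sql.functions` window functions (e.g. `lead`) to reach neighbouring rows.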

In [30]:
# Window WITH an explicit frame: rows are grouped by Phase, ordered by ID, and
# each row's frame covers itself (offset 0) through the next row (offset 1).
# The last row of each Phase therefore has a frame containing only itself.
window_w_range = (
    Window
    .partitionBy('Phase')
    .orderBy('ID')
    .rowsBetween(0, 1)
)

# Window WITHOUT a frame and without partitionBy: it only orders rows by ID,
# so it spans the entire DataFrame and neighbouring rows may belong to a
# different Phase.
window_wo_range = Window.orderBy('ID')

In [31]:
# Mean of FF_1 over each row's frame (current row + next row within the same
# Phase), added as column "Avg" and printed.
(
    a
    .withColumn("Avg", F.mean(a['FF_1']).over(window_w_range))
    .show()
)

+---+-----+----+-----+
| ID|Phase|FF_1|  Avg|
+---+-----+----+-----+
|  1|    1|  16| 58.0|
|  2|    1| 100|100.0|
|  6|    3|  99|100.0|
|  7|    3| 101|101.0|
|  3|    2| 150|150.0|
|  4|    2| 150| 78.0|
|  5|    2|   6|  6.0|
|  8|    4| 103| 57.5|
|  9|    4|  12| 15.0|
| 10|    4|  18| 32.5|
| 11|    4|  47| 45.5|
| 12|    4|  44| 44.0|
+---+-----+----+-----+



In [32]:
def func():
    """Build a Column that adds each row's FF_1 to the FF_1 of the next row.

    "Next" is determined by ``window_wo_range`` (defined in an earlier cell),
    which orders rows by ID without partitioning. ``F.lead`` produces null
    when there is no following row, so the sum is null for the last row.

    Returns:
        A pyspark Column expression suitable for ``DataFrame.withColumn``.
    """
    current_value = F.col('FF_1')
    next_value = F.lead(current_value, 1).over(window_wo_range)
    return current_value + next_value
# Attach the Column built by func() as "sum" and print the resulting table.
a.withColumn("sum", func()).show()

+---+-----+----+----+
| ID|Phase|FF_1| sum|
+---+-----+----+----+
|  1|    1|  16| 116|
|  2|    1| 100| 250|
|  3|    2| 150| 300|
|  4|    2| 150| 156|
|  5|    2|   6| 105|
|  6|    3|  99| 200|
|  7|    3| 101| 204|
|  8|    4| 103| 115|
|  9|    4|  12|  30|
| 10|    4|  18|  65|
| 11|    4|  47|  91|
| 12|    4|  44|null|
+---+-----+----+----+

