In [20]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.window import *
from pyspark.sql.functions import *

# Reuse the active Spark session if there is one; otherwise create one
# registered under the application name 'guard'.
spark = (
    SparkSession.builder
    .appName('guard')
    .getOrCreate()
)

# Declared layout of a transaction record. Every field is marked as required
# (nullable=False); the date is declared as a plain string.
schema = StructType(
    [
        StructField(field_name, field_type, nullable=False)
        for field_name, field_type in (
            ("customer_id", IntegerType()),
            ("transaction_date", StringType()),
            ("amount", DoubleType()),
        )
    ]
)

# Sample transactions as (customer_id, transaction_date, amount) tuples.
# Dates are written as 'YYYY-MM-DD' strings and converted to a date column below.
data = [
    (1, "2023-01-15", 150.75),
    (2, "2023-01-18", 87.40),
    (3, "2023-01-20", 205.50),
    (4, "2023-01-22", 67.25),
    (5, "2023-01-25", 320.99),
    (6, "2023-02-01", 89.10),
    (7, "2023-02-10", 75.20),
    (8, "2023-02-12", 145.50),
    (9, "2023-02-18", 230.00),
    (10, "2023-02-28", 310.75)
]

# Column names are taken from the declared schema so the two cannot drift apart.
columns = schema.fieldNames()

# Build the frame with the explicit schema rather than bare column names.
# With names only, Spark infers the types (Python ints become nullable bigint)
# and the IntegerType / nullable=False declarations in `schema` are ignored.
# The string date column is then cast to a real date so it sorts chronologically.
transactions = (
    spark.createDataFrame(data, schema=schema)
    .withColumn("transaction_date", col('transaction_date').cast('date'))
)

# Both window frames share the same layout: one partition per customer,
# rows ordered by transaction date.
per_customer_by_date = Window.partitionBy('customer_id').orderBy('transaction_date')

# Running total: frame spans from the first row of the partition up to the current row.
# `sum` here is the Spark column function brought in by the star import, not the builtin.
# NOTE(review): the name `window` rebinds `pyspark.sql.functions.window` from the
# star import; kept as-is in case later cells refer to it.
window = per_customer_by_date.rowsBetween(Window.unboundedPreceding, Window.currentRow)
cum_transaction = transactions.withColumn('cumulative_amount', sum('amount').over(window))

# Rolling average over the current row and the six rows before it.
# NOTE(review): this frame counts rows (transactions), not calendar days. If a
# 7-day average was intended, it needs a rangeBetween frame over a numeric
# day ordering instead -- confirm the intent.
roll_7_avg = per_customer_by_date.rowsBetween(-6, Window.currentRow)
trans_avg = cum_transaction.withColumn('rolling_avg_amount', avg('amount').over(roll_7_avg))

trans_avg.show()


+-----------+----------------+------+-----------------+------------------+
|customer_id|transaction_date|amount|cumulative_amount|rolling_avg_amount|
+-----------+----------------+------+-----------------+------------------+
|          1|      2023-01-15|150.75|           150.75|            150.75|
|          2|      2023-01-18|  87.4|             87.4|              87.4|
|          3|      2023-01-20| 205.5|            205.5|             205.5|
|          4|      2023-01-22| 67.25|            67.25|             67.25|
|          5|      2023-01-25|320.99|           320.99|            320.99|
|          6|      2023-02-01|  89.1|             89.1|              89.1|
|          7|      2023-02-10|  75.2|             75.2|              75.2|
|          8|      2023-02-12| 145.5|            145.5|             145.5|
|          9|      2023-02-18| 230.0|            230.0|             230.0|
|         10|      2023-02-28|310.75|           310.75|            310.75|
+-----------+----------------+------+-----------------+------------------+

In [None]:
_avg