In [50]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from datetime import date


In [67]:
# Obtain the Spark session used by every DataFrame operation below;
# getOrCreate() reuses an already-active session if there is one.
spark = (
    SparkSession.builder
    .appName("Customer Transactions Analysis")
    .getOrCreate()
)


In [68]:
# Sample transactions as (customer_id, transaction date, amount spent).
# Every transaction falls in February 2024, so the rows below list only the
# day of the month and the full date is built in the comprehension.
data = [
    (customer_id, date(2024, 2, day_of_month), amount)
    for customer_id, day_of_month, amount in (
        (1, 1, 1250.0),
        (1, 3, 2540.0),
        (1, 6, 830.0),
        (1, 12, 1280.0),
        (2, 2, 4200.0),
        (2, 5, 1250.0),
        (2, 7, 2206.0),
        (2, 9, 1350.0),
        (3, 1, 3200.0),
        (3, 4, 1800.0),
        (3, 6, 540.0),
        (3, 10, 1500.0),
        (5, 12, 1150.0),
        (6, 1, 1000.0),
        (6, 3, 500.0),
        (6, 6, 2200.0),
        (6, 8, 1800.0),
        (7, 1, 3800.0),
        (7, 4, 2750.0),
        (7, 7, 1850.0),
        (9, 7, 3000.0),
        (9, 9, 2200.0),
        (10, 2, 3300.0),
        (10, 5, 800.0),
        (10, 8, 1850.0),
        (10, 11, 2100.0),
    )
]


In [71]:
# Only column names are supplied here, so column types are left to Spark's
# inference from the Python values in `data`.
schema = [
    "customer_id",
    "transaction_dates",
    "amount_spent",
]
transactions = spark.createDataFrame(
    data,
    schema=schema,
)



In [72]:
# Per-customer frame, ordered by transaction date, spanning the first row of
# the partition up to and including the current row (running-total frame).
cumulative_window = (
    Window.partitionBy("customer_id")
    .orderBy("transaction_dates")
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)

In [73]:
# Running total of amount_spent per customer, evaluated over cumulative_window.
transactions = transactions.withColumn(
    "cumulative_amount",
    F.sum("amount_spent").over(cumulative_window),
)

In [74]:

# Whole days since the Unix epoch (1970-01-01), stored as a numeric column so it
# can serve as the ordering key of the range-based rolling window.
#
# datediff() operates on calendar dates, so the result does not depend on the
# Spark session time zone. The previous unix_timestamp(...) / 86400 expression
# resolved midnight in the session time zone and therefore produced fractional
# (or off-by-one) day numbers whenever that zone was not UTC.
#
# The cast keeps the column a double, matching the type the division produced.
transactions = transactions.withColumn(
    "transaction_date_days",
    F.datediff(F.col("transaction_dates"), F.lit(date(1970, 1, 1))).cast("double"),
)


In [75]:
# Per-customer range frame over transaction_date_days: includes every row whose
# ordering value lies between (current value - 6) and the current value, i.e.
# the current day plus the six days before it when the column holds day numbers.
rolling_window = (
    Window.partitionBy("customer_id")
    .orderBy("transaction_date_days")
    .rangeBetween(-6, 0)
)


In [76]:
# Average amount_spent per customer across the rows selected by rolling_window.
transactions = transactions.withColumn(
    "rolling_avg_amount",
    F.avg("amount_spent").over(rolling_window),
)

In [77]:
# Print the first 20 rows (show()'s defaults, spelled out), then shut the
# Spark session down now that the analysis is finished.
transactions.show(n=20, truncate=True)
spark.stop()

+-----------+-----------------+------------+-----------------+---------------------+------------------+
|customer_id|transaction_dates|amount_spent|cumulative_amount|transaction_date_days|rolling_avg_amount|
+-----------+-----------------+------------+-----------------+---------------------+------------------+
|          1|       2024-02-01|      1250.0|           1250.0|              19754.0|            1250.0|
|          1|       2024-02-03|      2540.0|           3790.0|              19756.0|            1895.0|
|          1|       2024-02-06|       830.0|           4620.0|              19759.0|            1540.0|
|          1|       2024-02-12|      1280.0|           5900.0|              19765.0|            1055.0|
|          2|       2024-02-02|      4200.0|           4200.0|              19755.0|            4200.0|
|          2|       2024-02-05|      1250.0|           5450.0|              19758.0|            2725.0|
|          2|       2024-02-07|      2206.0|           7656.0|  