In [1]:
csv_data = """date,category,amount,user
2025-01-05,Food,250.50,User1
2025-01-12,Transport,80.00,User1
2025-01-20,Shopping,500.00,User1
2025-02-02,Food,300.00,User1
2025-02-15,Entertainment,200.00,User1
2025-02-25,Transport,60.00,User1
2025-03-03,Food,150.00,User1
2025-03-12,Shopping,700.00,User1
2025-03-25,Food,220.00,User1
2025-03-28,Entertainment,400.00,User1
"""

with open("expenses.csv", "w") as file:
    file.write(csv_data)

In [7]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_date, month, sum as _sum, max as _max

# Create Spark Session
spark = SparkSession.builder.appName("ExpenseAnalysis").getOrCreate()

In [3]:
# Load data with PySpark
df = spark.read.csv("expenses.csv", header=True, inferSchema=True)

In [4]:
# Convert date column to proper date type
df = df.withColumn("date", to_date(col("date"), "yyyy-MM-dd"))


In [5]:
# Extract month number
df = df.withColumn("month", month(col("date")))


In [8]:
# Total monthly spend per user
monthly_spend = df.groupBy("user", "month").agg(_sum("amount").alias("total_spent"))
monthly_spend.show()

+-----+-----+-----------+
| user|month|total_spent|
+-----+-----+-----------+
|User1|    2|      560.0|
|User1|    3|     1470.0|
|User1|    1|      830.5|
+-----+-----+-----------+



In [9]:
# Detect unusual spikes (expenses > 500 in one month)
unusual_spending = monthly_spend.filter(col("total_spent") > 500)
print("===== Unusual Spending Detected =====")
unusual_spending.show()

===== Unusual Spending Detected =====
+-----+-----+-----------+
| user|month|total_spent|
+-----+-----+-----------+
|User1|    2|      560.0|
|User1|    3|     1470.0|
|User1|    1|      830.5|
+-----+-----+-----------+



In [10]:
# Detect large one-time transactions (> 400)
large_transactions = df.filter(col("amount") > 400)
print("===== Large One-Time Transactions =====")
large_transactions.show()

===== Large One-Time Transactions =====
+----------+--------+------+-----+-----+
|      date|category|amount| user|month|
+----------+--------+------+-----+-----+
|2025-01-20|Shopping| 500.0|User1|    1|
|2025-03-12|Shopping| 700.0|User1|    3|
+----------+--------+------+-----+-----+

