In [1]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F, Window as W
from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType
from datetime import datetime

In [3]:
# Obtain the Spark session for this notebook: getOrCreate() hands back an
# already-running session when one exists, otherwise it starts a new one
# registered under the application name below.
spark = (
    SparkSession.builder
    .appName("DailyCodingProblem-day-12")
    .getOrCreate()
)

# 📝 Problem 1: PySpark – Identify Peak Sales Day per Product

### **Problem Statement**

You have a PySpark DataFrame containing daily sales data for multiple products. Write a PySpark program to **find the date with the highest sales for each product**.

### **Sample Input** (`product_sales`)

| product | sale\_date | sales |
| ------- | ---------- | ----- |
| Laptop  | 2025-01-01 | 100   |
| Laptop  | 2025-01-02 | 250   |
| Laptop  | 2025-01-03 | 200   |
| Phone   | 2025-01-01 | 300   |
| Phone   | 2025-01-02 | 150   |
| Phone   | 2025-01-03 | 400   |

### **Expected Output**

| product | peak\_sale\_date | peak\_sales |
| ------- | ---------------- | ----------- |
| Laptop  | 2025-01-02       | 250         |
| Phone   | 2025-01-03       | 400         |

---

In [8]:
# Sample rows for the problem: (product, sale_date as a "yyyy-MM-dd" string, sales).
data = [
    ("Laptop", "2025-01-01", 100),
    ("Laptop", "2025-01-02", 250),
    ("Laptop", "2025-01-03", 200),
    ("Phone", "2025-01-01", 300),
    ("Phone", "2025-01-02", 150),
    ("Phone", "2025-01-03", 400),
]

# Explicit schema, every column nullable. sale_date is declared as a string
# at this point, matching the string literals in `data`.
schema = (
    StructType()
    .add("product", StringType(), True)
    .add("sale_date", StringType(), True)
    .add("sales", IntegerType(), True)
)

df = spark.createDataFrame(data=data, schema=schema)

In [9]:
# Replace the sale_date column with its parsed date value (to_date with no
# explicit format), keeping the same column name.
df = df.withColumn("sale_date", F.to_date(F.col("sale_date")))

In [18]:
# Window over each product's rows, ordered so the largest sales value comes first.
w = (
    W.partitionBy("product")
    .orderBy(F.col("sales").desc())
)

In [23]:
# Rank rows inside each product window and keep only rank 1 (the peak sales).
# dense_rank assigns tied rows the same rank, so a product whose top sales
# value occurs on several dates keeps one row per such date.
# The helper "rank" column is removed before the output columns are renamed.
peak_sale_df = (
    df.withColumn("rank", F.dense_rank().over(w))
    .where(F.col("rank") == 1)
    .drop("rank")
    .withColumnRenamed("sale_date", "peak_sale_date")
    .withColumnRenamed("sales", "peak_sales")
)

peak_sale_df.show()

+-------+--------------+----------+
|product|peak_sale_date|peak_sales|
+-------+--------------+----------+
| Laptop|    2025-01-02|       250|
|  Phone|    2025-01-03|       400|
+-------+--------------+----------+

