In [1]:
from pyspark.sql import SparkSession, functions as F, Window as W
from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType

In [2]:
# Reuse the active Spark session if one exists; otherwise start one named for this notebook.
spark = (
    SparkSession.builder
    .appName("DailyCodingProblem-23-08-2025")
    .getOrCreate()
)

# 📝 Problem 1: PySpark – Calculate Percentage Contribution of Each Product

### **Problem Statement**

You have a PySpark DataFrame with total sales for different products. Write a PySpark program to **calculate the percentage contribution of each product** towards the total sales across all products.

### **Sample Input** (`product_sales`)

| product | sales |
| ------- | ----- |
| Laptop  | 1200  |
| Phone   | 800   |
| Tablet  | 500   |
| Desktop | 500   |

### **Expected Output**

| product | sales | percentage\_contribution |
| ------- | ----- | ------------------------ |
| Laptop  | 1200  | 40.0                     |
| Phone   | 800   | 26.7                     |
| Tablet  | 500   | 16.7                     |
| Desktop | 500   | 16.7                     |

*(Percentages are rounded to 1 decimal place, so they may not sum to exactly 100.)*

---

In [7]:
# Input schema: one row per product with its total sales.
schema = StructType([
    StructField("product", StringType(), True),
    StructField("sales", IntegerType(), True)
])

data = [
    ("Laptop", 1200),
    ("Phone", 800),
    ("Tablet", 500),
    ("Desktop", 500),
]

# Keep the raw input under its own name so the result does not overwrite it.
product_sales = spark.createDataFrame(data, schema)

# Global window: no partition columns and no ordering, so the frame defaults to
# every row and the windowed sum is the grand total. An ORDER BY is not needed
# for a grand total and would only add a sort.
# NOTE: an unpartitioned window gathers all rows into a single partition; that
# is fine at this size, but for large inputs aggregate the total once and
# cross-join it instead.
w = W.partitionBy()

total_sales = F.sum(F.col('sales')).over(w)

# Share of the grand total as a percentage, rounded to 1 decimal place,
# listed from the best-selling product down.
df = (
    product_sales
    .withColumn(
        'percentage_contribution',
        F.round((F.col('sales') / total_sales) * 100, 1)
    )
    .orderBy(F.col('sales').desc())
)

df.show()


+-------+-----+-----------------------+
|product|sales|percentage_contribution|
+-------+-----+-----------------------+
| Laptop| 1200|                   40.0|
|  Phone|  800|                   26.7|
| Tablet|  500|                   16.7|
|Desktop|  500|                   16.7|
+-------+-----+-----------------------+

