# PIVOT() + ROLLUP()

In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Entry point for the DataFrame API used by every cell below; getOrCreate()
# hands back the session under the given application name.
spark = SparkSession.builder.appName("example-pivot-rollup").getOrCreate()

In [5]:
# Column names, in the same positional order as the fields of each record.
columns = ["BatchID", "Year", "CourseName", "Students"]

# One record per batch: (batch id, year as a string, course name, head count).
data = [
    ("X1", "2021", "Scala", 270),
    ("Y5", "2021", "Scala", 230),
    ("N3", "2020", "Scala", 150),
    ("C5", "2020", "Scala", 100),
    ("D7", "2020", "Python", 300),
    ("D3", "2021", "Python", 400),
    ("H2", "2021", "Python", 500),
]

# Build the DataFrame from the in-memory records; `columns` supplies only the
# column names, so no explicit column types are declared here.
df = spark.createDataFrame(data=data, schema=columns)

In [3]:
# Pivot: one row per Year and one column per distinct CourseName, each cell
# holding the summed Students for that (Year, CourseName) pair.
# A grouped aggregation gives no row-order guarantee, so sort by Year
# explicitly to keep the printed table stable from run to run.
(
    df.groupBy("Year")
    .pivot("CourseName")
    .agg(expr("sum(Students)"))
    .orderBy("Year")
    .show()
)

+----+------+-----+
|Year|Python|Scala|
+----+------+-----+
|2020|   300|  250|
|2021|   900|  500|
+----+------+-----+



In [8]:
# Rollup: hierarchical totals of Students over (Year, CourseName).
# The result contains one row per (Year, CourseName), a per-Year subtotal
# row (CourseName is null) and a grand-total row (both columns null).
# The alias is applied to the sum expression itself, naming the output column.
(
    df.rollup("Year", "CourseName")
    .agg(expr("sum(Students)").alias("TotalStudents"))
    .orderBy("Year", "CourseName")
    .show()
)

+----+----------+-------------+
|Year|CourseName|TotalStudents|
+----+----------+-------------+
|null|      null|         1950|
|2020|      null|          550|
|2020|    Python|          300|
|2020|     Scala|          250|
|2021|      null|         1400|
|2021|    Python|          900|
|2021|     Scala|          500|
+----+----------+-------------+

