In [0]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr
# Start (or reuse) the Spark session that every step below runs on.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample sales records, one (Product, Amount, Country) tuple per line.
# The ("Orange", 2000, "USA") record appears twice on purpose of the data set:
# the pivot below sums both occurrences.
data = [
    ("Banana", 1000, "USA"),
    ("Carrots", 1500, "USA"),
    ("Beans", 1600, "USA"),
    ("Orange", 2000, "USA"),
    ("Orange", 2000, "USA"),
    ("Banana", 400, "China"),
    ("Carrots", 1200, "China"),
    ("Beans", 1500, "China"),
    ("Orange", 4000, "China"),
    ("Banana", 2000, "Canada"),
    ("Carrots", 2000, "Canada"),
    ("Beans", 2000, "Mexico"),
]

# Column names are applied positionally to each tuple; the column types are
# inferred by Spark from the Python values.
columns = ["Product", "Amount", "Country"]
df = spark.createDataFrame(data=data, schema=columns)

# Show the inferred schema and the full, untruncated contents.
df.printSchema()
df.show(truncate=False)

# Pivot long -> wide: one row per Product, one column per distinct Country
# value, each cell holding the summed Amount for that Product/Country pair.
# A Product with no rows for a given Country gets null in that column.
pivotDF = (
    df.groupBy("Product")
    .pivot("Country")
    .sum("Amount")
)
pivotDF.printSchema()
pivotDF.show(truncate=False)

# Two-stage pivot: first sum Amount per (Product, Country), then pivot the
# pre-aggregated "sum(Amount)" column by Country. Summing the partial sums
# gives the same totals as pivoting df directly.
pivotDF = (
    df.groupBy("Product", "Country")
    .sum("Amount")
    .groupBy("Product")
    .pivot("Country")
    .sum("sum(Amount)")
)
pivotDF.printSchema()
pivotDF.show(truncate=False)


# Unpivot wide -> long: turn every per-country column of pivotDF back into
# (Country, Total) rows.
#
# The country columns are read from pivotDF itself instead of being hard-coded.
# The previous hard-coded stack(3, 'Canada', ..., 'Mexico', ...) expression left
# out the USA column, so all USA totals were silently dropped from the result,
# and it would also fail if one of the listed countries were missing.
countryColumns = [name for name in pivotDF.columns if name != "Product"]

# stack(n, 'label1', col1, 'label2', col2, ...) emits n rows per input row,
# one per (label, column) pair. Column names are backtick-quoted so names with
# spaces still parse.
# NOTE(review): assumes country names contain no quote or backtick characters.
stackPairs = ", ".join(
    "'{0}', `{0}`".format(name) for name in countryColumns
)
unpivotExpr = "stack({0}, {1}) as (Country,Total)".format(
    len(countryColumns), stackPairs
)

# Drop the (Product, Country) combinations that had no data in the pivot.
unPivotDF = (
    pivotDF.select("Product", expr(unpivotExpr))
    .where("Total is not null")
)
unPivotDF.show(truncate=False)

root
 |-- Product: string (nullable = true)
 |-- Amount: long (nullable = true)
 |-- Country: string (nullable = true)

+-------+------+-------+
|Product|Amount|Country|
+-------+------+-------+
|Banana |1000  |USA    |
|Carrots|1500  |USA    |
|Beans  |1600  |USA    |
|Orange |2000  |USA    |
|Orange |2000  |USA    |
|Banana |400   |China  |
|Carrots|1200  |China  |
|Beans  |1500  |China  |
|Orange |4000  |China  |
|Banana |2000  |Canada |
|Carrots|2000  |Canada |
|Beans  |2000  |Mexico |
+-------+------+-------+

root
 |-- Product: string (nullable = true)
 |-- Canada: long (nullable = true)
 |-- China: long (nullable = true)
 |-- Mexico: long (nullable = true)
 |-- USA: long (nullable = true)

+-------+------+-----+------+----+
|Product|Canada|China|Mexico|USA |
+-------+------+-----+------+----+
|Orange |null  |4000 |null  |4000|
|Beans  |null  |1500 |2000  |1600|
|Banana |2000  |400  |null  |1000|
|Carrots|2000  |1200 |null  |1500|
+-------+------+-----+------+----+

root
 |-- Pro

In [0]:
#Importing Libraries: The code imports the necessary libraries: pyspark, SparkSession, and expr from pyspark.sql.functions.

#Creating SparkSession: A Spark session is created using SparkSession.builder.appName('SparkByExamples.com').getOrCreate().

#Creating DataFrame: A DataFrame named df is created from a list of tuples data and column names columns using spark.createDataFrame(data=data, schema=columns).

#Displaying DataFrame Schema and Content: The schema and content of the DataFrame df are displayed using df.printSchema() and df.show(truncate=False).

#Pivot Operation: The DataFrame df is pivoted using the groupBy("Product").pivot("Country").sum("Amount") operation. This groups the data by "Product", creates columns based on distinct "Country" values, and calculates the sum of "Amount" for each combination.

#Displaying Pivot DataFrame Schema and Content: The schema and content of the pivot DataFrame pivotDF are displayed using pivotDF.printSchema() and pivotDF.show(truncate=False).

#Chained Pivot Operation: The DataFrame df is further transformed using chained operations. It performs grouping, summing, grouping again, and pivoting to calculate the sum of "Amount" for each "Product" and "Country" combination.

#Displaying Chained Pivot DataFrame Schema and Content: The schema and content of the chained pivot DataFrame pivotDF are displayed using pivotDF.printSchema() and pivotDF.show(truncate=False).

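#Unpivot Operation: The pivot DataFrame pivotDF is unpivoted using the stack() and expr() functions. The first argument to stack() is the number of rows to generate per input row; it is followed by one (label, column) pair for each of those rows, and the two output columns are aliased as Country and Total. Rows whose Total is null are then filtered out. The resulting DataFrame is named unPivotDF.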

#Displaying Unpivot DataFrame: The unpivot DataFrame unPivotDF is displayed using unPivotDF.show(truncate=False).

#The code demonstrates how to perform pivot and unpivot operations in PySpark to reshape and transform data between wide and long formats.
