In [0]:
# Sample input in long format: one row per (company, quarter) pair.
columns = ["Company", "Quarter", "Revenue"]
data = [
    ("CompanyA", "2025-Q1", 100000),
    ("CompanyA", "2025-Q2", 120000),
    ("CompanyB", "2025-Q1", 90000),
    ("CompanyB", "2025-Q2", 95000),
]

# Column names come from `columns`; the column types are left for Spark to infer.
df = spark.createDataFrame(data, schema=columns)
display(df)

Company,Quarter,Revenue
CompanyA,2025-Q1,100000
CompanyA,2025-Q2,120000
CompanyB,2025-Q1,90000
CompanyB,2025-Q2,95000


### Pivot

In [0]:
# Long -> wide: one row per Company, one column per distinct Quarter value,
# each cell holding the summed Revenue for that (Company, Quarter) pair.
revenue_by_company = df.groupBy("Company")
df_pivot = revenue_by_company.pivot("Quarter").sum("Revenue")
display(df_pivot)

Company,2025-Q1,2025-Q2
CompanyA,100000,120000
CompanyB,90000,95000


### UnPivot

In [0]:
# Wide -> long with stack(n, label_1, value_1, ..., label_n, value_n):
# every input row is expanded into n rows of (Quarter, Revenue).
# The quoted '2025-Q1' is the label written into the Quarter column, while the
# backtick-quoted `2025-Q1` refers to the pivoted column itself; the backticks
# are needed because the column name contains a hyphen.
quarter_stack = "stack(2, '2025-Q1', `2025-Q1`, '2025-Q2', `2025-Q2`) as (Quarter, Revenue)"

df_unpivot = df_pivot.selectExpr("Company", quarter_stack)
display(df_unpivot)

Company,Quarter,Revenue
CompanyA,2025-Q1,100000
CompanyA,2025-Q2,120000
CompanyB,2025-Q1,90000
CompanyB,2025-Q2,95000


- If you have many columns to unpivot, writing the `stack` expression by hand is not practical.
- Instead, you can generate the `stack` expression dynamically in Python from the DataFrame's column list.

In [0]:
def _sql_string(value):
    """Render `value` as a single-quoted Spark SQL string literal.

    Backslashes and single quotes are backslash-escaped so that the text
    cannot terminate the literal early.
    NOTE(review): assumes the default parser setting
    (spark.sql.parser.escapedStringLiterals=false) - confirm for this cluster.
    """
    return "'" + value.replace("\\", "\\\\").replace("'", "\\'") + "'"


def _sql_identifier(name):
    """Render `name` as a backtick-quoted Spark SQL identifier.

    A backtick inside the name is escaped by doubling it.
    """
    return "`" + name.replace("`", "``") + "`"


# Every column except the id/grouping column is a value column to unpivot.
pivot_cols = [name for name in df_pivot.columns if name != "Company"]

# With no value columns the generated expression would be "stack(0, )", which
# is not valid SQL; fail early with a message that names the actual problem.
if not pivot_cols:
    raise ValueError("df_pivot has no columns to unpivot besides 'Company'")

# Build the stack expression dynamically: one ('label', `column`) pair per
# value column. Both forms are escaped so that column names containing quotes,
# backslashes or backticks still produce a well-formed expression.
n = len(pivot_cols)
stack_args = ", ".join(
    f"{_sql_string(c)}, {_sql_identifier(c)}" for c in pivot_cols
)
stack_expr = f"stack({n}, {stack_args}) as (Quarter, revenue)"

# Use selectExpr with the generated stack expression
df_unpivot = df_pivot.selectExpr("Company", stack_expr)
display(df_unpivot)

Company,Quarter,revenue
CompanyA,2025-Q1,100000
CompanyA,2025-Q2,120000
CompanyB,2025-Q1,90000
CompanyB,2025-Q2,95000
