In [0]:
from pyspark.sql import functions as fn

In [0]:
# Sample rows covering the three cases the generator functions in this
# notebook treat differently: a fully populated array (A), an array that
# contains a null element (B), and an array that is itself null (C).
data = [
    dict(row='A', values=[1, 2, 3]),
    dict(row='B', values=[4, 5, None]),
    dict(row='C', values=None),
]

# No explicit schema is supplied, so Spark infers the column types from the dicts.
df = spark.createDataFrame(data)
df.printSchema()

root
 |-- row: string (nullable = true)
 |-- values: array (nullable = true)
 |    |-- element: long (containsNull = true)



# explode

In [0]:
# explode emits one output row per array element. A null element still
# produces a row (null in 'exploded'), but a row whose array is itself null
# produces no output rows at all, so row C is absent from the result.
df_explode = df.withColumn('exploded', fn.explode(fn.col('values')))

display(df_explode)

row,values,exploded
A,"List(1, 2, 3)",1.0
A,"List(1, 2, 3)",2.0
A,"List(1, 2, 3)",3.0
B,"List(4, 5, null)",4.0
B,"List(4, 5, null)",5.0
B,"List(4, 5, null)",


# explode_outer

In [0]:
# explode_outer behaves like explode, except that a row whose array is null
# is kept and yields a single output row with null in 'exploded' (row C).
# The result is stored under its own name so that the DataFrame produced by
# the plain explode cell is not overwritten when this cell runs.
df_explode_outer = df.withColumn(
    'exploded', fn.explode_outer('values')
)

display(df_explode_outer)

row,values,exploded
A,"List(1, 2, 3)",1.0
A,"List(1, 2, 3)",2.0
A,"List(1, 2, 3)",3.0
B,"List(4, 5, null)",4.0
B,"List(4, 5, null)",5.0
B,"List(4, 5, null)",
C,,


# posexplode

In [0]:
# posexplode yields two columns per array element: the element's zero-based
# position and its value. Without an alias they are named 'pos' and 'col'.
# Rows whose array is null produce no output.
df_posexplode = df.select(
    'row',
    'values',
    fn.posexplode(fn.col('values')),
)

display(df_posexplode)

row,values,pos,col
A,"List(1, 2, 3)",0,1.0
A,"List(1, 2, 3)",1,2.0
A,"List(1, 2, 3)",2,3.0
B,"List(4, 5, null)",0,4.0
B,"List(4, 5, null)",1,5.0
B,"List(4, 5, null)",2,


In [0]:
# Same as the previous posexplode cell, but a two-name alias renames the
# generated columns: the position becomes 'index' and the value 'column'.
df_posexplode = df.select(
    'row',
    'values',
    fn.posexplode(fn.col('values')).alias("index", "column"),
)

display(df_posexplode)

row,values,index,column
A,"List(1, 2, 3)",0,1.0
A,"List(1, 2, 3)",1,2.0
A,"List(1, 2, 3)",2,3.0
B,"List(4, 5, null)",0,4.0
B,"List(4, 5, null)",1,5.0
B,"List(4, 5, null)",2,


# posexplode_outer

In [0]:
# posexplode_outer yields the same 'pos' / 'col' pair as posexplode, but a
# row whose array is null is kept and produces one output row with null in
# both generated columns (row C).
df_posexplode_outer = df.select(
    'row',
    'values',
    fn.posexplode_outer(fn.col('values')),
)

display(df_posexplode_outer)

row,values,pos,col
A,"List(1, 2, 3)",0.0,1.0
A,"List(1, 2, 3)",1.0,2.0
A,"List(1, 2, 3)",2.0,3.0
B,"List(4, 5, null)",0.0,4.0
B,"List(4, 5, null)",1.0,5.0
B,"List(4, 5, null)",2.0,
C,,,


In [0]:
# Same as the previous posexplode_outer cell, but a two-name alias renames
# the generated columns: the position becomes 'index' and the value 'column'.
df_posexplode_outer = df.select(
    'row',
    'values',
    fn.posexplode_outer(fn.col('values')).alias("index", "column"),
)

display(df_posexplode_outer)

row,values,index,column
A,"List(1, 2, 3)",0.0,1.0
A,"List(1, 2, 3)",1.0,2.0
A,"List(1, 2, 3)",2.0,3.0
B,"List(4, 5, null)",0.0,4.0
B,"List(4, 5, null)",1.0,5.0
B,"List(4, 5, null)",2.0,
C,,,
