In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

In [None]:
# Start (or reuse) the Spark session shared by every example below.
spark = (
    SparkSession.builder
    .appName('pyspark-by-examples')
    .getOrCreate()
)

# Sample rows of (name, known languages, properties).
# The rows deliberately cover the edge cases the explode family treats
# differently: a null array element, empty strings, a null array/map
# (Washington) and an empty map (Jefferson).
arrayData = [
    ('James', ['Java', 'Scala'], {'hair': 'black', 'eye': 'brown'}),
    ('Michael', ['Spark', 'Java', None], {'hair': 'brown', 'eye': None}),
    ('Robert', ['CSharp', ''], {'hair': 'red', 'eye': ''}),
    ('Washington', None, None),
    ('Jefferson', ['1', '2'], {}),
]

# Only column names are given, so the column types are inferred from the data.
df = spark.createDataFrame(
    arrayData,
    ['name', 'knownLanguages', 'properties'],
)
df.printSchema()
df.show()

# Explode
Turns each element of an array or map column into its own row.
When an array is passed to this function, it creates a new column, named `col` by default, that contains 
the array elements, one per row. 
When a map is passed, it creates two new columns, `key` and `value`, and each 
entry in the map becomes its own row.
Rows whose array or map is null or empty are dropped; null elements inside an array (or null map values) are kept.

### Explode Array

In [None]:
# explode() on an array column: one output row per array element,
# with the element placed in a generated column.
print('Explode Array')
df2 = df.select(
    df['name'],
    explode(df['knownLanguages']),
)
df2.printSchema()
df2.show()

### Explode Map (Dictionary)

In [None]:
# explode() on a map column: one output row per map entry,
# split into generated key and value columns.
print('Explode Map (Dictionary)')
df3 = df.select(
    df['name'],
    explode(df['properties']),
)
df3.printSchema()
df3.show()

## Explode Outer
Used to create a row for each element in the 
array or map column. Unlike `explode`, if the array or map is null or
empty, `explode_outer` still returns one row for it, with null in the generated column(s).

### Explode Outer Array

In [None]:
# explode_outer() on the array column; the result is displayed directly.
print('Explode Outer Array')
(
    df
    .select(df['name'], explode_outer(df['knownLanguages']))
    .show()
)

### Explode Outer Map (Dictionary)

In [None]:
# explode_outer() on the map column; the result is displayed directly.
print('Explode Outer Map')
(
    df
    .select(df['name'], explode_outer(df['properties']))
    .show()
)

# Posexplode
Creates a row for each element in the array and creates two columns: 
`pos` to hold the position of the array element and `col` to hold the actual 
array value. When the input column is a map, `posexplode` creates three columns:
`pos` to hold the position of the map entry, plus `key` and `value`.
Rows whose array or map is null or empty are skipped; null elements inside the array or map are kept.

### Posexplode Array

In [None]:
# posexplode() on the array column: like explode(), plus each element's position.
print('Posexplode Array')
(
    df
    .select(df['name'], posexplode(df['knownLanguages']))
    .show()
)

### Posexplode Map (Dictionary)

In [None]:
# posexplode() on the map column: like explode(), plus each entry's position.
print('Posexplode Map')
(
    df
    .select(df['name'], posexplode(df['properties']))
    .show()
)

# Posexplode Outer
Creates a row for each element in the array and
creates two columns: `pos` to hold the position of the 
array element and `col` to hold the actual array value. 
Unlike `posexplode`, if the array is null or empty, `posexplode_outer` 
still returns a row, with null in both the `pos` and `col` columns. Similarly, for 
a null or empty map it returns a row with null `pos`, `key`, and `value`.

### Posexplode Outer Array

In [None]:
# posexplode_outer() on the array column; the result is displayed directly.
print('Posexplode Outer Array')
(
    df
    .select(df['name'], posexplode_outer(df['knownLanguages']))
    .show()
)

### Posexplode Outer Map (Dictionary)

In [None]:
# posexplode_outer() on the map column; the result is displayed directly.
print('Posexplode Outer Map')
(
    df
    .select(df['name'], posexplode_outer(df['properties']))
    .show()
)

In [2]:
# Start (or reuse) the Spark session shared by every example below.
spark = (
    SparkSession.builder
    .appName('pyspark-by-examples')
    .getOrCreate()
)

# Sample rows of (name, known languages, properties).
# The rows deliberately cover the edge cases the explode family treats
# differently: a null array element, empty strings, a null array/map
# (Washington) and an empty map (Jefferson).
arrayData = [
    ('James', ['Java', 'Scala'], {'hair': 'black', 'eye': 'brown'}),
    ('Michael', ['Spark', 'Java', None], {'hair': 'brown', 'eye': None}),
    ('Robert', ['CSharp', ''], {'hair': 'red', 'eye': ''}),
    ('Washington', None, None),
    ('Jefferson', ['1', '2'], {}),
]

# Only column names are given, so the column types are inferred from the data
# (array<string> and map<string,string>, as the printed schema shows).
df = spark.createDataFrame(
    arrayData,
    ['name', 'knownLanguages', 'properties'],
)
df.printSchema()
df.show()

root
 |-- name: string (nullable = true)
 |-- knownLanguages: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+----------+-------------------+--------------------+
|      name|     knownLanguages|          properties|
+----------+-------------------+--------------------+
|     James|      [Java, Scala]|{eye -> brown, ha...|
|   Michael|[Spark, Java, null]|{eye -> null, hai...|
|    Robert|         [CSharp, ]|{eye -> , hair ->...|
|Washington|               null|                null|
| Jefferson|             [1, 2]|                  {}|
+----------+-------------------+--------------------+



# Explode
Turns each element of an array or map column into its own row.
When an array is passed to this function, it creates a new column, named `col` by default, that contains 
the array elements, one per row. 
When a map is passed, it creates two new columns, `key` and `value`, and each 
entry in the map becomes its own row.
Rows whose array or map is null or empty are dropped; null elements inside an array (or null map values) are kept.

### Explode Array

In [3]:
# explode() on an array column: one output row per array element, placed in a
# generated `col` column. Washington (null array) produces no rows, while
# Michael's null element is kept.
print('Explode Array')
df2 = df.select(
    df['name'],
    explode(df['knownLanguages']),
)
df2.printSchema()
df2.show()

Explode Array
root
 |-- name: string (nullable = true)
 |-- col: string (nullable = true)

+---------+------+
|     name|   col|
+---------+------+
|    James|  Java|
|    James| Scala|
|  Michael| Spark|
|  Michael|  Java|
|  Michael|  null|
|   Robert|CSharp|
|   Robert|      |
|Jefferson|     1|
|Jefferson|     2|
+---------+------+



### Explode Map (Dictionary)

In [4]:
# explode() on a map column: one output row per map entry, split into `key`
# and `value` columns. Washington (null map) and Jefferson (empty map)
# produce no rows.
print('Explode Map (Dictionary)')
df3 = df.select(
    df['name'],
    explode(df['properties']),
)
df3.printSchema()
df3.show()

Explode Map (Dictionary)
root
 |-- name: string (nullable = true)
 |-- key: string (nullable = false)
 |-- value: string (nullable = true)

+-------+----+-----+
|   name| key|value|
+-------+----+-----+
|  James| eye|brown|
|  James|hair|black|
|Michael| eye| null|
|Michael|hair|brown|
| Robert| eye|     |
| Robert|hair|  red|
+-------+----+-----+



## Explode Outer
Used to create a row for each element in the 
array or map column. Unlike `explode`, if the array or map is null or
empty, `explode_outer` still returns one row for it, with null in the generated column(s).

### Explode Outer Array

In [5]:
# explode_outer() keeps Washington, whose array is null, as a single row
# with a null `col`.
print('Explode Outer Array')
(
    df
    .select(df['name'], explode_outer(df['knownLanguages']))
    .show()
)

Explode Outer Array
+----------+------+
|      name|   col|
+----------+------+
|     James|  Java|
|     James| Scala|
|   Michael| Spark|
|   Michael|  Java|
|   Michael|  null|
|    Robert|CSharp|
|    Robert|      |
|Washington|  null|
| Jefferson|     1|
| Jefferson|     2|
+----------+------+



### Explode Outer Map (Dictionary)

In [6]:
# explode_outer() keeps Washington (null map) and Jefferson (empty map),
# each as a single row with null `key` and `value`.
print('Explode Outer Map')
(
    df
    .select(df['name'], explode_outer(df['properties']))
    .show()
)

Explode Outer Map
+----------+----+-----+
|      name| key|value|
+----------+----+-----+
|     James| eye|brown|
|     James|hair|black|
|   Michael| eye| null|
|   Michael|hair|brown|
|    Robert| eye|     |
|    Robert|hair|  red|
|Washington|null| null|
| Jefferson|null| null|
+----------+----+-----+



# Posexplode
Creates a row for each element in the array and creates two columns: 
`pos` to hold the position of the array element and `col` to hold the actual 
array value. When the input column is a map, `posexplode` creates three columns:
`pos` to hold the position of the map entry, plus `key` and `value`.
Rows whose array or map is null or empty are skipped; null elements inside the array or map are kept.

### Posexplode Array

In [7]:
# posexplode() adds a zero-based `pos` column next to `col`.
# Washington (null array) produces no rows.
print('Posexplode Array')
(
    df
    .select(df['name'], posexplode(df['knownLanguages']))
    .show()
)

Posexplode Array
+---------+---+------+
|     name|pos|   col|
+---------+---+------+
|    James|  0|  Java|
|    James|  1| Scala|
|  Michael|  0| Spark|
|  Michael|  1|  Java|
|  Michael|  2|  null|
|   Robert|  0|CSharp|
|   Robert|  1|      |
|Jefferson|  0|     1|
|Jefferson|  1|     2|
+---------+---+------+



### Posexplode Map (Dictionary)

In [8]:
# posexplode() on a map yields `pos`, `key` and `value` columns.
# Washington (null map) and Jefferson (empty map) produce no rows.
print('Posexplode Map')
(
    df
    .select(df['name'], posexplode(df['properties']))
    .show()
)

Posexplode Map
+-------+---+----+-----+
|   name|pos| key|value|
+-------+---+----+-----+
|  James|  0| eye|brown|
|  James|  1|hair|black|
|Michael|  0| eye| null|
|Michael|  1|hair|brown|
| Robert|  0| eye|     |
| Robert|  1|hair|  red|
+-------+---+----+-----+



# Posexplode Outer
Creates a row for each element in the array and
creates two columns: `pos` to hold the position of the 
array element and `col` to hold the actual array value. 
Unlike `posexplode`, if the array is null or empty, `posexplode_outer` 
still returns a row, with null in both the `pos` and `col` columns. Similarly, for 
a null or empty map it returns a row with null `pos`, `key`, and `value`.

### Posexplode Outer Array

In [9]:
# posexplode_outer() keeps Washington, whose array is null, as a single row
# with null `pos` and `col`.
print('Posexplode Outer Array')
(
    df
    .select(df['name'], posexplode_outer(df['knownLanguages']))
    .show()
)

Posexplode Outer Array
+----------+----+------+
|      name| pos|   col|
+----------+----+------+
|     James|   0|  Java|
|     James|   1| Scala|
|   Michael|   0| Spark|
|   Michael|   1|  Java|
|   Michael|   2|  null|
|    Robert|   0|CSharp|
|    Robert|   1|      |
|Washington|null|  null|
| Jefferson|   0|     1|
| Jefferson|   1|     2|
+----------+----+------+



### Posexplode Outer Map (Dictionary)

In [10]:
# posexplode_outer() keeps Washington (null map) and Jefferson (empty map),
# each as a single row with null `pos`, `key` and `value`.
print('Posexplode Outer Map')
(
    df
    .select(df['name'], posexplode_outer(df['properties']))
    .show()
)

Posexplode Outer Map
+----------+----+----+-----+
|      name| pos| key|value|
+----------+----+----+-----+
|     James|   0| eye|brown|
|     James|   1|hair|black|
|   Michael|   0| eye| null|
|   Michael|   1|hair|brown|
|    Robert|   0| eye|     |
|    Robert|   1|hair|  red|
|Washington|null|null| null|
| Jefferson|null|null| null|
+----------+----+----+-----+

