In [0]:
# Demonstrates the four PySpark "explode" functions on array and map columns:
#   explode / explode_outer       - one output row per array element or map entry
#   posexplode / posexplode_outer - same, plus the position of each element
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    explode,
    explode_outer,
    posexplode,
    posexplode_outer,
)

# getOrCreate() reuses an already-running session when there is one.
spark = SparkSession.builder.appName('pyspark-by-examples').getOrCreate()

# Each row is (name, list of languages, dict of properties). The rows
# deliberately include None collections, an empty dict, and None / ''
# elements so the differences between the explode variants show up.
arrayData = [
    ('James', ['Java', 'Scala'], {'hair': 'black', 'eye': 'brown'}),
    ('Michael', ['Spark', 'Java', None], {'hair': 'brown', 'eye': None}),
    ('Robert', ['CSharp', ''], {'hair': 'red', 'eye': ''}),
    ('Washington', None, None),
    ('Jefferson', ['1', '2'], {}),
]
df = spark.createDataFrame(
    data=arrayData,
    schema=['name', 'knownLanguages', 'properties'],
)
df.printSchema()
df.show()

# explode on an array column: one row per element, in a column named `col`
# by default.
df2 = df.select(df.name, explode(df.knownLanguages))
df2.printSchema()
df2.show()

# explode on a map column: one row per entry, in columns named `key` and
# `value` by default.
df3 = df.select(df.name, explode(df.properties))
df3.printSchema()
df3.show()

# explode_outer: like explode, but a null or empty array/map still yields
# a row (with nulls) instead of being dropped.
# -- with array
df.select(df.name, explode_outer(df.knownLanguages)).show()
# -- with map
df.select(df.name, explode_outer(df.properties)).show()

# posexplode: like explode, with an additional `pos` column holding the
# position of each element.
# -- with array
df.select(df.name, posexplode(df.knownLanguages)).show()
# -- with map
df.select(df.name, posexplode(df.properties)).show()

# posexplode_outer: posexplode that keeps rows whose array/map is null or
# empty.
# -- with array
df.select(df.name, posexplode_outer(df.knownLanguages)).show()
# -- with map
df.select(df.name, posexplode_outer(df.properties)).show()


# Kept as a string expression (not a comment): as the last expression of a
# notebook cell it may be echoed as the cell's result.
"""END"""

root
 |-- name: string (nullable = true)
 |-- knownLanguages: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+----------+-------------------+--------------------+
|      name|     knownLanguages|          properties|
+----------+-------------------+--------------------+
|     James|      [Java, Scala]|{eye -> brown, ha...|
|   Michael|[Spark, Java, null]|{eye -> null, hai...|
|    Robert|         [CSharp, ]|{eye -> , hair ->...|
|Washington|               null|                null|
| Jefferson|             [1, 2]|                  {}|
+----------+-------------------+--------------------+

root
 |-- name: string (nullable = true)
 |-- col: string (nullable = true)

+---------+------+
|     name|   col|
+---------+------+
|    James|  Java|
|    James| Scala|
|  Michael| Spark|
|  Michael|  Java|
|  Michael|  null|
|   Robert|CSharp|
|   Robert|      |

In [0]:
#It imports the necessary modules: pyspark and SparkSession.
#It creates a SparkSession object named spark with the configuration appName('pyspark-by-examples').
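#It defines a list, arrayData, of tuples. Each tuple holds a name (string), a list of known languages, and a dict of properties; some of the lists and dicts are None, empty, or contain None or empty-string values.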
#It creates a DataFrame df by calling spark.createDataFrame() and passing the arrayData and the schema as arguments. The schema is defined with three columns: 'name', 'knownLanguages', and 'properties'. The resulting DataFrame is displayed using df.printSchema() and df.show().
#It imports the explode function from pyspark.sql.functions.
#It applies the explode function to the 'knownLanguages' column of df by calling df.select() and passing df.name and explode(df.knownLanguages) as arguments. The resulting DataFrame is assigned to df2 and displayed using df2.printSchema() and df2.show().
#It applies the explode function to the 'properties' column of df by calling df.select() and passing df.name and explode(df.properties) as arguments. The resulting DataFrame is assigned to df3 and displayed using df3.printSchema() and df3.show().
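#It imports the explode_outer function from pyspark.sql.functions. Unlike explode, explode_outer still produces a row (with null values) when the array or map is null or empty.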
#It applies the explode_outer function to the 'knownLanguages' column of df by calling df.select() and passing df.name and explode_outer(df.knownLanguages) as arguments. The resulting DataFrame is displayed using df.select(df.name,explode_outer(df.knownLanguages)).show().
#It applies the explode_outer function to the 'properties' column of df by calling df.select() and passing df.name and explode_outer(df.properties) as arguments. The resulting DataFrame is displayed using df.select(df.name,explode_outer(df.properties)).show().
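#It imports the posexplode and posexplode_outer functions from pyspark.sql.functions. They behave like explode and explode_outer, but also return the position of each element in an additional 'pos' column.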
#It applies the posexplode function to the 'knownLanguages' column of df by calling df.select() and passing df.name and posexplode(df.knownLanguages) as arguments. The resulting DataFrame is displayed using df.select(df.name,posexplode(df.knownLanguages)).show().
#It applies the posexplode function to the 'properties' column of df by calling df.select() and passing df.name and posexplode(df.properties) as arguments. The resulting DataFrame is displayed using df.select(df.name,posexplode(df.properties)).show().
#It applies the posexplode_outer function to the 'knownLanguages' column of df by calling df.select() and passing df.name and posexplode_outer(df.knownLanguages) as arguments. The resulting DataFrame is displayed using df.select(df.name,posexplode_outer(df.knownLanguages)).show().
#It applies the posexplode_outer function to the 'properties' column of df by calling df.select() and passing df.name and posexplode_outer(df.properties) as arguments. The resulting DataFrame is displayed using df.select(df.name,posexplode_outer(df.properties)).show().
