In [0]:
from pyspark.sql import SparkSession
# Reuse the active Spark session if there is one; otherwise start a new one.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows of (name, properties). Michael's eye colour is None and
# Jefferson's is an empty string, so both a null and a blank map value
# appear in the displayed output.
dataDictionary = [
    ('James', dict(hair='black', eye='brown')),
    ('Michael', dict(hair='brown', eye=None)),
    ('Robert', dict(hair='red', eye='black')),
    ('Washington', dict(hair='grey', eye='grey')),
    ('Jefferson', dict(hair='brown', eye='')),
]

# Only column names are supplied here, so Spark infers the column types
# (the dicts end up as a map column).
df = spark.createDataFrame(dataDictionary, ['name', 'properties'])
df.printSchema()
df.show(truncate=False)

# Using StructType schema
from pyspark.sql.types import StructField, StructType, StringType, MapType,IntegerType
# Explicit schema: a nullable string column plus a nullable map column whose
# keys and values are both strings.
schema = StructType(fields=[
    StructField(name='name', dataType=StringType(), nullable=True),
    StructField(
        name='properties',
        dataType=MapType(keyType=StringType(), valueType=StringType()),
        nullable=True,
    ),
])

# Same rows as before, but with the declared schema instead of inferred types.
df2 = spark.createDataFrame(dataDictionary, schema)
df2.printSchema()
df2.show(truncate=False)

# Flatten the map column into plain columns by going through the RDD API.
# Each Row exposes the map as a Python dict. Using .get() (with an empty-dict
# fallback for a null map) produces a null cell when a row has no "hair" or
# "eye" entry; plain [] indexing would raise KeyError/TypeError inside the
# executor and fail the whole job.
df3 = (
    df.rdd
    .map(lambda row: (
        row.name,
        (row.properties or {}).get("hair"),
        (row.properties or {}).get("eye"),
    ))
    .toDF(["name", "hair", "eye"])
)
df3.printSchema()
df3.show()

# Same flattening with the DataFrame API: pull each map entry out with
# Column.getItem(), then drop the original map column.
(
    df
    .withColumn("hair", df.properties.getItem("hair"))
    .withColumn("eye", df.properties.getItem("eye"))
    .drop("properties")
    .show()
)

# Equivalent to the getItem() form, using bracket indexing on the map column.
(
    df
    .withColumn("hair", df.properties["hair"])
    .withColumn("eye", df.properties["eye"])
    .drop("properties")
    .show()
)

# Functions
from pyspark.sql.functions import explode,map_keys,col
# Discover the map keys at runtime instead of hard-coding them: explode every
# row's key array into one row per key, then de-duplicate.
keysDF = df.select(explode(map_keys(df.properties))).distinct()

# distinct() gives no ordering guarantee, so sort the collected keys to keep
# the generated column order stable from run to run.
keysList = sorted(row[0] for row in keysDF.collect())

# One output column per key, named after the key.
keyCols = [col("properties").getItem(key).alias(str(key)) for key in keysList]
df.select(df.name, *keyCols).show()

root
 |-- name: string (nullable = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+----------+-----------------------------+
|name      |properties                   |
+----------+-----------------------------+
|James     |{eye -> brown, hair -> black}|
|Michael   |{eye -> null, hair -> brown} |
|Robert    |{eye -> black, hair -> red}  |
|Washington|{eye -> grey, hair -> grey}  |
|Jefferson |{eye -> , hair -> brown}     |
+----------+-----------------------------+

root
 |-- name: string (nullable = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+----------+-----------------------------+
|name      |properties                   |
+----------+-----------------------------+
|James     |{eye -> brown, hair -> black}|
|Michael   |{eye -> null, hair -> brown} |
|Robert    |{eye -> black, hair -> red}  |
|Washington|{eye -> grey, hair -> grey}  |
|Je

In [0]:
#Import the necessary module: SparkSession from pyspark.sql.
#Create a SparkSession named 'SparkByExamples.com' using SparkSession.builder.appName('SparkByExamples.com').getOrCreate().
#Define sample data as a list of tuples, where each tuple contains a name and a properties dictionary.
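#Define the schema for the first DataFrame as a plain list of column names, ['name','properties']; the column types are then inferred from the data.
#Create a DataFrame, df, using spark.createDataFrame(data=dataDictionary, schema=['name','properties']).
#Print the schema of the DataFrame using df.printSchema().
#Show the content of the DataFrame using df.show(truncate=False).
#Define an explicit schema using StructType and StructField (a string name column and a string-to-string MapType properties column), then create another DataFrame, df2, from the same data using spark.createDataFrame(data=dataDictionary, schema=schema).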
#Print the schema of df2 using df2.printSchema().
#Show the content of df2 using df2.show(truncate=False).
#Transform the DataFrame df using RDD operations and convert it back to a DataFrame df3 by mapping the values and specifying the column names.
#Print the schema of df3 using df3.printSchema().
#Show the content of df3 using df3.show().
#Use the withColumn function to extract values from the properties map and add them as separate columns to the DataFrame. Finally, drop the original properties column using drop("properties") and show the updated DataFrame.
#Repeat the previous step (step 14), but access the properties map with bracket indexing, e.g. df.properties["hair"], instead of getItem().
#Use the explode function and map_keys function to extract distinct keys from the properties map. Convert the resulting DataFrame keysDF to a list of keys.
#Create a list of columns using the col function and getItem method to access the values from the properties map based on the keys extracted in the previous step (step 16), aliasing each column with its key. Select the name column along with the key columns and show the resulting DataFrame.
#The code demonstrates how to work with nested data, specifically maps, in PySpark DataFrames.