In [0]:
from pyspark.sql import SparkSession
# Get the active SparkSession, or create one registered under this app name.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows as (name, properties) tuples; properties maps an attribute name
# to its value. Michael's 'eye' is None and Jefferson's 'eye' is the empty
# string, so both null and empty map values appear in the examples.
dataDictionary = [
    ('James', {'hair': 'black', 'eye': 'brown'}),
    ('Michael', {'hair': 'brown', 'eye': None}),
    ('Robert', {'hair': 'red', 'eye': 'black'}),
    ('Washington', {'hair': 'grey', 'eye': 'grey'}),
    ('Jefferson', {'hair': 'brown', 'eye': ''}),
]

# Using StructType schema: `name` is a nullable string and `properties` is a
# nullable map with string keys and string values.
from pyspark.sql.types import StructField, StructType, StringType, MapType
schema = StructType(
    fields=[
        StructField(name='name', dataType=StringType(), nullable=True),
        StructField(
            name='properties',
            dataType=MapType(keyType=StringType(), valueType=StringType()),
            nullable=True,
        ),
    ]
)
df = spark.createDataFrame(dataDictionary, schema=schema)
df.printSchema()
# truncate=False prints each cell in full instead of cutting long values.
df.show(truncate=False)

def _name_hair_eye(row):
    """Flatten one (name, properties) row into a (name, hair, eye) tuple.

    The schema declares `properties` as a nullable map, so a row may carry
    None instead of a dict, or a dict that lacks one of the keys. Direct
    indexing would raise TypeError / KeyError inside the Spark task in those
    cases; dict.get() yields None for the missing value instead.
    """
    props = row.properties or {}
    return (row.name, props.get("hair"), props.get("eye"))


# Convert the map column to plain columns by going through the RDD API.
df3 = df.rdd.map(_name_hair_eye).toDF(["name", "hair", "eye"])
df3.printSchema()
df3.show()

# Copy the "hair" and "eye" map entries into their own columns with
# Column.getItem(), drop the map column, and print the result.
(
    df.withColumn("hair", df["properties"].getItem("hair"))
    .withColumn("eye", df["properties"].getItem("eye"))
    .drop("properties")
    .show()
)

# Copy the "hair" and "eye" map entries into their own columns using bracket
# indexing on the map column, drop the map column, and print the result.
(
    df.withColumn("hair", df["properties"]["hair"])
    .withColumn("eye", df["properties"]["eye"])
    .drop("properties")
    .show()
)

from pyspark.sql.functions import explode
# explode() on a map column produces one output row per map entry.
df.select(df["name"], explode(df["properties"])).show()

from pyspark.sql.functions import map_keys
# Show each name next to the keys of its properties map.
df.select(df["name"], map_keys(df["properties"])).show()

from pyspark.sql.functions import map_values
# Show each name next to the values of its properties map.
df.select(df["name"], map_values(df["properties"])).show()

# Disabled example: collect the distinct map keys across all rows into a Python list.
#from pyspark.sql.functions import explode,map_keys
#keysDF = df.select(explode(map_keys(df.properties))).distinct()
#keysList = keysDF.rdd.map(lambda x:x[0]).collect()
#print(keysList)

root
 |-- name: string (nullable = true)
 |-- properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+----------+-----------------------------+
|name      |properties                   |
+----------+-----------------------------+
|James     |{eye -> brown, hair -> black}|
|Michael   |{eye -> null, hair -> brown} |
|Robert    |{eye -> black, hair -> red}  |
|Washington|{eye -> grey, hair -> grey}  |
|Jefferson |{eye -> , hair -> brown}     |
+----------+-----------------------------+

root
 |-- name: string (nullable = true)
 |-- hair: string (nullable = true)
 |-- eye: string (nullable = true)

+----------+-----+-----+
|      name| hair|  eye|
+----------+-----+-----+
|     James|black|brown|
|   Michael|brown| null|
|    Robert|  red|black|
|Washington| grey| grey|
| Jefferson|brown|     |
+----------+-----+-----+

+----------+-----+-----+
|      name| hair|  eye|
+----------+-----+-----+
|     James|black|brown|
|   Michael|brown| 

In [0]:
#Importing Libraries:

#The SparkSession class is imported from the pyspark.sql module to create a Spark session and work with DataFrames.
#Additional functions and types are imported from pyspark.sql.functions and pyspark.sql.types for specific operations.
#Creating SparkSession:

#spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate() creates a SparkSession with the application name "SparkByExamples.com".
#Creating DataFrame with MapType Column:

#The code defines a list of tuples, dataDictionary, where each tuple contains a name and a properties map.
#A StructType schema is defined using StructField and MapType to specify the structure of the DataFrame.
#df = spark.createDataFrame(data=dataDictionary, schema=schema) creates a DataFrame, df, using the provided data and schema.
#Displaying DataFrame:

#df.printSchema() displays the schema of the DataFrame.
#df.show(truncate=False) displays the data in the DataFrame.
#Accessing MapType Column:

#The code demonstrates several ways to read values out of the MapType column, properties, in the DataFrame.
#It extracts the hair and eye values into separate columns in three ways: an RDD map over the rows, getItem(), and bracket indexing (df.properties["hair"]).
#drop("properties") returns a new DataFrame without the map column; the original df is left unchanged and is reused by the later examples.
#Exploding MapType Column:

#The explode() function from pyspark.sql.functions is used to explode the properties map into separate rows with key-value pairs.
#df.select(df.name, explode(df.properties)).show() displays the exploded DataFrame with the name and exploded properties.
#Accessing Map Keys and Values:

#The map_keys() and map_values() functions from pyspark.sql.functions are used to extract the keys and values of the properties map, respectively.
#df.select(df.name, map_keys(df.properties)).show() displays the DataFrame with the name and keys of the properties.
#df.select(df.name, map_values(df.properties)).show() displays the DataFrame with the name and values of the properties.
#The code showcases various operations to work with DataFrame columns of MapType, including accessing specific values, exploding the map into separate rows, and extracting keys and values from the map.