In [1]:
import findspark
# Called ahead of the first `pyspark` import below so that the local Spark
# installation is importable from this kernel.
findspark.init()

In [2]:
from pyspark.sql import SparkSession
# Single shared session for the whole notebook: reuses an already-active
# SparkSession if one exists, otherwise creates one with default settings.
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

In [7]:
# Sample rows: a person's name plus a dict of attributes.
# With no explicit types given, Spark infers the dict as a map<string,string> column.
data = [
    ('srikanth', {'hair': 'black', 'eye': 'brown'}),
    ('Manvith', {'hair': 'black', 'eye': 'blue'}),
]

# Column names only -- the column types are inferred from `data`.
schema = ['Name', 'Properties']

df = spark.createDataFrame(data, schema=schema)

df.show(truncate=False)

df.printSchema()

+--------+-----------------------------+
|Name    |Properties                   |
+--------+-----------------------------+
|srikanth|{eye -> brown, hair -> black}|
|Manvith |{eye -> blue, hair -> black} |
+--------+-----------------------------+

root
 |-- Name: string (nullable = true)
 |-- Properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [8]:
from pyspark.sql.types import StructType,StructField,StringType,IntegerType,MapType

# Explicit schema equivalent to the inferred one: a string column and a
# map<string,string> column. No backslashes are needed inside the brackets.
schema = StructType([
    StructField('Name', StringType()),
    StructField('Properties', MapType(StringType(), StringType())),
])

df1 = spark.createDataFrame(data, schema)

df1.printSchema()

df1.show(truncate=False)

root
 |-- Name: string (nullable = true)
 |-- Properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+--------+-----------------------------+
|Name    |Properties                   |
+--------+-----------------------------+
|srikanth|{eye -> brown, hair -> black}|
|Manvith |{eye -> blue, hair -> black} |
+--------+-----------------------------+



In [9]:
# Accessing MapType elements
# Start by confirming the column types: in this environment display() prints
# only the DataFrame's column/type summary, not its rows.

display(df1)

DataFrame[Name: string, Properties: map<string,string>]

In [12]:
# Bracket lookup on a map column: copy the value stored under the 'hair' key
# into a new string column named 'Hair'.
df2 = df.withColumn(
    'Hair',
    df['Properties']['hair'],
)
df2.show(truncate=False)
df2.printSchema()

+--------+-----------------------------+-----+
|Name    |Properties                   |Hair |
+--------+-----------------------------+-----+
|srikanth|{eye -> brown, hair -> black}|black|
|Manvith |{eye -> blue, hair -> black} |black|
+--------+-----------------------------+-----+

root
 |-- Name: string (nullable = true)
 |-- Properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- Hair: string (nullable = true)



In [14]:
# Same lookup via Column.getItem(): copy the value stored under the 'eye' key
# into a new string column named 'Eye'.
df3 = df.withColumn(
    'Eye',
    df['Properties'].getItem('eye'),
)
df3.show(truncate=False)
df3.printSchema()

+--------+-----------------------------+-----+
|Name    |Properties                   |Eye  |
+--------+-----------------------------+-----+
|srikanth|{eye -> brown, hair -> black}|brown|
|Manvith |{eye -> blue, hair -> black} |blue |
+--------+-----------------------------+-----+

root
 |-- Name: string (nullable = true)
 |-- Properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- Eye: string (nullable = true)



In [None]:
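# Map helper functions from pyspark.sql.functions demonstrated in the next cells:
#   explode()    - one output row per map entry, as `key` and `value` columns
#   map_keys()   - array of the map's keys
#   map_values() - array of the map's values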

In [15]:
# Re-display the base DataFrame as a reference point for the
# explode() / map_keys() / map_values() cells that follow.
df.show(truncate=False)

+--------+-----------------------------+
|Name    |Properties                   |
+--------+-----------------------------+
|srikanth|{eye -> brown, hair -> black}|
|Manvith |{eye -> blue, hair -> black} |
+--------+-----------------------------+



In [16]:
from pyspark.sql.functions import explode

# explode() on a map column emits one row per map entry and adds two columns,
# `key` and `value`, next to the selected ones.
# The column is referenced as 'Name', exactly as declared in the schema:
# the lowercase spelling only resolves while spark.sql.caseSensitive is false,
# and it would rename the output column to 'name'.
df4 = df.select('Name', 'Properties', explode(df.Properties))

df4.show()

df4.printSchema()

+--------+--------------------+----+-----+
|    name|          Properties| key|value|
+--------+--------------------+----+-----+
|srikanth|{eye -> brown, ha...| eye|brown|
|srikanth|{eye -> brown, ha...|hair|black|
| Manvith|{eye -> blue, hai...| eye| blue|
| Manvith|{eye -> blue, hai...|hair|black|
+--------+--------------------+----+-----+

root
 |-- name: string (nullable = true)
 |-- Properties: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- key: string (nullable = false)
 |-- value: string (nullable = true)



In [18]:
from pyspark.sql.functions import map_keys,map_values,col

# map_keys() returns the keys of each row's map as an array column.
# The new column expression is passed positionally, so no keyword argument
# shares a name with the imported `col` function.
df5 = df.withColumn('Keys', map_keys(col('Properties')))

df5.show(truncate=False)

+--------+-----------------------------+-----------+
|Name    |Properties                   |Keys       |
+--------+-----------------------------+-----------+
|srikanth|{eye -> brown, hair -> black}|[eye, hair]|
|Manvith |{eye -> blue, hair -> black} |[eye, hair]|
+--------+-----------------------------+-----------+



In [19]:
# map_values() returns the values of each row's map as an array column.
df6 = df.withColumn(
    'Values',
    map_values(col('Properties')),
)

df6.show(truncate=False)

+--------+-----------------------------+--------------+
|Name    |Properties                   |Values        |
+--------+-----------------------------+--------------+
|srikanth|{eye -> brown, hair -> black}|[brown, black]|
|Manvith |{eye -> blue, hair -> black} |[blue, black] |
+--------+-----------------------------+--------------+

