In [37]:
from pyspark.sql.types import *
from pyspark.sql import SparkSession
from pyspark.sql.functions import min, max, upper, trim, from_json, to_json, json_tuple
from IPython.core.display import HTML
# Stop the notebook from wrapping <pre> output, so wide show() tables stay aligned.
display(HTML("<style>pre { white-space: pre !important; }</style>"))

# Use the documented `SparkSession.builder` entry point rather than instantiating
# the Builder class by hand; getOrCreate() returns the active session if one
# already exists, otherwise it starts a new one.
spark = (
    SparkSession.builder
    .appName('from_json(), to_json(), json_tuple() function')
    .getOrCreate()
)

In [3]:
# Sample rows: a person's name plus a JSON document stored as a plain string.
data = [
    ('Ajay', '{ "hair" : "brown", "eye" : "black" }'),
    ('Rohit', '{ "hair" : "black", "eye" : "black" }'),
]
schema = ['name', 'props']  # column names only

df = spark.createDataFrame(data=data, schema=schema)

# Print the rows without truncating the JSON text, then the column types.
df.show(truncate=False)
df.printSchema()

+-----+-------------------------------------+
|name |props                                |
+-----+-------------------------------------+
|Ajay |{ "hair" : "brown", "eye" : "black" }|
|Rohit|{ "hair" : "black", "eye" : "black" }|
+-----+-------------------------------------+

root
 |-- name: string (nullable = true)
 |-- props: string (nullable = true)



#### from_json() --> converts a JSON string column into a MapType() or StructType() column of the DataFrame

##### MapType()

In [6]:
# Parse the JSON text held in `props` into a map with string keys and string values.
df1 = df.withColumn(
    'propsMap',
    from_json(df['props'], MapType(StringType(), StringType())),
)
df1.show(truncate=False)
df1.printSchema()

+-----+-------------------------------------+-----------------------------+
|name |props                                |propsMap                     |
+-----+-------------------------------------+-----------------------------+
|Ajay |{ "hair" : "brown", "eye" : "black" }|{hair -> brown, eye -> black}|
|Rohit|{ "hair" : "black", "eye" : "black" }|{hair -> black, eye -> black}|
+-----+-------------------------------------+-----------------------------+

root
 |-- name: string (nullable = true)
 |-- props: string (nullable = true)
 |-- propsMap: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [11]:
# Look up individual keys of the parsed map and expose each one as its own column.
(
    df1
    .withColumn('hair', df1.propsMap['hair'])
    .withColumn('eyeColor', df1.propsMap['eye'])
    .show()
)

+-----+--------------------+--------------------+-----+--------+
| name|               props|            propsMap| hair|eyeColor|
+-----+--------------------+--------------------+-----+--------+
| Ajay|{ "hair" : "brown...|{hair -> brown, e...|brown|   black|
|Rohit|{ "hair" : "black...|{hair -> black, e...|black|   black|
+-----+--------------------+--------------------+-----+--------+



##### StructType()

In [33]:
# Struct layout mirroring the JSON document: two string fields.
structSchema = (
    StructType()
    .add('hair', StringType())
    .add('eye', StringType())
)

# Parse the JSON text in `props` into a struct column using that layout.
df2 = df.withColumn(
    'propsStruct',
    from_json(df['props'], structSchema),
)
df2.show(truncate=False)
df2.printSchema()

+-----+-------------------------------------+--------------+
|name |props                                |propsStruct   |
+-----+-------------------------------------+--------------+
|Ajay |{ "hair" : "brown", "eye" : "black" }|{brown, black}|
|Rohit|{ "hair" : "black", "eye" : "black" }|{black, black}|
+-----+-------------------------------------+--------------+

root
 |-- name: string (nullable = true)
 |-- props: string (nullable = true)
 |-- propsStruct: struct (nullable = true)
 |    |-- hair: string (nullable = true)
 |    |-- eye: string (nullable = true)



In [34]:
# Pull each field of the parsed struct out into a top-level column.
(
    df2
    .withColumn('hair', df2.propsStruct['hair'])
    .withColumn('eye', df2.propsStruct['eye'])
    .show()
)

+-----+--------------------+--------------+-----+-----+
| name|               props|   propsStruct| hair|  eye|
+-----+--------------------+--------------+-----+-----+
| Ajay|{ "hair" : "brown...|{brown, black}|brown|black|
|Rohit|{ "hair" : "black...|{black, black}|black|black|
+-----+--------------------+--------------+-----+-----+



#### to_json() --> converts a MapType() or StructType() column into a JSON string column

In [32]:
# Serialise the struct column back into a JSON string column.
df3 = df2.withColumn(
    'propsJSONstring',
    to_json(df2['propsStruct']),
)
df3.show(truncate=False)
df3.printSchema()

+-----+-------------------------------------+--------------+------------------------------+
|name |props                                |propsStruct   |propsJSONstring               |
+-----+-------------------------------------+--------------+------------------------------+
|Ajay |{ "hair" : "brown", "eye" : "black" }|{brown, black}|{"hair":"brown","eye":"black"}|
|Rohit|{ "hair" : "black", "eye" : "black" }|{black, black}|{"hair":"black","eye":"black"}|
+-----+-------------------------------------+--------------+------------------------------+

root
 |-- name: string (nullable = true)
 |-- props: string (nullable = true)
 |-- propsStruct: struct (nullable = true)
 |    |-- hair: string (nullable = true)
 |    |-- eye: string (nullable = true)
 |-- propsJSONstring: string (nullable = true)



#### json_tuple() --> extracts elements from a JSON string column and returns them as new columns

In [42]:
# Extract both JSON fields in a single json_tuple() call; alias() names the
# resulting columns in the same order as the requested fields.
df4 = df.select(
    df['name'],
    df['props'],
    json_tuple(df['props'], 'hair', 'eye').alias('hair', 'eye'),
)
df4.show(truncate=False)
df4.printSchema()

+-----+-------------------------------------+-----+-----+
|name |props                                |hair |eye  |
+-----+-------------------------------------+-----+-----+
|Ajay |{ "hair" : "brown", "eye" : "black" }|brown|black|
|Rohit|{ "hair" : "black", "eye" : "black" }|black|black|
+-----+-------------------------------------+-----+-----+

root
 |-- name: string (nullable = true)
 |-- props: string (nullable = true)
 |-- hair: string (nullable = true)
 |-- eye: string (nullable = true)



In [41]:
# Same extraction as a chain of withColumn() calls: one json_tuple() per field,
# with the new column name supplied by withColumn().
df5 = (
    df
    .withColumn('hair', json_tuple(df['props'], 'hair'))
    .withColumn('eye', json_tuple(df['props'], 'eye'))
)
df5.show(truncate=False)

+-----+-------------------------------------+-----+-----+
|name |props                                |hair |eye  |
+-----+-------------------------------------+-----+-----+
|Ajay |{ "hair" : "brown", "eye" : "black" }|brown|black|
|Rohit|{ "hair" : "black", "eye" : "black" }|black|black|
+-----+-------------------------------------+-----+-----+



In [43]:
# Shut down the Spark session now that the demonstration is finished.
spark.stop()