#### Setting Environment Variables

In [0]:
import os
import sys
# Make PySpark use the same Python interpreter that is running this notebook,
# for both the PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON settings.
os.environ.update(
    {
        "PYSPARK_PYTHON": sys.executable,
        "PYSPARK_DRIVER_PYTHON": sys.executable,
    }
)

#### Creating dataframe

In [0]:
from pyspark.sql import SparkSession

# Obtain the Spark session used by every cell below (getOrCreate reuses an
# existing session if one is already active).
spark = (
    SparkSession.builder
    .appName("JSON Functions")
    .getOrCreate()
)

In [0]:
# Sample JSON document (kept as raw text) that the examples below parse.
json_string = (
    '{"ZipCode" : 704, "ZipCodeType" : "Standard", '
    '"City" : "PARC PARQUE", "Country" : "PR"}'
)

In [0]:
# Single-row DataFrame: an integer id alongside the raw JSON text.
df = spark.createDataFrame(
    data=[(1, json_string)],
    schema=["id", "value"],
)
df.show(truncate=False)

+---+---------------------------------------------------------------------------------------+
|id |value                                                                                  |
+---+---------------------------------------------------------------------------------------+
|1  |{"ZipCode" : 704, "ZipCodeType" : "Standard", "City" : "PARC PARQUE", "Country" : "PR"}|
+---+---------------------------------------------------------------------------------------+



#### JSON Functions

##### 1.from_json

###### Map Type

In [0]:
from pyspark.sql.functions import from_json
from pyspark.sql.types import MapType, StringType

# Parse the JSON text into a map column whose keys and values are both strings.
df2 = df.withColumn(
    "value",
    from_json(df["value"], MapType(StringType(), StringType())),
)
df2.show(truncate=False)

+---+-----------------------------------------------------------------------------+
|id |value                                                                        |
+---+-----------------------------------------------------------------------------+
|1  |{ZipCode -> 704, ZipCodeType -> Standard, City -> PARC PARQUE, Country -> PR}|
+---+-----------------------------------------------------------------------------+



###### StructType

In [0]:
from pyspark.sql.types import StringType, StructField, StructType

# Schema for the JSON payload: four nullable string fields.
schema = StructType(
    [
        StructField(field_name, StringType(), True)
        for field_name in ("ZipCode", "ZipCodeType", "City", "Country")
    ]
)

# Convert the JSON string column into a struct column and inspect its schema.
df3 = df.withColumn("value", from_json(df["value"], schema))
df3.printSchema()

root
 |-- id: long (nullable = true)
 |-- value: struct (nullable = true)
 |    |-- ZipCode: string (nullable = true)
 |    |-- ZipCodeType: string (nullable = true)
 |    |-- City: string (nullable = true)
 |    |-- Country: string (nullable = true)



In [0]:
# Display the struct-typed frame without truncating the cell contents.
df3.show(truncate=False)

+---+--------------------------------+
|id |value                           |
+---+--------------------------------+
|1  |{704, Standard, PARC PARQUE, PR}|
+---+--------------------------------+



In [0]:
# Flatten the struct: "value.*" expands every struct field into its own column.
df4 = df3.select("id", "value.*")
df4.show()

+---+-------+-----------+-----------+-------+
| id|ZipCode|ZipCodeType|       City|Country|
+---+-------+-----------+-----------+-------+
|  1|    704|   Standard|PARC PARQUE|     PR|
+---+-------+-----------+-----------+-------+



##### 2.to_json

In [0]:
from pyspark.sql.functions import col, to_json

# Serialise the map column of df2 back into a JSON string and display it.
(
    df2
    .withColumn("value", to_json(col("value")))
    .show(truncate=False)
)

+---+------------------------------------------------------------------------------+
|id |value                                                                         |
+---+------------------------------------------------------------------------------+
|1  |{"ZipCode":"704","ZipCodeType":"Standard","City":"PARC PARQUE","Country":"PR"}|
+---+------------------------------------------------------------------------------+



##### 3.json_tuple

In [0]:
# `col` is imported here too, so this cell does not depend on another cell's
# import having been executed first.
from pyspark.sql.functions import col, json_tuple

# Pull two top-level keys out of the raw JSON text; json_tuple produces one
# output column per requested key, and toDF renames all resulting columns.
(
    df.select(col("id"), json_tuple(col("value"), "ZipCode", "ZipCodeType"))
    .toDF("Id", "Zip", "ZipType")
    .show()
)

+---+---+--------+
| Id|Zip| ZipType|
+---+---+--------+
|  1|704|Standard|
+---+---+--------+



##### 4.get_json_object

In [0]:
# `col` is imported here too, so this cell does not depend on another cell's
# import having been executed first.
from pyspark.sql.functions import col, get_json_object

# Extract individual values from the raw JSON text using JSON-path expressions.
df.select(
    col("id"),
    get_json_object(col("value"), "$.ZipCode").alias("Code"),
    get_json_object(col("value"), "$.Country").alias("Country"),
).show()

+---+----+-------+
| id|Code|Country|
+---+----+-------+
|  1| 704|     PR|
+---+----+-------+



##### 5.schema_of_json

In [0]:
from pyspark.sql.functions import lit, schema_of_json

# Infer the schema of the sample JSON literal. The one-row frame from
# spark.range(1) only serves as something to evaluate the expression against;
# the first cell of the first collected row holds the schema string.
schemaStr = (
    spark.range(1)
    .select(schema_of_json(lit(json_string)))
    .collect()[0][0]
)
print(schemaStr)

STRUCT<City: STRING, Country: STRING, ZipCode: BIGINT, ZipCodeType: STRING>
