In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, create_map, lit
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

# Each row is (id, dept, properties); the nested (salary, location) tuple
# populates the "properties" struct column declared in the schema below.
data = [
    ("36636", "Finance", (3000, "USA")),
    ("40288", "Finance", (5000, "IND")),
    ("42114", "Sales", (3900, "USA")),
    ("39192", "Marketing", (2500, "CAN")),
    ("34534", "Sales", (6500, "USA")),
]

# "properties" is a struct with an integer salary and a string location.
schema = StructType([
    StructField('id', StringType(), True),
    StructField('dept', StringType(), True),
    StructField('properties', StructType([
        StructField('salary', IntegerType(), True),
        StructField('location', StringType(), True),
    ])),
])

df = spark.createDataFrame(data=data, schema=schema)
df.printSchema()
df.show(truncate=False)

# Convert the struct column to a map column.
# All values of a map share a single type, but salary is an integer and
# location is a string. Cast salary to string explicitly so the map's value
# type is string (as in the recorded output) regardless of how the active
# SQL mode would resolve the mixed types implicitly.
df = df.withColumn(
    "propertiesMap",
    create_map(
        lit("salary"), col("properties.salary").cast("string"),
        lit("location"), col("properties.location"),
    ),
).drop("properties")
df.printSchema()
df.show(truncate=False)

root
 |-- id: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- properties: struct (nullable = true)
 |    |-- salary: integer (nullable = true)
 |    |-- location: string (nullable = true)

+-----+---------+-----------+
|id   |dept     |properties |
+-----+---------+-----------+
|36636|Finance  |{3000, USA}|
|40288|Finance  |{5000, IND}|
|42114|Sales    |{3900, USA}|
|39192|Marketing|{2500, CAN}|
|34534|Sales    |{6500, USA}|
+-----+---------+-----------+

root
 |-- id: string (nullable = true)
 |-- dept: string (nullable = true)
 |-- propertiesMap: map (nullable = false)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+-----+---------+---------------------------------+
|id   |dept     |propertiesMap                    |
+-----+---------+---------------------------------+
|36636|Finance  |{salary -> 3000, location -> USA}|
|40288|Finance  |{salary -> 5000, location -> IND}|
|42114|Sales    |{salary -> 3900, location -> USA}|
|39192|Marketing|{

In [0]:
#The code starts by importing the necessary modules: SparkSession from pyspark.sql and various types and functions from pyspark.sql.types and pyspark.sql.functions.

#A SparkSession is created using the SparkSession.builder API with the application name set to 'SparkByExamples.com'.

#The data is defined as a list of tuples, representing the rows of the DataFrame. Each tuple contains values for the columns "id", "dept", and "properties". The "properties" column itself is a struct type, with nested fields "salary" and "location".

#The schema is defined using the StructType class from pyspark.sql.types. The schema specifies the structure of the DataFrame, including the data types and nested fields.

#The DataFrame is created using the spark.createDataFrame() method, passing the data and schema as arguments. The resulting DataFrame is named df.

#The schema of the DataFrame is printed using the printSchema() method.

#The content of the DataFrame is displayed using the show() method with truncate=False, ensuring that all column values are fully displayed.

#The create_map() function from pyspark.sql.functions is used to convert the struct type column "properties" into a map type column named "propertiesMap". The create_map() function takes alternating key and value columns and creates a map from them. In this case, the keys are the literal strings "salary" and "location", and the values are the respective column references "properties.salary" and "properties.location". Because all values of a map must share one type, the integer salary ends up as a string in the map, which is why the printed schema reports the map's value type as string.

#The original "properties" column is dropped using the drop() method.

#The schema of the updated DataFrame is printed using the printSchema() method.

#The content of the updated DataFrame is displayed using the show() method, again with truncate=False so that the map values are not cut off.

#Overall, this code showcases how to convert a struct type column into a map type column in PySpark by using the create_map() function. It provides a way to transform and manipulate nested structures within DataFrames.
