In [93]:
from pyspark.sql import types as T
from pyspark.sql import functions as F
from pyspark.sql import SparkSession

In [2]:
# Reuse the active Spark session if there is one; otherwise start a new one.
spark = (
    SparkSession.builder
    .appName("DataFrameComJson")
    .getOrCreate()
)

In [94]:
# Schema as a DDL string; the struct's field names are numeric, hence the backticks.
schema = (
    "id INT, "
    "info STRUCT<`201`: STRING, `202`: STRING, `203`: STRING>"
)

# Struct values are supplied as dicts keyed by field name. The first row has
# no "203" entry, so that field shows up as NULL in the resulting frame.
data = [
    (1, {"201": "João", "202": "30"}),
    (2, {"201": "Maria", "202": "25", "203": "23"}),
]

df = spark.createDataFrame(data=data, schema=schema)

In [95]:
# Preview the rows; the struct column is rendered as {field1, field2, field3}.
df.show()

+---+----------------+
| id|            info|
+---+----------------+
|  1|{João, 30, NULL}|
|  2| {Maria, 25, 23}|
+---+----------------+



In [96]:
# Confirm that `info` is a struct with the three string fields 201, 202 and 203.
df.printSchema()

root
 |-- id: integer (nullable = true)
 |-- info: struct (nullable = true)
 |    |-- 201: string (nullable = true)
 |    |-- 202: string (nullable = true)
 |    |-- 203: string (nullable = true)



In [97]:
# Hand-written struct -> map conversion: create_map takes alternating
# key / value columns, so each struct field contributes one (literal, field) pair.
map_expr = F.create_map(
    [
        F.lit("201"), F.col("info.201"),
        F.lit("202"), F.col("info.202"),
        F.lit("203"), F.col("info.203"),
    ]
)
print(map_expr)

Column<'map(201, info.201, 202, info.202, 203, info.203)'>


In [98]:
# Attach the map expression as a new column next to the original struct.
df_with_map = df.withColumn(colName="info_map", col=map_expr)

# Show the result without truncating wide cells.
df_with_map.show(truncate=False)

+---+----------------+-------------------------------------+
|id |info            |info_map                             |
+---+----------------+-------------------------------------+
|1  |{João, 30, NULL}|{201 -> João, 202 -> 30, 203 -> NULL}|
|2  |{Maria, 25, 23} |{201 -> Maria, 202 -> 25, 203 -> 23} |
+---+----------------+-------------------------------------+



In [99]:
# Read the struct's field names from the schema instead of hard-coding them.
list_ = df.schema["info"].dataType.fieldNames()

# Flatten (key literal, field value) pairs into the alternating sequence
# that create_map expects.
map_expr = F.create_map(
    [
        column
        for field_name in list_
        for column in (F.lit(field_name), F.col(f"info.{field_name}"))
    ]
)

print(map_expr)

Column<'map(201, info.201, 202, info.202, 203, info.203)'>


In [102]:
def create_map_dynamic(df, struct_col="info", map_col="info_map"):
    """Add a map column built from every field of a struct column.

    The field names are read from the DataFrame schema, so no field list has
    to be hard-coded. Each field name becomes a string key and the field's
    value becomes the corresponding map value.

    Args:
        df: DataFrame that contains the struct column.
        struct_col: Name of the struct column to convert. Defaults to "info".
        map_col: Name of the map column to add (or replace, as with any
            ``withColumn`` call). Defaults to "info_map".

    Returns:
        A new DataFrame with ``map_col`` added; ``df`` itself is not modified.

    Raises:
        KeyError: If ``struct_col`` is not a column of ``df``.
        TypeError: If ``struct_col`` is not a struct column.

    Note:
        Map values must share one type; if the struct mixes field types the
        fields may need casting first -- TODO confirm for your schema.
    """
    struct_type = df.schema[struct_col].dataType
    if not isinstance(struct_type, T.StructType):
        raise TypeError(
            f"Column {struct_col!r} must be a struct, got {struct_type.simpleString()}"
        )

    struct = F.col(struct_col)
    # getField looks the field up by its exact name, so field names containing
    # dots or other special characters are not misread as a nested path the
    # way a string-built "struct.field" reference would be.
    key_value_columns = [
        column
        for field_name in struct_type.fieldNames()
        for column in (F.lit(field_name), struct.getField(field_name))
    ]
    return df.withColumn(map_col, F.create_map(*key_value_columns))

# Apply the helper to the sample frame; the result should match the
# hand-written map column built earlier.
df2 = create_map_dynamic(df)

# Display without truncation so the full map contents are visible.
df2.show(truncate=False)

+---+----------------+-------------------------------------+
|id |info            |info_map                             |
+---+----------------+-------------------------------------+
|1  |{João, 30, NULL}|{201 -> João, 202 -> 30, 203 -> NULL}|
|2  |{Maria, 25, 23} |{201 -> Maria, 202 -> 25, 203 -> 23} |
+---+----------------+-------------------------------------+

