# CREATE MAP

In [1]:
import findspark
# Runs before the first pyspark import in this notebook.
# NOTE(review): findspark.init() is expected to locate the local Spark install and
# make the pyspark package importable — confirm for this environment.
findspark.init()

In [2]:
from pyspark.sql import SparkSession

# Obtain the notebook's Spark entry point: reuse a session that is already
# active in this process, otherwise start one with default settings.
spark = (
    SparkSession.builder
    .getOrCreate()
)

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/28 15:38:45 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/08/28 15:38:48 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [3]:
# Sample product rows. Each tuple is ordered to match schema_product below:
# (id, device, price, discount, size).
data_product = [
    (1, "mobile",     1000, 10,  7),
    (2, "smartphone", 1400,  5,  8),
    (3, "cellphone",  1200,  7, 10),
    (4, "laptop",     2250,  2, 27),
    (5, "notebook",   3150, 14, 29),
    (6, "pc",         3000,  8, 25),
    (7, "netbook",    4500,  2, 27),
]

# DDL-style schema string, one "name TYPE" pair per field. Adjacent literals are
# concatenated by Python into a single comma-separated string.
schema_product = (
    "id INTEGER, "
    "device STRING, "
    "price INTEGER, "
    "discount INTEGER, "
    "size INTEGER"
)

In [4]:
# Build the products DataFrame from the in-memory rows, typed by the DDL schema
# string, and print it to check the contents.
df_product = spark.createDataFrame(data_product, schema_product)
df_product.show()

                                                                                

+---+----------+-----+--------+----+
| id|    device|price|discount|size|
+---+----------+-----+--------+----+
|  1|    mobile| 1000|      10|   7|
|  2|smartphone| 1400|       5|   8|
|  3| cellphone| 1200|       7|  10|
|  4|    laptop| 2250|       2|  27|
|  5|  notebook| 3150|      14|  29|
|  6|        pc| 3000|       8|  25|
|  7|   netbook| 4500|       2|  27|
+---+----------+-----+--------+----+



## CREATE MAP: CONVERT COLUMNS TO A DICT

In [5]:
from pyspark.sql.functions import col, lit, create_map

In [7]:
# Keep every existing column ("*") and append dict_price: a single-entry map per
# row whose key is the row's device name and whose value is its price.
df_product_dict_v1 = df_product.select(
    "*",
    create_map(col("device"), col("price")).alias("dict_price"),
)

df_product_dict_v1.show()

+---+----------+-----+--------+----+--------------------+
| id|    device|price|discount|size|          dict_price|
+---+----------+-----+--------+----+--------------------+
|  1|    mobile| 1000|      10|   7|    {mobile -> 1000}|
|  2|smartphone| 1400|       5|   8|{smartphone -> 1400}|
|  3| cellphone| 1200|       7|  10| {cellphone -> 1200}|
|  4|    laptop| 2250|       2|  27|    {laptop -> 2250}|
|  5|  notebook| 3150|      14|  29|  {notebook -> 3150}|
|  6|        pc| 3000|       8|  25|        {pc -> 3000}|
|  7|   netbook| 4500|       2|  27|   {netbook -> 4500}|
+---+----------+-----+--------+----+--------------------+



In [8]:
# Inspect the column types; dict_price is reported as a map column with its
# key and value types listed underneath.
df_product_dict_v1.printSchema()

root
 |-- id: integer (nullable = true)
 |-- device: string (nullable = true)
 |-- price: integer (nullable = true)
 |-- discount: integer (nullable = true)
 |-- size: integer (nullable = true)
 |-- dict_price: map (nullable = false)
 |    |-- key: string
 |    |-- value: integer (valueContainsNull = true)



In [11]:
# Pack several columns into one map per row, keyed by the column name.
# A Spark map has a single value type, but the inputs here mix a string column
# (device) with integer columns (price, discount, size). Each value is cast to
# string explicitly so the resulting map<string,string> does not depend on the
# session's implicit type-coercion / ANSI settings.
df_product_dict_v2 = df_product.withColumn(
    "dict_values",
    create_map(
        lit("device"), col("device").cast("string"),
        lit("price"), col("price").cast("string"),
        lit("discount"), col("discount").cast("string"),
        lit("size"), col("size").cast("string"),
    ),
)

# truncate=False prints the full map text instead of cutting it at 20 characters.
df_product_dict_v2.show(truncate=False)

+---+----------+-----+--------+----+---------------------------------------------------------------+
|id |device    |price|discount|size|dict_values                                                    |
+---+----------+-----+--------+----+---------------------------------------------------------------+
|1  |mobile    |1000 |10      |7   |{device -> mobile, price -> 1000, discount -> 10, size -> 7}   |
|2  |smartphone|1400 |5       |8   |{device -> smartphone, price -> 1400, discount -> 5, size -> 8}|
|3  |cellphone |1200 |7       |10  |{device -> cellphone, price -> 1200, discount -> 7, size -> 10}|
|4  |laptop    |2250 |2       |27  |{device -> laptop, price -> 2250, discount -> 2, size -> 27}   |
|5  |notebook  |3150 |14      |29  |{device -> notebook, price -> 3150, discount -> 14, size -> 29}|
|6  |pc        |3000 |8       |25  |{device -> pc, price -> 3000, discount -> 8, size -> 25}       |
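|7  |netbook   |4500 |2       |27  |{device -> netbook, price -> 4500, discount -> 2, size -> 27}  |
+---+----------+-----+--------+----+---------------------------------------------------------------+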

In [12]:
# Check the map's value type: the string and integer inputs end up sharing a
# single value type in dict_values.
df_product_dict_v2.printSchema()

root
 |-- id: integer (nullable = true)
 |-- device: string (nullable = true)
 |-- price: integer (nullable = true)
 |-- discount: integer (nullable = true)
 |-- size: integer (nullable = true)
 |-- dict_values: map (nullable = false)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

