In [1]:
# findspark.init() is run here, before the pyspark import in the next cell.
# Per the findspark README it locates the Spark installation and adds pyspark to sys.path.
import findspark
findspark.init()

In [2]:
from pyspark.sql import SparkSession

In [3]:
# Obtain the SparkSession used by every later cell (reuses an existing session or creates one).
spark = SparkSession.builder.getOrCreate()

In [4]:
# Display the session object to confirm it was created.
spark

In [6]:
# Input DataFrame: a single string column, "product_id", holding three rows.
df = spark.createDataFrame(
    data=[("1001",), ("1002",), ("1004",)],
    schema=["product_id"],
)

# Small lookup dictionary mapping product id -> product name.
product_dic = {"1001": "iPhone", "1002": "Samsung", "1003": "Pixel"}

In [7]:
# Preview the input rows before adding the lookup column.
df.show()

+----------+
|product_id|
+----------+
|      1001|
|      1002|
|      1004|
+----------+



In [9]:
# Broadcast the lookup dictionary; its contents are read back through broad_vr.value.
broad_vr = spark.sparkContext.broadcast(product_dic)

In [13]:
# Check that the broadcast variable holds the lookup dictionary.
broad_vr.value

{'1001': 'iPhone', '1002': 'Samsung', '1003': 'Pixel'}

In [14]:
# Sanity-check a single lookup through the broadcast value.
broad_vr.value.get('1001')

'iPhone'

In [15]:
# Our Function - User Defined

def mymap(x):
    """Return the product name stored for product id ``x``.

    The lookup goes through the module-level broadcast variable ``broad_vr``
    (``broad_vr.value`` is the id -> name dictionary).

    Args:
        x: Product id to look up.

    Returns:
        The matching product name, or ``None`` when ``x`` is not a key.
    """
    lookup = broad_vr.value
    return lookup.get(x)

In [17]:
from pyspark.sql.functions import *

In [18]:
# Wrap mymap as a Spark UDF. No returnType is passed, so udf()'s default applies
# (StringType according to the PySpark documentation).
mymap_udf = udf(mymap)

In [19]:
# Derive a new DataFrame with a "product_name" column computed by applying the UDF to "product_id".
df_with_name = df.withColumn("product_name", mymap_udf("product_id"))

In [20]:
# "1004" has no entry in product_dic, so mymap returns None and its product_name shows as null.
df_with_name.show()

+----------+------------+
|product_id|product_name|
+----------+------------+
|      1001|      iPhone|
|      1002|     Samsung|
|      1004|        null|
+----------+------------+

