In [1]:
## crating spark session
import pyspark
from pyspark.sql import SparkSession 
spark = SparkSession.builder.appName('Broadcast_variable.com').getOrCreate()


In [2]:
# creating broadcast variables 
states = {"NY":"New York", "CA": "California", "FL": "Flordia"}
broadcast_states = spark.sparkContext.broadcast(states)


In [3]:
# checking the broadcast values
broadcast_states.value

{'NY': 'New York', 'CA': 'California', 'FL': 'Flordia'}

In [7]:
## rdd creation with parallelize
data = [("James","Smith","USA","CA"),
    ("Michael","Rose","USA","NY"),
    ("Robert","Williams","USA","CA"),
    ("Maria","Jones","USA","FL")
  ]
rdd = spark.sparkContext.parallelize(data)
print(rdd.collect())

[('James', 'Smith', 'USA', 'CA'), ('Michael', 'Rose', 'USA', 'NY'), ('Robert', 'Williams', 'USA', 'CA'), ('Maria', 'Jones', 'USA', 'FL')]


In [8]:
## broadcast variable transformation
def state_convert(code):
    return broadcast_states.value[code]

result = rdd.map(lambda x:(x[0],x[1],x[2],state_convert(x[3]))).collect()
print(result)

[('James', 'Smith', 'USA', 'California'), ('Michael', 'Rose', 'USA', 'New York'), ('Robert', 'Williams', 'USA', 'California'), ('Maria', 'Jones', 'USA', 'Flordia')]


In [14]:
## Broadcast variables in DataFrame
# creating broadcast variables 
states = {"NY":"New York", "CA": "California", "FL": "Flordia", 'USA':'United States Of America'}
broadcast_states = spark.sparkContext.broadcast(states)

## rdd creation with parallelize
data = [("James","Smith","USA","CA"),
    ("Michael","Rose","USA","NY"),
    ("Robert","Williams","USA","CA"),
    ("Maria","Jones","USA","FL")
  ]

columns = ['Firstname', 'Lastname','Country','State']
df = spark.createDataFrame(data,schema = columns)
print(df.printSchema())
print(df.show(truncate=False))
## broadcast variable transformation
def transformation(code):
    return broadcast_states.value[code]

result = df.rdd.map(lambda x:(x[0],x[1],transformation(x[2]),transformation(x[3]))).toDF(columns)
print(result.show(truncate=False))

root
 |-- Firstname: string (nullable = true)
 |-- Lastname: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- State: string (nullable = true)

None
+---------+--------+-------+-----+
|Firstname|Lastname|Country|State|
+---------+--------+-------+-----+
|James    |Smith   |USA    |CA   |
|Michael  |Rose    |USA    |NY   |
|Robert   |Williams|USA    |CA   |
|Maria    |Jones   |USA    |FL   |
+---------+--------+-------+-----+

None
+---------+--------+------------------------+----------+
|Firstname|Lastname|Country                 |State     |
+---------+--------+------------------------+----------+
|James    |Smith   |United States Of America|California|
|Michael  |Rose    |United States Of America|New York  |
|Robert   |Williams|United States Of America|California|
|Maria    |Jones   |United States Of America|Flordia   |
+---------+--------+------------------------+----------+

None
