In [1]:
import findspark
findspark.init()

import folium
import branca

from folium.plugins import MarkerCluster
from pyspark import sql, SparkConf, SparkContext
from pyspark.sql.functions import col
from ipywidgets import interact

In [2]:
# Spark entry points. getOrCreate() returns the already-running SparkContext when
# this cell is executed again; calling the SparkContext constructor a second time
# in the same kernel raises a ValueError. Note that `conf` is only applied when a
# new context is actually created.
conf = SparkConf().setAppName("Read_CSV")
sc = SparkContext.getOrCreate(conf=conf)
sql_context = sql.SQLContext(sc)

# header=True takes the column names from the first line of each file. No schema
# is supplied or inferred, so every column is loaded as a string.
df_cities = sql_context.read.csv("Steden.csv", header=True)
df_pollution = sql_context.read.csv("Luchtvervuiling.csv", header=True)

# Base map centred on `coordinates` (latitude, longitude).
# NOTE(review): presumably the centre of India, given the legend caption used in
# the plotting cell - confirm.
coordinates = (20.593684, 78.96288)
pollution_map = folium.Map(location=coordinates, zoom_start=4)

In [3]:
# Pollutant columns offered in the dropdown together with their thresholds.
# Each row is (column name, low threshold, high threshold); zip() transposes the
# rows into three parallel lists that share the same ordering.
TYPES_P, LOW_P, HIGH_P = map(list, zip(
    ("so2", 5, 10),
    ("no2", 10, 18),
    ("rspm", 80, 200),
    ("spm", 200, 300),
))

In [4]:
# Hex colour codes used for the circle markers on the map.
# Outline colours, one per pollution band:
GREEN = "#7CFC00"         # reading below the pollutant's low threshold
LIGHT_ORANGE = "#ffd27f"  # reading between the low and high thresholds
RED = "#ff0000"           # reading above the pollutant's high threshold
# Fill colour shared by every marker.
STRONG_BLUE = "#3186cc"

In [7]:
# Trim both frames down to the columns the map needs.
# Pollution readings: the location name (join key) followed by one column per
# pollutant, in the same order as TYPES_P.
df_map_pollution = df_pollution.select(["location", "so2", "no2", "rspm", "spm"])
# Cities: coordinates plus the city name (join key).
df_map_cities = df_cities.select(["lat", "lng", "city"])

# Module-level default marker colour. get_pollution picks its own colour for
# every marker, so this value is not read by it.
color_point = STRONG_BLUE

@interact(types=TYPES_P)
def get_pollution(types):
    """Build a map with one circle marker per city for the chosen pollutant.

    Each marker's outline is RED when the reading is above the pollutant's high
    threshold, GREEN when it is below the low threshold and LIGHT_ORANGE
    otherwise. The raw reading is shown in the marker's popup.

    A new map is created on every call. Adding to the shared ``pollution_map``
    instead would leave the markers and legend of every previously selected
    pollutant on the map each time the dropdown changes.

    Parameters
    ----------
    types : str
        Name of the pollutant column to plot; one of the entries of ``TYPES_P``
        (supplied by the ``interact`` dropdown).

    Returns
    -------
    folium.Map
        Map containing the legend and the markers for ``types`` only.

    Raises
    ------
    ValueError
        If ``types`` is not in ``TYPES_P``, or a reading that passed the "NA"
        filter cannot be parsed as a float.
    """
    threshold_index = TYPES_P.index(types)
    high_p_value = HIGH_P[threshold_index]
    low_p_value = LOW_P[threshold_index]

    # The filter removes "NA" readings and also null ones (a comparison with
    # null is never true), so cities without a matching pollution row are
    # dropped despite the right join. dropDuplicates keeps one arbitrary
    # reading per city.
    rows = (
        df_map_pollution
        .join(df_map_cities, df_map_pollution.location == df_map_cities.city, how="right")
        .filter(col(types) != "NA")
        .dropDuplicates(["city"])
        .collect()
    )

    # Same view as the base map, but without anything drawn by earlier calls.
    selection_map = folium.Map(location=coordinates, zoom_start=4)

    # Legend: three steps between 0 and the high threshold. The break points
    # must be ascending and inside [0, high_p_value], hence multiplication
    # (dividing by 0.66 / 0.33 produced values above the maximum).
    colormap = branca.colormap.linear.YlOrRd_09.scale(0, high_p_value)
    colormap = colormap.to_step(
        index=[0, high_p_value * 0.33, high_p_value * 0.66, high_p_value]
    )
    colormap.caption = 'Pollution levels in India'
    colormap.add_to(selection_map)

    for row in rows:
        # Columns are read by name; the CSV values are strings, so they are
        # converted before comparing or plotting.
        pollution = row[types]
        level = float(pollution)
        if level > high_p_value:
            marker_color = RED
        elif level < low_p_value:
            marker_color = GREEN
        else:
            marker_color = LIGHT_ORANGE

        folium.CircleMarker(
            location=[float(row["lat"]), float(row["lng"])],
            radius=4,
            popup=pollution,
            color=marker_color,
            fill=True,
            fill_color=STRONG_BLUE,
        ).add_to(selection_map)
    return selection_map

interactive(children=(Dropdown(description='types', options=('so2', 'no2', 'rspm', 'spm'), value='so2'), Outpu…