In [None]:
import findspark
findspark.init()

import folium
import branca

from folium.plugins import MarkerCluster
from pyspark import sql, SparkConf, SparkContext
from pyspark.sql.functions import col
from pyspark.sql.functions import max as sparkMax
from ipywidgets import interact

In [None]:
# Reuse the running SparkContext when this cell is executed again: calling the
# SparkContext constructor a second time in the same kernel raises an error,
# whereas getOrCreate returns the context that is already active.
conf = SparkConf().setAppName("Read_CSV")
sc = SparkContext.getOrCreate(conf=conf)
sql_context = sql.SQLContext(sc)

# Both files are read with a header row and without schema inference, so every
# column is loaded as a string.
df_cities = sql_context.read.csv("Steden.csv", header=True)
df_pollution = sql_context.read.csv("Luchtvervuiling.csv", header=True)

In [None]:
# Pollutant columns and their concentration limits, one row per pollutant:
# (column name, low limit, high limit).
# Concentration limits (LOW_P and HIGH_P) taken from:
# http://www.arthapedia.in/index.php?title=Ambient_Air_Quality_Standards_in_India
TYPES_P, LOW_P, HIGH_P = (
    list(column)
    for column in zip(
        ("so2", 50, 80),
        ("no2", 40, 80),
        ("rspm", 40, 60),
        ("spm", 60, 100),
    )
)

# Hex colours used for the map markers and the legend.
RED = "#ff0000"
LIGHT_ORANGE = "#ffd27f"
GREEN = "#7CFC00"
STRONG_BLUE = "#3186cc"

In [None]:
# Default marker colour at module level.
color_point = STRONG_BLUE

# Keep only the columns the map needs: pollutant readings per location, and
# the coordinates and name of each city.
df_map_pollution = df_pollution.select(["location", "so2", "no2", "rspm", "spm"])
df_map_cities = df_cities.select(["lat", "lng", "city"])

def get_max_pollution_value(type_p):
    """Return the highest numeric reading recorded for one pollutant.

    The CSV files are read without schema inference, so every column holds
    strings. The readings are therefore cast to double before aggregating;
    a max over the raw strings would compare them lexicographically
    (for example "99" would rank above "100").

    Args:
        type_p: Name of a pollutant column in ``df_map_pollution``.

    Returns:
        The largest reading in that column as a ``float``.

    Raises:
        ValueError: If the column contains no numeric reading at all.
    """
    # Drop the "NA" placeholders first so the cast only sees real readings.
    readings = df_map_pollution.filter(col(type_p) != "NA")
    max_p_value = readings.agg(sparkMax(col(type_p).cast("double"))).head()[0]
    if max_p_value is None:
        # A global aggregate over zero rows yields a single null.
        raise ValueError("No numeric values found for pollution type " + repr(type_p))
    return float(max_p_value)

@interact(types=TYPES_P)
def get_pollution(types):
    """Build a folium map of India with one colour-coded marker per city.

    Each city that has a reading for the selected pollutant gets a circle
    marker whose outline is green below the low limit, red above the high
    limit and light orange in between. A step colour bar with the same
    limits is added to the map as a legend.

    Args:
        types: Pollutant column to display; one of ``TYPES_P``.

    Returns:
        The populated ``folium.Map``.
    """
    coordinates = (20.593684, 78.96288)
    pollution_map = folium.Map(location=coordinates, zoom_start=4)

    # Cities without any matching pollution row come out of the right join
    # with a null reading and are removed by the "NA" comparison as well.
    joined = df_map_pollution.join(
        df_map_cities,
        df_map_pollution.location == df_map_cities.city,
        how="right",
    )
    # NOTE(review): dropDuplicates keeps one row per city; which of a city's
    # readings survives is not controlled here.
    city_rows = joined.filter(col(types) != "NA").dropDuplicates(["city"]).collect()

    type_index = TYPES_P.index(types)
    high_p_value = HIGH_P[type_index]
    low_p_value = LOW_P[type_index]
    max_p_value = get_max_pollution_value(types)
    # The colour-bar breakpoints have to be ascending, so the upper bound may
    # not fall below the "high" limit even if no reading reaches that limit.
    legend_max = max(max_p_value, high_p_value)

    colormap = branca.colormap.StepColormap(
        colors=[GREEN, LIGHT_ORANGE, RED],
        vmin=0,
        vmax=legend_max,
        index=[0, low_p_value, high_p_value, legend_max],
        caption="Pollution levels in India for type " + types,
    )
    colormap.add_to(pollution_map)

    for row in city_rows:
        # Access fields by column name instead of by position in the join.
        pollution = row[types]
        level = float(pollution)
        if level > high_p_value:
            color_point = RED
        elif level < low_p_value:
            color_point = GREEN
        else:
            color_point = LIGHT_ORANGE

        folium.CircleMarker(
            # The coordinates are read from CSV as strings; pass numbers on.
            location=[float(row["lat"]), float(row["lng"])],
            radius=4,
            popup=pollution,
            color=color_point,
            fill=True,
            fill_color=STRONG_BLUE,
        ).add_to(pollution_map)
    return pollution_map