In [1]:
import findspark
findspark.init()

import datetime
import pyparsing as pp
import ipywidgets as ipy

from pyspark import sql, SparkConf, SparkContext
from pyspark.sql.functions import col, to_date, avg
from pyspark.sql.functions import max as sparkMax
from pyspark.sql.functions import min as sparkMin
from IPython.display import display, clear_output
from ipywidgets import Output, VBox, widgets, interact

In [2]:
conf = SparkConf().setAppName("Read_CSV")
sc = SparkContext(conf=conf)
sqlContext = sql.SQLContext(sc)

dfPollution = sqlContext.read.csv("Luchtvervuiling.csv", header=True)

In [3]:
MONTHS = ['all months','january','february','march','april','may','june','july','august','september','october',\
          'november','december']
TYPES_P = ['so2', 'no2', 'rspm','spm']

#Haal alle waarden uit date kolom met als doel alle verschillende jaren in een lijst te steken voor filter
date_rows = dfPollution.select(to_date(dfPollution.date).alias('to_date')).collect()
years = []
for row in date_rows:
    try:
        if row.to_date.year not in years:
            years.append(row.to_date.year)
    except:
        pass
years.sort(reverse=True)

#Haal alle waarden uit location kolom met als doel alle verschillende steden in een lijst te steken voor filter
location_rows = dfPollution.select("location").distinct().collect()
locations = [str(row['location']) for row in location_rows]
locations.sort()

In [4]:
#Filters
cityTableFilter = widgets.Dropdown(
    options = locations,
    value = locations[0],
    description = 'City:',
    disabled = False,
)
yearTableFilter = widgets.Dropdown(
    options = years,
    value = years[0],
    description = 'Year:',
    disabled = False,
)
monthTableFilter = widgets.Dropdown(
    options = MONTHS,
    value = MONTHS[0],
    description = 'Month:',
    disabled = False,
)
display(widgets.HBox((cityTableFilter, yearTableFilter, monthTableFilter)))

def update_table():
    #Get filter waarden
    selectedCity = cityTableFilter.value
    selectedYear = yearTableFilter.value
    selectedMonth = monthTableFilter.value
    print(selectedCity + ", " + selectedYear + ", " + selectedMonth)
    
    #Data van dfPollution die getoond moet worden overzetten naar dfTable
    dfTable = dfPollution.select(dfPollution['date'], dfPollution['state'], dfPollution['location'], dfPollution['type'],\
                                 dfPollution['so2'], dfPollution['no2'], dfPollution['rspm'], dfPollution['spm'])
    if str(selectedMonth) == "all months" :
        dfTable = dfTable.filter((col("date").between(str(selectedYear) + "-01-01",\
                                                      str(selectedYear) + "-12-31"))\
                                & (col("location") == selectedCity))
    else :
        month = MONTHS.index(selectedMonth)
        if month < 10 :
            month = "0" + str(month)
        month = str(month)
        dfTable = dfTable.filter((col("date").between(str(selectedYear) + "-" + month + "-01",\
                                                      str(selectedYear) + "-" + month + "-31"))\
                                 & (col("location") == selectedCity))
    
    dfTable = dfTable.sort(col("date"))
    count = dfTable.count()
    if count == 0 :
        noMeasurementsError = "No measurements have taken place at " + selectedCity + " during "
        errorEndingStr = "the year " + str(selectedYear) + "."
        if str(selectedMonth) == "all months" :
            noMeasurementsError += errorEndingStr
        else :
            noMeasurementsError += selectedMonth + " of " + errorEndingStr
        print(noMeasurementsError)
    else :
        dfTable.show(dfTable.count())

update_table()
#Update bij aanpassing van filters
def on_change_table_filter(change):
    if change['name'] == 'value' and (change['new'] != change['old']):
        clear_output()
        display(widgets.HBox((cityTableFilter, yearTableFilter, monthTableFilter)))
        update_table()

#Filters linken aan on_change_table_filter functie
cityTableFilter.observe(on_change_table_filter)
yearTableFilter.observe(on_change_table_filter)
monthTableFilter.observe(on_change_table_filter)

HBox(children=(Dropdown(description='City:', options=('ANKLESHWAR', 'Agra', 'Ahmedabad', 'Aizawl', 'Akola', 'A…

TypeError: must be str, not int

In [None]:
#Filters
cityShortTableFilter = widgets.Dropdown(
    options = locations,
    value = locations[0],
    description = 'City:',
    disabled = False,
)
yearShortTableFilter = widgets.Dropdown(
    options = years,
    value = years[0],
    description = 'Year:',
    disabled = False,
)
monthShortTableFilter = widgets.Dropdown(
    options = MONTHS,
    value = MONTHS[0],
    description = 'Month:',
    disabled = False,
)
typesPShortTableFilter = widgets.Dropdown(
    options = TYPES_P,
    value = TYPES_P[0],
    description = 'Pollution type:',
    disabled = False,
)
display(widgets.HBox((cityShortTableFilter, typesPShortTableFilter)))
display(widgets.HBox((yearShortTableFilter, monthShortTableFilter)))

def update_short_table():
    #Data van dfPollution die getoond moet worden overzetten naar dfShortTable
    dfShortTable = dfPollution.select(dfPollution['date'], dfPollution['state'], dfPollution['location'],\
                                      dfPollution['type'], dfPollution[typesPShortTableFilter.value])
    if str(monthShortTableFilter.value) == "all months" :
        dfShortTable = dfShortTable.filter((col("date").between(str(yearShortTableFilter.value) + "-01-01",\
                                                                str(yearShortTableFilter.value) + "-12-31"))\
                                           & (col("location") == cityShortTableFilter.value)\
                                           & (col(typesPShortTableFilter.value) != "NA"))
    else :
        month = MONTHS.index(monthShortTableFilter.value)
        if month < 10 :
            month = "0" + str(month)
        month = str(month)
        dfShortTable = dfShortTable.filter((col("date").between(str(yearShortTableFilter.value) + "-" + month + "-01",\
                                                                str(yearShortTableFilter.value) + "-" + month + "-31"))\
                                           & (col("location") == cityShortTableFilter.value)\
                                           & (col(typesPShortTableFilter.value) != "NA"))
    
    dfShortTable = dfShortTable.sort(col("date"))
    count = dfShortTable.count()
    if count == 0 :
        noMeasurementsError = "No measurements of type " + typesPShortTableFilter.value + " have taken place at " + cityShortTableFilter.value + " during "
        errorEndingStr = "the year " + str(yearShortTableFilter.value) + "."
        if str(monthShortTableFilter.value) == "all months" :
            noMeasurementsError += errorEndingStr
        else :
            noMeasurementsError += monthShortTableFilter.value + " of " + errorEndingStr
        print(noMeasurementsError)
    else :
        shortTable = dfShortTable.agg(avg(col(typesPShortTableFilter.value)),\
                                      sparkMax(col(typesPShortTableFilter.value)),\
                                      sparkMin(col(typesPShortTableFilter.value)))
        shortTable = shortTable.select(col("avg(" + typesPShortTableFilter.value + ")")\
                                       .alias("Average " + typesPShortTableFilter.value),\
                                       col("max(" + typesPShortTableFilter.value + ")")\
                                       .alias("Maximum " + typesPShortTableFilter.value),\
                                       col("min(" + typesPShortTableFilter.value + ")")\
                                       .alias("Minimum " + typesPShortTableFilter.value))
        shortTable.show()

update_short_table()
#Update bij aanpassing van filters
def on_change_short_table_filter(change):
    if change['name'] == 'value' and (change['new'] != change['old']):
        clear_output()
        display(widgets.HBox((cityShortTableFilter, typesPShortTableFilter)))
        display(widgets.HBox((yearShortTableFilter, monthShortTableFilter)))
        update_short_table()

#Filters linken aan on_change_short_table_filter functie
cityShortTableFilter.observe(on_change_short_table_filter)
yearShortTableFilter.observe(on_change_short_table_filter)
monthShortTableFilter.observe(on_change_short_table_filter)
typesPShortTableFilter.observe(on_change_short_table_filter)