In [None]:
import datetime

import findspark
findspark.init()  # must run before the pyspark imports below

import ipywidgets as ipy
import pyparsing as pp
from IPython.display import display, clear_output
from ipywidgets import Output, VBox, widgets, interact
from prettytable import PrettyTable
from pyspark import sql, SparkConf, SparkContext
from pyspark.sql import functions as F
from pyspark.sql.functions import col, to_date

In [None]:
# Reuse the running SparkContext when this cell is executed again; constructing
# a second SparkContext in the same kernel raises an error.
conf = SparkConf().setAppName("Read_CSV")
sc = SparkContext.getOrCreate(conf=conf)
sqlContext = sql.SQLContext(sc)

# Only header=True is passed, so no schema is supplied or inferred here.
dfPollution = sqlContext.read.csv("Luchtvervuiling.csv", header=True)

In [None]:
# Choices for the month filter. Index 0 is the "no month filter" entry, so every
# real month sits at the index equal to its calendar number.
MONTHS = [
    'all months',
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
]

# Column names offered by the pollution-type filter.
TYPES_P = [
    'so2',
    'no2',
    'rspm',
    'spm',
]

# Fetch the values of the "date" column so that the distinct years can be
# offered in the year filter. distinct() runs in Spark, so each date reaches
# the driver once instead of once per measurement row.
date_rows = dfPollution.select(to_date(dfPollution.date).alias('to_date')).distinct().collect()

# Rows whose converted date is None carry no year and are skipped explicitly;
# any other failure is allowed to surface instead of being swallowed.
years = sorted(
    {row['to_date'].year for row in date_rows if row['to_date'] is not None},
    reverse=True,
)

# Fetch the distinct values of the "location" column and keep them, sorted,
# as the choices for the city filter.
location_rows = dfPollution.select("location").distinct().collect()
locations = sorted(str(row['location']) for row in location_rows)

In [None]:
# Filters: one dropdown each for city, year and month, preselecting the first option
cityTableFilter = widgets.Dropdown(options=locations, value=locations[0], description='City:', disabled=False)
yearTableFilter = widgets.Dropdown(options=years, value=years[0], description='Year:', disabled=False)
monthTableFilter = widgets.Dropdown(options=MONTHS, value=MONTHS[0], description='Month:', disabled=False)

# Show the three filters side by side
display(widgets.HBox((cityTableFilter, yearTableFilter, monthTableFilter)))

def update_table():
    """Show every measurement for the selected city, year and month.

    Reads the current values of ``cityTableFilter``, ``yearTableFilter`` and
    ``monthTableFilter``, filters ``dfPollution`` accordingly and prints the
    matching rows sorted by date. When nothing matches, a message naming the
    city and the period is printed instead.
    """
    city = cityTableFilter.value
    year = str(yearTableFilter.value)
    month_name = str(monthTableFilter.value)
    all_months = month_name == "all months"

    if all_months:
        first_day = year + "-01-01"
        last_day = year + "-12-31"
    else:
        # MONTHS[0] is "all months", so a month's index equals its calendar number
        month = "{:02d}".format(MONTHS.index(month_name))
        first_day = year + "-" + month + "-01"
        # NOTE(review): "-31" is the upper bound for every month; this presumably
        # relies on "date" holding yyyy-MM-dd text -- confirm against the CSV
        last_day = year + "-" + month + "-31"

    # Copy the columns of dfPollution that should be shown into dfTable
    dfTable = (
        dfPollution
        .select('date', 'state', 'location', 'type', 'so2', 'no2', 'rspm', 'spm')
        .filter(col("date").between(first_day, last_day) & (col("location") == city))
        .sort(col("date"))
    )

    # count() launches a Spark job, so run it once and reuse the result for show()
    count = dfTable.count()
    if count == 0:
        period = "the year " + year + "."
        if not all_months:
            period = month_name + " of " + period
        print("No measurements have taken place at " + city + " during " + period)
        return

    dfTable.show(count)

# Render the table once for the initial filter values
update_table()
# Refresh the output when a filter is changed
def on_change_table_filter(change):
    """Redraw the filter row and the table after a filter value changes.

    ``change`` is the change dictionary handed to the observer. Only events
    named ``'value'`` whose new value differs from the old one cause a redraw;
    everything else is ignored.
    """
    is_value_event = change['name'] == 'value'
    if not is_value_event or change['new'] == change['old']:
        return

    clear_output()
    filter_row = widgets.HBox((cityTableFilter, yearTableFilter, monthTableFilter))
    display(filter_row)
    update_table()

# Link the filters to on_change_table_filter. names='value' restricts the
# callback to changes of the selected value instead of every widget trait.
for table_filter in (cityTableFilter, yearTableFilter, monthTableFilter):
    table_filter.observe(on_change_table_filter, names='value')

In [None]:
# Filters: dropdowns for city, year, month and pollution type, preselecting the first option
cityShortTableFilter = widgets.Dropdown(options=locations, value=locations[0], description='City:', disabled=False)
yearShortTableFilter = widgets.Dropdown(options=years, value=years[0], description='Year:', disabled=False)
monthShortTableFilter = widgets.Dropdown(options=MONTHS, value=MONTHS[0], description='Month:', disabled=False)
typesPShortTableFilter = widgets.Dropdown(
    options=TYPES_P,
    value=TYPES_P[0],
    description='Pollution type:',
    disabled=False,
)

# Two rows of filters: city and pollution type on top, year and month below
display(widgets.HBox((cityShortTableFilter, typesPShortTableFilter)))
display(widgets.HBox((yearShortTableFilter, monthShortTableFilter)))

def update_short_table():
    """Show one pollutant's measurements for the selected city, year and month.

    Reads the current values of the short-table filter widgets, filters
    ``dfPollution`` accordingly and prints a one-row summary table (average,
    highest and lowest value of the chosen pollutant) followed by the matching
    rows sorted by date. When nothing matches, a message naming the city and
    the period is printed instead.
    """
    pollutant = typesPShortTableFilter.value
    city = cityShortTableFilter.value
    year = str(yearShortTableFilter.value)
    month_name = str(monthShortTableFilter.value)
    all_months = month_name == "all months"

    if all_months:
        first_day = year + "-01-01"
        last_day = year + "-12-31"
    else:
        # MONTHS[0] is "all months", so a month's index equals its calendar number
        month = "{:02d}".format(MONTHS.index(month_name))
        first_day = year + "-" + month + "-01"
        # NOTE(review): "-31" is the upper bound for every month; this presumably
        # relies on "date" holding yyyy-MM-dd text -- confirm against the CSV
        last_day = year + "-" + month + "-31"

    # Copy the columns of dfPollution that should be shown into dfShortTable
    dfShortTable = (
        dfPollution
        .select('date', 'state', 'location', 'type', pollutant)
        .filter(col("date").between(first_day, last_day) & (col("location") == city))
        .sort(col("date"))
    )

    count = dfShortTable.count()
    if count == 0:
        period = "the year " + year + "."
        if not all_months:
            period = month_name + " of " + period
        print("No measurements have taken place at " + city + " during " + period)
        return

    # Aggregate on a numeric cast of the pollutant column.
    # NOTE(review): assumes the column is loaded as text (the CSV is read without
    # a schema) and that non-numeric entries become null -- confirm for this
    # Spark version.
    measurement = col(pollutant).cast("double")
    stats = dfShortTable.agg(
        F.avg(measurement).alias("average"),
        F.max(measurement).alias("highest"),
        F.min(measurement).alias("lowest"),
    ).first()

    shortTable = PrettyTable(['Average ' + pollutant, 'Highest ' + pollutant, 'lowest ' + pollutant])
    # The header row alone would print an empty table, so add the computed values;
    # an aggregate is None when the selection holds no numeric measurement.
    shortTable.add_row([
        "n/a" if stats["average"] is None else round(stats["average"], 2),
        "n/a" if stats["highest"] is None else stats["highest"],
        "n/a" if stats["lowest"] is None else stats["lowest"],
    ])
    print(shortTable)
    dfShortTable.show(count)

# Render the short table once for the initial filter values
update_short_table()
# Refresh the output when a filter is changed
def on_change_short_table_filter(change):
    """Redraw both filter rows and the short table after a filter value changes.

    ``change`` is the change dictionary handed to the observer. Only events
    named ``'value'`` whose new value differs from the old one cause a redraw;
    everything else is ignored.
    """
    is_value_event = change['name'] == 'value'
    if not is_value_event or change['new'] == change['old']:
        return

    clear_output()
    top_row = widgets.HBox((cityShortTableFilter, typesPShortTableFilter))
    display(top_row)
    bottom_row = widgets.HBox((yearShortTableFilter, monthShortTableFilter))
    display(bottom_row)
    update_short_table()

# Link the filters to on_change_short_table_filter. names='value' restricts the
# callback to changes of the selected value instead of every widget trait.
for short_table_filter in (cityShortTableFilter, yearShortTableFilter,
                           monthShortTableFilter, typesPShortTableFilter):
    short_table_filter.observe(on_change_short_table_filter, names='value')