In [1]:
#Widen the notebook container to the full browser width so the side-by-side plots fit
#display and HTML come from the public IPython.display module
#(importing display from IPython.core.display is deprecated)
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
from datetime import date,datetime

import numpy as np
import pandas as pd
from pyproj import Transformer



In [3]:
#Import general library
import bokeh

#Import to handle data parsed to bokeh plots
from bokeh.models import ColumnDataSource

#Import to handle bokeh notebook
from bokeh.io import output_notebook

#Import for the background of the map plot
from bokeh.tile_providers import CARTODBPOSITRON, get_provider, CARTODBPOSITRON_RETINA, STAMEN_TERRAIN, STAMEN_TONER, WIKIMEDIA

#Import to create plot, show plot, and output plot
from bokeh.plotting import figure, output_file, show

#Import to handle layout of the bokeh plots
from bokeh.layouts import column, row, layout

#Importing the RangeTool used in the Timeline plot
from bokeh.models import RangeTool

#Import to set the range of the Timeline
from bokeh.models import Range1d

#Import for the button and datepicker widget
from bokeh.models import Button, DatePicker

#Import for callback on an event
from bokeh.events import ButtonClick

#Import the colorcet palettes used by the color mappers of the two maps
#Other possible color palettes:
# https://colorcet.holoviz.org/user_guide/index.html
import colorcet as cc
#Palette handed to linear_cmap for the radar values in the weather map
palette = cc.CET_L9

#Palette handed to log_cmap for the tweet density in geo_map
palette_geo_map = cc.CET_D11

#Import palette for the histogram colors
from bokeh.palettes import Spectral

#palette = Spectral


#Import log_cmap to support the color mapper for the geo_map
from bokeh.transform import log_cmap, linear_cmap

#Importing the select tools for bokeh plots
from bokeh.models import LassoSelectTool, BoxSelectTool

#Import the tools for the color bar
from bokeh.models import ColorBar, LogTicker
#Other option for ticker: AdaptiveTicker

#Import legend to show the legend for the timeline and histogram
from bokeh.models import Legend

#Import for the histogram model
from bokeh.models import FactorRange

#Import for the coloring of the bars in the histogram plot
from bokeh.transform import factor_cmap

#Import to show multiple tabs
from bokeh.models import Panel, Tabs

#Imports for the weather map color and slider
from bokeh.models import CategoricalColorMapper, Slider

#Import to update the slider and weather data over time
from bokeh.io import curdoc

output_notebook()

## Assumptions

In [4]:
#Screen resolution in pixels; the plot sizes are computed as fractions of these values
screen_height = 1080
screen_length = 1920

#Location of the twitter dataset csv
#Can be overridden with the TWITTER_DATASET_PATH environment variable, so the
#notebook can run on another machine without editing this cell
location_twitter_dataset = os.environ.get(
    "TWITTER_DATASET_PATH",
    "C:/Users/DESKTOP-2/Documents/Courses/Master Project/Data/complete_swiss_dataset.csv")

#Folder holding the extracted radar csv files
#Can be overridden with the WEATHER_DATA_DIR environment variable
location_weather_data = os.environ.get("WEATHER_DATA_DIR", "D:/Master Project/Extracted Radar Data/")

#The file name is appended to this folder by plain string concatenation,
#so the folder has to end with a path separator
if not location_weather_data.endswith(("/", "\\")):
    location_weather_data += "/"

## Data retrieval

In [6]:
#Function to retrieve the csv storing the weather data
def retrieve_weather_data(measurement, year, day):
    """Load one weather csv and return the rows inside the Swiss bounding box.

    The file that is read is
    location_weather_data + measurement + str(year) + str(day) + ".csv".

    Parameters:
        measurement: non-empty string, first part of the file name (e.g. "CZC")
        year: non-negative number, appended after the measurement
        day: number between 0 and 370, appended after the year
             NOTE(review): the day is not zero padded - confirm the file naming
             for days below 100

    Returns:
        An independent DataFrame (safe to modify) with the extra columns
        'datetime' and 'time_period' and with 'value' converted to str,
        or -1 when one of the parameters is invalid.
    """

    #Check whether the parameters passed into the function are valid
    if(len(measurement) == 0 or year < 0 or day < 0 or day > 370):
        print("Error parameters incorrect retrieve_weather_data")
        return -1

    weather_data = pd.read_csv(location_weather_data+measurement+str(year)+str(day)+".csv")

    #Drop the unnamed column (presumably the index written when the csv was exported)
    #errors='ignore' keeps files without that column loadable
    weather_data = weather_data.drop(columns=['Unnamed: 0'], errors='ignore')

    #Convert every distinct timestamp only once and map the result onto the rows
    #datetime.fromtimestamp interprets the timestamp in the local timezone
    times_dict = {time: datetime.fromtimestamp(time)
                  for time in np.unique(weather_data['timestamp'])}

    #pd.to_datetime guarantees a datetime column even when the file has no rows
    weather_data['datetime'] = pd.to_datetime(weather_data['timestamp'].map(times_dict))

    #Label shown on the slider, e.g. "26 - May 14:05"
    weather_data['time_period'] = weather_data['datetime'].dt.strftime('%d - %b %H:%M')

    weather_data['value'] = weather_data['value'].astype(str)

    #transformer_merc = Transformer.from_crs("EPSG:4326", "EPSG:3857") (for verification)
    #np.array(transformer_merc.transform([45.8,47.9,45.8,47.9],[5.9,5.9,10.6,10.6])).T (for verification)

    #Filter based on the coordinates 45.8, 5.9 and 47.9,10.6 which encompass switzerland
    #Translated to web mercator
    inside_switzerland = (weather_data['x_mercator'].between(657000, 1170000) &
                          weather_data['y_mercator'].between(5750000, 6090000))

    #Return a copy so callers can assign columns without a SettingWithCopyWarning
    return weather_data[inside_switzerland].copy()

In [7]:
#Subset of the csv columns that the plots need
import_columns = ['created_at_CET','latitude','longitude','Overall.score','Anger','Confusion',
                  'Disgust','Fear','Happiness','Sadness','Shame','Surprise']

#Read the complete twitter dataset from disk
twitter_data = pd.read_csv(location_twitter_dataset,encoding = "ISO-8859-15",low_memory=False)

#Timeline Map Histogram data (TMH_data), built in one chain:
# - keep only the relevant columns, ordered by creation time with a fresh 0..n-1 index
# - round all entries to 3 decimals, mainly to remove specific gps locations of users
# - parse the creation time strings into datetimes
TMH_data = (
    twitter_data[import_columns]
    .sort_values(by='created_at_CET',ignore_index=True)
    .round(3)
    .assign(created_at_CET=lambda frame: pd.to_datetime(frame['created_at_CET'],
                                                        format='%Y-%m-%d %H:%M:%S'))
)

#Row positions of the tweets which are within the specified coordinates
inside_bounds = (TMH_data['latitude'].between(45.8, 47.9) &
                 TMH_data['longitude'].between(5.9, 10.6))
geo_bounds = np.flatnonzero(inside_bounds.to_numpy())

#Keep only the points within the bounds, sorted by the date at creation
TMH_data = TMH_data.iloc[geo_bounds].sort_values(by='created_at_CET')

#Release the entire dataset now that the selection has been made
twitter_data = None



In [8]:
#Coordinates of all tweets as arrays, in epsg 4326 (latitude, longitude)
tweet_latitudes = TMH_data['latitude'].values
tweet_longitudes = TMH_data['longitude'].values

#Transformer from epsg 4326 to epsg 3857 (web mercator)
transformer_merc = Transformer.from_crs("EPSG:4326", "EPSG:3857")

#Transform the coordinates from epsg 4326 to epsg 3857
mercator_transform = transformer_merc.transform(tweet_latitudes, tweet_longitudes)

#Store both web mercator axes, rounded to 1 decimal
for mercator_column, axis_values in zip(('x_mercator', 'y_mercator'), mercator_transform):
    TMH_data[mercator_column] = axis_values.round(1)

#Transformer from epsg 4326 to epsg 21781 (swiss coordinate system)
transformer_swiss = Transformer.from_crs("EPSG:4326", "EPSG:21781")

#Transform the coordinates from epsg 4326 to epsg 21781
swiss_transform = transformer_swiss.transform(tweet_latitudes, tweet_longitudes)

#Store both swiss axes divided by 1e3 and rounded to whole numbers
#(presumably metres to kilometres - confirm the unit of epsg 21781)
for swiss_column, axis_values in zip(('x_swiss', 'y_swiss'), swiss_transform):
    TMH_data[swiss_column] = (axis_values / 1e3).round()

#The ID of a tweet is its index label in TMH_data
TMH_data['ID'] = TMH_data.index

In [9]:
#Function to compute the map points and their densities given a certain timeframe
def compute_map_data(IDs = None):
    """Return the unique tweet locations with the number of tweets at each one.

    Parameters:
        IDs: values of TMH_data['ID'] (the index labels of TMH_data) to include.
             None or an empty sequence selects every tweet.

    Returns:
        DataFrame with the columns 'x_mercator', 'y_mercator' and 'density'
        (float), one row per distinct coordinate, sorted by x then y.
        The index holds the position of the first occurrence of each
        coordinate in the sorted selection.
    """
    coordinate_columns = ['x_mercator','y_mercator']

    if IDs is None or len(IDs) == 0:
        #If no IDs have been passed, then use all elements
        selection = TMH_data[coordinate_columns]
    else:
        #The IDs are index labels (TMH_data['ID'] is a copy of the index), so they
        #have to be looked up with .loc; positional .iloc selects other rows as
        #soon as the index has gaps
        selection = TMH_data.loc[IDs, coordinate_columns]

    #Sort so that identical coordinates are adjacent and renumber the rows 0..n-1
    sorted_points = selection.sort_values(by=coordinate_columns).reset_index(drop=True)

    #Drop all the duplicates, which keeps the first occurring entry of each coordinate
    Map_data = sorted_points.drop_duplicates(subset=coordinate_columns).copy()

    #The density of a coordinate is the distance from its first occurrence to the
    #first occurrence of the next coordinate; appending the total row count gives
    #the last coordinate its real count as well
    first_positions = Map_data.index.to_numpy()
    Map_data['density'] = np.diff(first_positions, append=len(sorted_points)).astype(float)

    return Map_data

In [10]:
#Lookup table from creation time to tweet ID
#The creation time becomes the index, so the table can be sliced by date strings
time_table = TMH_data[['created_at_CET','ID']].set_index('created_at_CET')

In [11]:
#Load the BZC measurement file of year 19, day 242
BZC = retrieve_weather_data(measurement="BZC", year=19, day=242)

In [12]:
#Load the CZC measurement file of year 16, day 147
CZC = retrieve_weather_data(measurement="CZC", year=16, day=147)

In [13]:
#BZC = retrieve_weather_data("BZC",19,242)
#CZC = retrieve_weather_data("CZC",19,242)
#LZC = retrieve_weather_data("LZC",19,242)
#RZC = retrieve_weather_data("RZC",19,242)

In [14]:
#retrieve_weather_data stores 'value' as str; the linear color mapper in W_app needs numbers
#assign builds a new frame instead of writing into the filtered slice that was returned,
#which avoids a SettingWithCopyWarning
CZC = CZC.assign(value=CZC['value'].astype(float))

In [15]:
#Define the weather app
def W_app(doc):
    """Build the weather / tweet-density dashboard and add it to a Bokeh document.

    Layout: the weather map (CZC) next to the tweet-density map, sharing their
    x and y ranges, and below them a slider over the available time periods
    with a play/pause button that animates the slider.

    Parameters:
        doc: Bokeh document that receives the layout; it is also the document
             on which the animation callback is scheduled.

    Reads the module-level names CZC, time_table, palette, palette_geo_map,
    screen_length and screen_height, and calls compute_map_data.
    """

    #load the background of the map plot
    tile_provider = get_provider(CARTODBPOSITRON_RETINA)

    ### Weather map (START) ###

    #Create the weather figure
    # range bounds supplied in web mercator coordinates
    weather_map = figure(x_range=(650000, 1180000), y_range=(5700000, 6100000),
                         x_axis_type="mercator", y_axis_type="mercator",
                         plot_width=int(screen_length*0.5), plot_height=int(screen_height*0.5),
                         toolbar_location="above",title="Weather detected by radar (CZC)")

    #Add the background to the map
    weather_map.add_tile(tile_provider)

    #Labels of the time steps in chronological order
    #Sorting by the datetime column instead of the label string keeps the order
    #correct across a month boundary ("01 - Jun" sorts before "31 - May" as text)
    time_periods = (CZC[['datetime','time_period']]
                    .drop_duplicates(subset='time_period')
                    .sort_values(by='datetime')['time_period']
                    .to_numpy())

    #Rows of CZC that belong to the time step at the given slider position
    def weather_at(position):
        return CZC[CZC['time_period'] == time_periods[position]]

    #Start with the first time step
    source_weather = ColumnDataSource(weather_at(0))

    #Color mapper spanning the full range of values, so colors are comparable between time steps
    weather_color_mapper = linear_cmap(field_name = 'value', palette = palette,
                                       low = CZC['value'].min(),
                                       high = CZC['value'].max())

    #Draw every measurement as a 2000 x 2000 rectangle (data units) without outline
    weather_map.rect(x='x_mercator', y='y_mercator',
                     width=2000, height=2000,
                     alpha=1, source=source_weather,
                     fill_color=weather_color_mapper,
                     line_alpha=0)

    slider = Slider(start=0, end=len(time_periods)-1, value=0, step=1, title=time_periods[0],show_value=False)

    #Handler to change the title to the corresponding date
    def slider_title_handler(attr,old,new):
        slider.title = time_periods[new]

    #Handler to adjust the data being shown
    def slider_to_map_handler(attr,old,new):
        source_weather.data = weather_at(new)

    slider.on_change('value',slider_title_handler)
    slider.on_change('value',slider_to_map_handler)

    #Id of the running periodic callback, None while the animation is paused
    #Kept inside the app function so that every session has its own animation state
    callback_id = None

    #Advance the slider by one step and wrap around after the last time step
    def animate_weather_update():
        slider.value = (slider.value + 1) % len(time_periods)

    #Toggle the animation; the callback is scheduled on the document of this app
    def animate_weather():
        nonlocal callback_id
        if callback_id is None:
            play_pause_slider_button.label = '❚❚ Pause'
            callback_id = doc.add_periodic_callback(animate_weather_update, 400)
        else:
            play_pause_slider_button.label = '► Play'
            doc.remove_periodic_callback(callback_id)
            callback_id = None

    play_pause_slider_button = Button(label='► Play', width=60)
    play_pause_slider_button.on_click(animate_weather)

    ### Weather map (END) ###

    ### Map (START) ###

    #Create the Map figure
    # shares the ranges of the weather map, so panning and zooming stay in sync
    geo_map = figure(x_range=weather_map.x_range, y_range=weather_map.y_range,
                     x_axis_type="mercator", y_axis_type="mercator",
                     plot_width=int(screen_length*0.55), plot_height=int(screen_height*0.5),
                     toolbar_location="above",title="Density of tweets")

    #Add the background to the map
    geo_map.add_tile(tile_provider)

    #Add the selector tools Lasso and Boxselect
    geo_map.add_tools(LassoSelectTool())
    geo_map.add_tools(BoxSelectTool())

    #Compute the data for the map
    #NOTE(review): the tweets are taken from 2015-05-26 to 2015-05-28 - confirm that
    #this matches the period of the weather file loaded into CZC
    map_data = compute_map_data(time_table['2015-05-26':'2015-05-28']['ID'].values)

    #Load in the map data
    source_map = ColumnDataSource(map_data)

    #Initialize the color mapper based on the values of density in the map_data
    density_color_mapper = log_cmap(field_name = 'density', palette = palette_geo_map,
                                    low = map_data['density'].min(),
                                    high = map_data['density'].max())

    #Defines color bar which indicates the levels
    color_bar = ColorBar(color_mapper=density_color_mapper['transform'],
                         ticker = LogTicker(num_minor_ticks=1),
                         label_standoff = 5, width=10, location=(0,0))# Set color_bar location

    #Add the color bar to the figure
    geo_map.add_layout(color_bar, 'right')

    #Plot the data points with their color corresponding to the amount of tweets at the location
    geo_map.circle(x='x_mercator', y='y_mercator', radius=70, color=density_color_mapper, alpha=1, source=source_map)

    ### Map (END) ###

    plots = layout([
        [weather_map,geo_map],
        [slider,play_pause_slider_button],

    ])
    doc.add_root(plots)



In [16]:
#Hand the app function W_app to bokeh's show to display the dashboard
show(W_app)