# Libraries

In [7]:
import numpy as np
import pandas as pd
import pyproj
from pyproj import Transformer
from datetime import date,datetime
import yaml

In [8]:
#All bokeh imports

import bokeh

from bokeh.io import show, output_notebook

from bokeh.layouts import column, row

#Used to structure the data input
from bokeh.models import ColumnDataSource

#User inputs
from bokeh.models import Slider, Button, TextInput, DateRangeSlider, DatePicker

#
from bokeh.models import CategoricalColorMapper

from bokeh.models import ColorBar, NumeralTickFormatter, Circle

from bokeh.models import LassoSelectTool, BoxSelectTool

from bokeh.palettes import Spectral6, PRGn, Turbo256
palette = Turbo256

from bokeh.plotting import figure, output_file, show

from bokeh.themes import Theme

from bokeh.tile_providers import CARTODBPOSITRON, OSM, get_provider

from bokeh.transform import linear_cmap, log_cmap

output_notebook()

# Functions

In [87]:
def web_mercator_transform(df_latitude_longitude):
    
    #Transformer to get the coordinates into web mercator format which is what the geo graph takes to plot
    transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857")
    
    transformed_values = transformer.transform(df_latitude_longitude['latitude'].values,
                                               df_latitude_longitude['longitude'].values)
    
    #Transpose the transformed values and then parse then into a dataframe
    #np.array() allows the values to be transformed (.T)
    lat_long_web_mercator = pd.DataFrame(np.array(transformed_values).T)
    
    #Set the columns to the correct format
    lat_long_web_mercator.columns = ['latitude','longitude']
    
    return(lat_long_web_mercator)

# Data Preparation

In [9]:
#Loading twitter data into the python environment

twitter_data = pd.read_csv("complete_swiss_dataset.csv",encoding = "ISO-8859-15")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [117]:
twitter_data.sort_values(by='latitude')

Unnamed: 0.1,Unnamed: 0,user_screen_name,PK,id,latitude,longitude,raw_geo,place_type,place_full_name,place_country_code,...,created_at_CET,created_at_GMT,created_at_EST,check_in_bot,weather_station_bot,Stress.overall.score,homeplace,workplace,cap_english,cap_universal
751151,751152,NeenaAbdul,917499550419636224_455593068_25.34657637_49.58...,917499550419636224,25.34658,49.584410,"{u'type': u'Point', u'coordinates': [49.584412...",country,Switzerland,CH,...,2017-10-09 23:18:33,2017-10-09 21:18:33,2017-10-09 16:18:33,False,False,0,0,0,0.001725,0.001866
320602,320603,FAISAL881,774108643327217664_340820672_26.6406441_49.916...,774108643327217664,26.64064,49.916100,"{u'type': u'Point', u'coordinates': [49.916102...",city,"Thun, Switzerland",CH,...,2016-09-09 06:54:17,2016-09-09 04:54:17,2016-09-08 23:54:17,False,False,0,0,0,0.001946,0.002308
2606,2607,_abm98,552529067665526784_1096293950_27.00729723_49.6...,552529067665526784,27.00730,49.654690,"{u'type': u'Point', u'coordinates': [49.654687...",country,Switzerland,CH,...,2015-01-06 19:16:09,2015-01-06 18:16:09,2015-01-06 13:16:09,False,False,0,0,0,0.141574,0.016452
835046,835047,r_alsube,712558295375036416_636628914_27.1146475_49.550...,712558295375036416,27.11465,49.550750,"{u'type': u'Point', u'coordinates': [49.550754...",city,"Geneva, Switzerland",CH,...,2016-03-23 09:35:10,2016-03-23 08:35:10,2016-03-23 03:35:10,False,False,0,0,0,0.012788,0.005778
58256,58257,aliasgarmg,877506317056974848_204763918_29.3306846_48.064...,877506317056974848,29.33068,48.064100,"{u'type': u'Point', u'coordinates': [48.064104...",city,"Zug, Switzerland",CH,...,2017-06-21 14:39:43,2017-06-21 12:39:43,2017-06-21 07:39:43,False,False,0,0,0,0.001635,0.001936
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400855,400856,hantighton,568719996261679104_413095832_55.73092159_37.45...,568719996261679104,55.73092,37.453090,"{u'type': u'Point', u'coordinates': [37.453094...",city,"Bagnes, Switzerland",CH,...,2015-02-20 11:33:08,2015-02-20 10:33:08,2015-02-20 05:33:08,False,False,0,0,0,0.001262,0.001744
131836,131837,BhoyWonder_,743469602076753920_529258437_55.94907563_-3.36...,743469602076753920,55.94908,-3.362888,"{u'type': u'Point', u'coordinates': [-3.362888...",city,"Les Genevez (JU), Suisse",CH,...,2016-06-16 17:45:40,2016-06-16 15:45:40,2016-06-16 10:45:40,False,False,0,0,0,0.001554,0.002910
70051,70052,anciitje,683033906686332928_1112214211_56.95384814_24.1...,683033906686332928,56.95385,24.137370,"{u'type': u'Point', u'coordinates': [24.137374...",country,Switzerland,CH,...,2016-01-01 22:15:48,2016-01-01 21:15:48,2016-01-01 16:15:48,False,False,0,0,0,0.141574,0.067479
911081,911082,shura_sh,591326732700921856_411094616_59.9398166_30.284...,591326732700921856,59.93982,30.284800,"{u'type': u'Point', u'coordinates': [30.284800...",city,"Romoos, Schweiz",CH,...,2015-04-23 21:44:14,2015-04-23 19:44:14,2015-04-23 14:44:14,False,False,0,0,0,0.032183,0.003354


In [15]:
coord_data = twitter_data[['latitude','longitude']]
coord_data = coord_data.round(3)

In [112]:
#Lines to format a dataframe in the correct format for the geographical representation (geo_map)

#Copying the relevant columns from twitter_data into geo_map_data
geo_map_data = twitter_data[['latitude','longitude','created_at_CET']].copy(deep=True)

#Rounding all values in geo_map_data 
#Only important for longitude and latitude, due to 1. privacy and 2. computing densities
geo_map_data = geo_map_data.round(3)

#Add a new colunn to keep track of the original ID of the row (tweet)
geo_map_data.loc[:,'ID'] = geo_map_data.index

geo_map_data.index = pd.to_datetime(geo_map_data.index, format='%Y-%m-%d %H:%M:%S')

#Set the date of creation as the index which allows for querying the data using dates
geo_map_data = geo_map_data.set_index(['created_at_CET'])

lat_long_correct_format = web_mercator_transform(geo_map_data[['latitude','longitude']])

geo_map_data.loc[:,'latitude'] = np.array(lat_long_correct_format['latitude'])

geo_map_data.loc[:,'longitude'] = np.array(lat_long_correct_format['longitude'])

In [102]:
geo_map_data.drop_duplicates(subset=['latitude','longitude'])[0:10]

Unnamed: 0_level_0,latitude,longitude,ID
created_at_CET,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-12-05 22:47:22,845248.9,6034294.0,0
2017-10-07 08:52:47,955455.2,6016002.0,1
2017-03-29 00:36:11,685282.8,5812779.0,2
2015-06-27 12:47:23,844692.3,6033634.0,3
2015-01-05 14:06:44,1058982.0,5918113.0,4
2015-04-13 10:46:21,1061097.0,5917299.0,5
2016-12-10 18:11:51,769217.7,5850011.0,6
2016-04-03 07:31:38,881873.0,5997748.0,7
2015-01-04 16:32:30,693631.7,5819698.0,8
2017-07-10 16:06:48,788587.3,5874597.0,17


In [108]:
geo_map_data.sort_values(by='created_at_CET')

Unnamed: 0_level_0,latitude,longitude,ID
created_at_CET,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-01 01:00:03,951559.007301,6.017154e+06,577072
2015-01-01 01:00:09,977051.170693,5.806027e+06,372682
2015-01-01 01:00:14,771332.751707,5.838228e+06,921301
2015-01-01 01:00:22,694856.261532,5.841778e+06,151568
2015-01-01 01:00:34,977051.170693,5.806027e+06,372724
...,...,...,...
2018-09-06 16:51:13,916382.048210,5.909327e+06,118483
2018-09-06 16:53:31,684169.590415,5.812457e+06,238174
2018-09-06 16:53:53,679382.852311,5.817445e+06,570773
2018-09-06 16:54:32,810739.851447,5.950239e+06,518695


In [111]:
geo_map_data['2015-01-01':'2015-02-01']

KeyError: '2015-01-01'

In [76]:
temp = geo_map_data[['latitude','longitude']]

In [84]:
geo_map_data[['latitude','longitude']]

Unnamed: 0_level_0,latitude,longitude
created_at_CET,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-12-05 22:47:22,47.562,7.593
2017-10-07 08:52:47,47.451,8.583
2017-03-29 00:36:11,46.202,6.156
2015-06-27 12:47:23,47.558,7.588
2015-01-05 14:06:44,46.853,9.513
...,...,...
2015-11-26 15:03:17,47.367,8.550
2015-11-28 22:38:34,46.872,9.537
2018-07-14 07:49:02,46.231,6.132
2018-04-17 03:43:48,46.590,7.906


In [79]:
temp1 = transformer.transform(temp['latitude'].values,
                             temp['longitude'].values)

In [80]:
temp1

(array([ 845248.89359333,  955455.18947867,  685282.78532339, ...,
         682611.11754435,  880091.89421162, 1068221.83365225]),
 array([6034294.12691147, 6016001.97297282, 5812779.16327661, ...,
        5817444.72286644, 5875406.53685592, 5888212.81523809]))

In [86]:
web_mercator_transform(geo_map_data[['latitude','longitude']])

Unnamed: 0,latitude,longitude
0,8.452489e+05,6.034294e+06
1,9.554552e+05,6.016002e+06
2,6.852828e+05,5.812779e+06
3,8.446923e+05,6.033634e+06
4,1.058982e+06,5.918113e+06
...,...,...
1115888,9.517816e+05,6.002185e+06
1115889,1.061654e+06,5.921206e+06
1115890,6.826111e+05,5.817445e+06
1115891,8.800919e+05,5.875407e+06


In [12]:
def dev_bkapp(doc):

    #Add tile_provider, which shows the world in web mercator format
    tile_provider = get_provider(CARTODBPOSITRON)

    # range bounds supplied in web mercator coordinates
    geo_map = figure(x_range=(650000, 1200000), y_range=(5700000, 6100000),
               x_axis_type="mercator", y_axis_type="mercator")
    #Add the background of the graph
    geo_map.add_tile(tile_provider)
    
    #Add two selector tools to the geo graph
    geo_map.add_tools(LassoSelectTool())
    geo_map.add_tools(BoxSelectTool())

    #Transformer to get the coordinates into the correct format
    #TODO: format the data before its parsed
    transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857")
    
    #Copy part of the dataframe to show those data points on the geo graph
    dev_coord_density = dev_coord_data_time.loc['2015-1-1':'2015-2-1'].copy()
        
    #Sort the data points by latitude and longitude
    dev_coord_density = dev_coord_density.sort_values(by=['latitude','longitude'])

    #Rewrite the index with 0 .. length of dataframe, as time has already been specified
    #And this allows for the next step: computing density
    dev_coord_density = dev_coord_density.set_index(np.arange(0,len(dev_coord_density),1))

    #Remove the duplicates from the datagframe, checking only the latitude and longitude
    dev_coord_density_no_dup = dev_coord_density.drop_duplicates(subset=['latitude','longitude'])

    #Create a new array of ones, to compute the densities
    density = np.ones(len(dev_coord_density_no_dup))

    #Loop the dataframe and set the densities to the difference in index
    #Removing duplicates keeps the first index of the duplicates,
    #so the gaps in indicices are equal to the amount of duplicates present
    for i in range(0,len(dev_coord_density_no_dup)-1):
        density[i] = dev_coord_density_no_dup.index[i+1] - dev_coord_density_no_dup.index[i]

    #Add density as a column of the dataframe
    dev_coord_density_no_dup.loc[:,'density'] = density

    #Transform all the coordinates to web mercator format
    dev_merc_coord = transformer.transform(dev_coord_density_no_dup['latitude'].values,
                                           dev_coord_density_no_dup['longitude'].values)
    
    #Reformat the web mercator coordinates to fit the correct format
    plot_data = pd.DataFrame(np.array(dev_merc_coord).T)
    plot_data.columns = ['longitude','latitude']

    #Add the density column to the dataframe
    plot_data.loc[:,'density'] = density

    #Set the source of the graph to the ColumnDataSource of the dataframe
    source = ColumnDataSource(plot_data)

    #Create a mapping of color based on the max and min densities in the dataframe
    color_mapper = log_cmap(field_name = 'density', palette = palette,
                               low = plot_data.loc[:,'density'].min(), high = plot_data.loc[:,'density'].max())

    #Defines color bar, which shows the scale of the color_mapper
    color_bar = ColorBar(color_mapper=color_mapper['transform'], 
                         formatter = NumeralTickFormatter(format='0.0[0000]'), 
                         label_standoff = 13, width=8, location=(0,0))# Set color_bar location

    #Add the color bar to the graph on the right side
    geo_map.add_layout(color_bar, 'right')
    
    #Add the data points as circles to the geo graph
    geo_map.circle(x='longitude', y='latitude', size=2, alpha=0.7, source=source, color=color_mapper)
    
    #Define a handler which is called when the coupled button is pressed, coupling occurs later
    def my_button_handler(new):
        
        start_date = start_date_input.value
        
        end_date = end_date_input.value
        
        dev_coord_density = dev_coord_data_time.loc[start_date:end_date].copy()
        
        dev_coord_density = dev_coord_density.sort_values(by=['latitude','longitude'])
        
        dev_coord_density = dev_coord_density.set_index(np.arange(0,len(dev_coord_density),1))
        
        dev_coord_density_no_dup = dev_coord_density.drop_duplicates(subset=['latitude','longitude'])
        
        density = np.ones(len(dev_coord_density_no_dup))
        
        for i in range(0,len(dev_coord_density_no_dup)-1):
            density[i] = dev_coord_density_no_dup.index[i+1] - dev_coord_density_no_dup.index[i]
        
        dev_merc_coord = transformer.transform(dev_coord_density_no_dup['latitude'].values,
                                               dev_coord_density_no_dup['longitude'].values)
        
        plot_data = pd.DataFrame(np.array(dev_merc_coord).T)
        plot_data.columns = ['longitude','latitude']

        plot_data.loc[:,'density'] = density

        color_mapper = log_cmap(field_name = 'density', palette = palette,
                                   low = plot_data.loc[:,'density'].min(), high = plot_data.loc[:,'density'].max())
        
        color_bar.color_mapper=color_mapper['transform']
        
        source.data = plot_data

    #Create a button for the updating of geo graph
    button = Button(label="Update", button_type="success")

    #Couple the button to the handler defined above
    button.on_click(my_button_handler)
    
    #Create a button for the updating of the histogram
    dev_button = Button(label="Print", button_type="success")

    #Temporary handler for dev_button which should couple to the histogram
    def temp():
        selected = source.selected.indices
        
        hist.data['x'] = selected
        
        print(selected)
        
        print(plot_data[selected[0]])
    
    dev_button.on_click(temp)
    
    start_date_input = DatePicker(value="2015-01-01", title="Start Date:")
    
    end_date_input = DatePicker(value="2015-02-01", title="End Date:")

    histogram = figure()
    
    hist = ColumnDataSource(data=dict(x=[0,1,2,3,4,5]))
    
    histogram.circle(x='x', size=10, alpha=0.7, source=hist)
    
    doc.add_root(row(column(geo_map,row(start_date_input,end_date_input),row(button)),column(histogram,dev_button)))

In [11]:
show(dev_bkapp)

ERROR:tornado.application:Uncaught exception GET /autoload.js?bokeh-autoload-element=1003&bokeh-absolute-url=http://localhost:50265&resources=none (127.0.0.1)
HTTPServerRequest(protocol='http', host='localhost:50265', method='GET', uri='/autoload.js?bokeh-autoload-element=1003&bokeh-absolute-url=http://localhost:50265&resources=none', version='HTTP/1.1', remote_ip='127.0.0.1')
Traceback (most recent call last):
  File "C:\Users\Laptop\Anaconda3\envs\Master Project\lib\site-packages\tornado\web.py", line 1703, in _execute
    result = await result
  File "C:\Users\Laptop\Anaconda3\envs\Master Project\lib\site-packages\bokeh\server\views\autoload_js_handler.py", line 60, in get
    session = await self.get_session()
  File "C:\Users\Laptop\Anaconda3\envs\Master Project\lib\site-packages\bokeh\server\views\session_handler.py", line 120, in get_session
    session = await self.application_context.create_session_if_needed(session_id, self.request, token)
  File "C:\Users\Laptop\Anaconda3\en