### Imports

In [1]:
import numpy as np
import pandas as pd
from pyproj import Transformer
from datetime import date,datetime

In [818]:
#Import general library
import bokeh

#Import to handle data parsed to bokeh plots
from bokeh.models import ColumnDataSource

#Import to handle bokeh notebook
from bokeh.io import output_notebook

#Import for the background of the map plot
from bokeh.tile_providers import CARTODBPOSITRON, get_provider

#Import to create plot, show plot, and output plot
from bokeh.plotting import figure, output_file, show

#Import to handle layout of the bokeh plots
from bokeh.layouts import column, row, layout

#Importing the RangeTool used in the Timeline plot
from bokeh.models import RangeTool

#Import to set the range of the Timeline
from bokeh.models import Range1d

#Import for the button and datepicker widget
from bokeh.models import Button, DatePicker

#Import for callback on an event
from bokeh.events import ButtonClick

#Import palette for the color mapper used in geo_map
from bokeh.palettes import Turbo256
palette = Turbo256

#Import log_cmap to support the color mapper for the geo_map
from bokeh.transform import log_cmap

#Importing the select tools for bokeh plots
from bokeh.models import LassoSelectTool, BoxSelectTool

output_notebook()

### Assumptions

In [256]:
#Reference screen size; the plots below derive plot_width / plot_height
#as fractions of these two values
screen_length = 1208
screen_height = 680

### Data import and preparation

In [4]:
#Columns that we want to keep from the data we load in
import_columns = ['created_at_CET','latitude','longitude','Overall.score','Anger','Confusion',
                  'Disgust','Fear','Happiness','Sadness','Shame','Surprise']

#Load the data into the python environment
#Only the relevant columns are parsed (usecols), so the unused columns of the
#file are never held in memory
#NOTE(review): absolute local path; adjust it to where the dataset lives
twitter_data = pd.read_csv("D:/Master Project/complete_swiss_dataset.csv",
                           encoding="ISO-8859-15",
                           usecols=import_columns,
                           low_memory=False)

#Timeline Map Histogram data (TMH_data)
#usecols keeps the column order of the file, so select with import_columns to
#get the columns in the order listed above
TMH_data = twitter_data[import_columns]

#Round all numeric entries in the dataframe to 3 decimals
#Mainly to remove specific gps locations of users
TMH_data = TMH_data.round(3)

#Format the string input to datetime
TMH_data['created_at_CET'] = pd.to_datetime(TMH_data['created_at_CET'], format='%Y-%m-%d %H:%M:%S')

#Sort the data once, by the parsed date at creation
#The stable sort keeps tweets with an identical timestamp in file order and
#ignore_index renumbers the rows 0..n-1, so label 0 is always the earliest tweet
TMH_data = TMH_data.sort_values(by='created_at_CET', kind='stable', ignore_index=True)

#Drop the reference to the loaded frame now that the selection has been made
twitter_data = None

In [5]:
#Declare transformer to convert from EPSG:4326 to EPSG:3857 (web mercator)
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857")

#Transform the coordinates from EPSG:4326 to EPSG:3857
#Latitude is passed first, longitude second; the result holds x at [0] and y at [1]
latitudes = TMH_data['latitude'].values
longitudes = TMH_data['longitude'].values
mercator_transform = transformer.transform(latitudes, longitudes)

#Store the projected coordinates, rounded to one decimal, as new columns
for column, axis_values in zip(('x_mercator', 'y_mercator'), mercator_transform):
    TMH_data[column] = axis_values.round(1)

5

### Timeline bokeh plot

In [621]:
%%time

#Count the number of tweets on each calendar day
#normalize() sets every timestamp to midnight of its day, so the grouping key
#contains year, month and day: the same day-of-year in two different years can
#never be merged into one bucket
#Days without any tweet do not appear in the result
daily_counts = TMH_data['created_at_CET'].dt.normalize().value_counts().sort_index()

#Keep the 2 x n_days layout used by the timeline plot:
#freq_list[0] holds the days as datetime objects, freq_list[1] the tweet counts
freq_list = np.array([daily_counts.index.to_pydatetime(),
                      daily_counts.to_numpy()], dtype=object)

Wall time: 2.48 s


In [658]:
def timeline(doc):
    """Build the timeline plot with a range tool linked to two date pickers.

    The line plot shows freq_list (row 0: days, row 1: tweet counts per day).
    A RangeTool overlay selects a time period; its start and end are mirrored
    into the 'Start date' / 'End date' DatePicker widgets, and changing a
    picker moves the matching edge of the range.

    Parameters
    ----------
    doc : the bokeh document; the finished layout is added to it as a root.

    Reads the module-level TMH_data, freq_list, screen_length and screen_height.
    """

    #Set the global max and min for the date ranges
    first_timestamp = TMH_data.iloc[0]['created_at_CET']
    last_timestamp = TMH_data.iloc[-1]['created_at_CET']
    min_date_interval = first_timestamp.date()
    max_date_interval = last_timestamp.date()

    def epoch_ms_to_day(epoch_ms):
        #Bokeh stores datetime axis values as milliseconds since the epoch and
        #treats naive datetimes as UTC. pd.Timestamp(..., unit='ms') uses the same
        #naive convention; a conversion through local time can land on the
        #neighbouring day for values close to midnight
        return pd.Timestamp(epoch_ms, unit='ms').date()

    def picker_value_to_epoch_ms(value):
        #Accepts an ISO date string or a date object
        #Timestamp.value is in nanoseconds since the epoch, hence the division
        return pd.Timestamp(value).value / 1e6

    #Create the Timeline figure
    Timeline = figure(background_fill_color="#fafafa",
                      x_axis_type='datetime',
                      tools="pan,box_zoom,wheel_zoom,reset",
                      toolbar_location="above",
                      plot_width=int(screen_length*0.7),
                      plot_height=int(screen_height*0.3))

    #Load the data for the timeline into a ColumnDataSource for the plot
    source = ColumnDataSource(data=dict(date=freq_list[0], freq=freq_list[1]))

    #Plot a line on the graph
    Timeline.line('date','freq',source=source)

    #Declare a button which will update the values in Map according to the time period
    #No callback is attached to it yet
    button = Button(label="Update Time Period Map", button_type="success",
                    width = int(screen_length*0.05),
                    height = int(screen_height*0.075))

    #Create a datepicker for the start date
    start_date_input = DatePicker(title='Start date', value=min_date_interval,
                                  min_date=min_date_interval, max_date=max_date_interval,
                                  width = int(screen_length*0.1),
                                  height = int(screen_height*0.075))

    #Create a datepicker for the end date
    end_date_input = DatePicker(title='End date', value=max_date_interval,
                                min_date=min_date_interval, max_date=max_date_interval,
                                width = int(screen_length*0.1),
                                height = int(screen_height*0.075))

    #Declare a RangeTool for the selection of the date ranges
    range_tool = RangeTool(x_range=Range1d(start=first_timestamp, end=last_timestamp))
    range_tool.overlay.fill_color = "navy"
    range_tool.overlay.fill_alpha = 0.1

    #Callback to handle the changing x range of the range tool
    #Will update the corresponding start and end dates in the datepickers
    def range_tool_to_datepicker_handler(attr,old,new):
        for picker, bound in ((start_date_input, range_tool.x_range.start),
                              (end_date_input, range_tool.x_range.end)):
            #A numeric bound means the range has been moved; the initial bounds are Timestamps
            #Whole-number values can arrive as int, so accept both numeric types
            if isinstance(bound, (int, float)):
                #Keep the picker inside the period covered by the data
                picked_day = epoch_ms_to_day(bound)
                picker.value = min(max(picked_day, min_date_interval), max_date_interval)

    #Callback for when the datepicker of starting date changes
    def date_picker_to_range_tool_handler_start(attr,old,new):
        #A cleared picker has no date to convert
        if new is not None:
            range_tool.x_range.start = picker_value_to_epoch_ms(new)

    #Callback for when the datepicker of ending date changes
    def date_picker_to_range_tool_handler_end(attr,old,new):
        #A cleared picker has no date to convert
        if new is not None:
            range_tool.x_range.end = picker_value_to_epoch_ms(new)

    #Link the datepickers to the callbacks for when their value changes
    start_date_input.on_change('value',date_picker_to_range_tool_handler_start)
    end_date_input.on_change('value',date_picker_to_range_tool_handler_end)

    #Link the range_tool to the callbacks for its interval
    range_tool.x_range.on_change('start',range_tool_to_datepicker_handler)
    range_tool.x_range.on_change('end',range_tool_to_datepicker_handler)

    #Add the range_tool to the Timeline plot
    Timeline.add_tools(range_tool)
    Timeline.toolbar.active_multi = range_tool

    #Create the layout for the Timeline plot
    plots = layout([
        [Timeline,[start_date_input,end_date_input,button]],
    ])
    #Add the plot to doc
    doc.add_root(plots)

In [659]:
#Display the timeline application by passing the timeline function to bokeh's show
show(timeline)

### Map bokeh plot

In [826]:
def geoMap(doc):
    """Build the map view with its date pickers and button, and add it to doc.

    The points come from the module-level test2 frame (columns x_mercator,
    y_mercator and test_density), so test2 has to exist before the app is shown.
    The date pickers are displayed but not connected to any callback; the
    button prints the indices of the currently selected points.

    Parameters
    ----------
    doc : the bokeh document; the finished layout is added to it as a root.
    """
    #Extent of the map when it is first drawn, in web mercator coordinates
    initial_x_range = (650000, 1200000)
    initial_y_range = (5700000, 6100000)

    #Create the Map figure
    geo_map = figure(x_range=initial_x_range, y_range=initial_y_range,
                     x_axis_type="mercator", y_axis_type="mercator",
                     plot_width=int(screen_length*0.4),
                     plot_height=int(screen_height*0.6))

    #Selection tools: lasso first, then box
    for select_tool in (LassoSelectTool(), BoxSelectTool()):
        geo_map.add_tools(select_tool)

    #Add the background tiles to the map
    geo_map.add_tile(get_provider(CARTODBPOSITRON))

    #One circle per row of test2, colored by its test_density value
    source = ColumnDataSource(test2)
    density_colors = log_cmap(field_name='test_density', palette=palette,
                              low=90, high=100, nan_color=(247,247,246))
    geo_map.circle(x='x_mercator', y='y_mercator', size=2,
                   color=density_colors, alpha=0.7, source=source)

    #Declare a button which will update the values in Map according to the time period
    map_update_button = Button(label="Update Time Period Map", button_type="success",
                               width=int(screen_length*0.05),
                               height=int(screen_height*0.075))

    #Set the global max and min for the date ranges
    min_date_interval = TMH_data.iloc[0]['created_at_CET'].date()
    max_date_interval = TMH_data.iloc[-1]['created_at_CET'].date()

    #Both date pickers share the same size and the same allowed period
    picker_size = dict(width=int(screen_length*0.1),
                       height=int(screen_height*0.075))
    picker_bounds = dict(min_date=min_date_interval, max_date=max_date_interval)

    start_date_input = DatePicker(title='Start date', value=min_date_interval,
                                  **picker_bounds, **picker_size)
    end_date_input = DatePicker(title='End date', value=max_date_interval,
                                **picker_bounds, **picker_size)

    #For now the button only prints which points are selected on the map
    def print_selected_indices():
        print(source.selected.indices)

    map_update_button.on_click(print_selected_indices)

    #Map on top, widgets in one row underneath
    plots = layout([
        [geo_map],
        [start_date_input, end_date_input, map_update_button],
    ])
    doc.add_root(plots)

In [827]:
#Display the map application by passing the geoMap function to bokeh's show
show(geoMap)

[]
[76853, 76815, 76949, 76941, 76932, 76931, 77004, 76966, 76980, 76981, 76928, 76930, 77011, 76904, 76929, 76857, 76912, 76908, 76899, 76902, 76901, 76906, 76896, 76909, 76898, 76897, 76907, 76903, 76914, 76913, 76910, 76924, 76868, 76895, 76893, 76871, 76875, 76900, 76894, 76948, 76911, 76954, 76952, 76945, 76936, 76938, 76944, 76940, 76878, 76889, 76859, 76864, 76863, 76861, 76890, 76866, 76874, 76881, 76888, 76876, 76873, 76870, 76872, 76869, 76810, 76813, 76829, 76832, 76852, 76823, 76839, 76824, 76825, 76843, 76860, 76858, 76850, 76851, 76883, 76882, 76979, 76971, 76923, 76933, 76840, 76838, 76822, 76877, 76817, 76887, 76922, 76919, 76960, 76845, 76856, 76811, 76879, 76978, 76925, 76905, 76935, 76982, 76956, 76969, 76943, 76983, 77006, 77009, 76989, 76917, 77003, 76975, 76788, 76785, 76782, 76786, 76812, 76809, 76779, 76842, 76828, 76844, 76835, 76880, 76848, 76849, 76837, 76886, 76781, 76783, 76795, 76799, 76796, 76805, 76804, 76800, 76798, 76797, 76794, 76793, 76790, 76792, 76

### Histogram bokeh plot

In [676]:
%%time
#Projected coordinates of every tweet, ordered by x and then by y
mercator_columns = ['x_mercator', 'y_mercator']
test = TMH_data[mercator_columns].sort_values(by=mercator_columns)

Wall time: 233 ms


In [810]:
#Log-scaled color mapping of the test_density column onto the palette,
#with 90 as the low end and 100 as the high end of the mapped range
color_mapper = log_cmap('test_density', palette, 90, 100)

In [765]:
#Inspect the nan_color of the transform stored in the color mapper
color_mapper['transform'].nan_color

'gray'

In [778]:
%%time
#Keep the first row of every distinct (x_mercator, y_mercator) pair
#and copy the selection to get an independent frame
unique_location_mask = ~test.duplicated(subset=['x_mercator','y_mercator'])
test2 = test[unique_location_mask].copy()

Wall time: 80 ms


In [779]:
#One random value between 0 and 100, rounded to a whole number, per row of test2
#(the np.empty pre-allocation was overwritten immediately and np.random.uniform
#already returns an array, so neither is needed)
random = np.random.uniform(0,100,len(test2)).round(0)

In [780]:
#Check the shape of the generated array
random.shape

(77101,)

In [781]:


#Attach a random test density (whole numbers between 0 and 100) to every row of test2
test2.loc[:, 'test_density'] = np.round(np.random.uniform(0, 100, len(test2)), 0)

In [801]:
#Blank out the test density of randomly chosen rows
#The value is written with a single .iloc[row, column] call: the chained form
#test2.iloc[count]['test_density'] = ... assigns into a temporary row object
#and is not guaranteed to reach test2
density_position = test2.columns.get_loc('test_density')
count = 0
while(count < len(test2)):
    test2.iloc[count, density_position] = np.nan
    #Advance by 0, 1 or 2 rows
    count = count + int(3*np.random.uniform())

In [802]:
#Display test2 to inspect the test_density column
test2

Unnamed: 0,x_mercator,y_mercator,test_density
547313,-3197095.8,4668810.0,
66586,-2449585.4,9388979.0,20.0
254259,-986290.7,4655423.8,
278343,-963581.5,5315625.6,
1108542,-961689.1,5221539.4,1.0
...,...,...,...
877772,5350460.0,3417842.8,
548225,5515992.1,3137846.8,68.0
958063,5519665.6,2918426.3,
16982,5527569.3,3124346.3,


In [684]:
%%time
#Select four rows by position (the cell is timed with %%time)
TMH_data.iloc[[123,124,11,23]]

Wall time: 998 µs


Unnamed: 0,created_at_CET,latitude,longitude,Overall.score,Anger,Confusion,Disgust,Fear,Happiness,Sadness,Shame,Surprise,x_mercator,y_mercator
123,2015-01-01 01:45:40,46.188,6.137,0,0,0,0,0,0,0,0,0,683167.7,5810527.7
124,2015-01-01 01:45:41,47.563,7.599,0,0,0,0,0,0,0,0,0,845916.8,6034459.1
11,2015-01-01 01:02:01,47.055,8.315,3,0,0,0,0,3,0,0,0,925621.6,5951056.1
23,2015-01-01 01:05:33,47.525,7.558,4,0,0,0,0,4,0,0,0,841352.7,6028192.4


### Adding all the bokeh plots together

In [15]:
def bkapp(doc):
    """Draft of the combined layout: timeline on top, map and histogram below.

    The timeline and the histogram only contain fixed dummy points; the map
    shows the first 1000 rows of the module-level TMH_data frame.

    Parameters
    ----------
    doc : the bokeh document; the finished layout is added to it as a root.
    """
    #Map and histogram share the same size
    panel_size = dict(plot_width=int(screen_length*0.4),
                      plot_height=int(screen_height*0.6))

    #--- Timeline: no toolbar, no y axis, six dummy points ---
    timeline_fig = figure(background_fill_color="#fafafa",
                          x_axis_type='datetime',
                          y_axis_type=None,
                          tools="", toolbar_location=None,
                          plot_width=int(screen_length*0.8),
                          plot_height=int(screen_height*0.2))

    #Range tool over the fixed interval 1..3 of the dummy points
    range_tool = RangeTool(x_range=Range1d(1,3))
    range_tool.overlay.fill_color = "navy"
    range_tool.overlay.fill_alpha = 0.2

    timeline_fig.add_tools(range_tool)
    timeline_fig.toolbar.active_multi = range_tool

    timeline_fig.circle([1,2,3,4,5,6],[1,1,1,1,1,1])

    #--- Map: range bounds supplied in web mercator coordinates ---
    map_fig = figure(x_range=(650000, 1200000), y_range=(5700000, 6100000),
                     x_axis_type="mercator", y_axis_type="mercator",
                     **panel_size)

    #Add the background tiles to the map
    map_fig.add_tile(get_provider(CARTODBPOSITRON))

    #Plot the first 1000 tweets as small black circles
    first_points = TMH_data.iloc[:1000]
    source = ColumnDataSource(data=dict(longitude=first_points['x_mercator'],
                                        latitude=first_points['y_mercator']))
    map_fig.circle(x='longitude', y='latitude', size=2, color="black", alpha=0.7, source=source)

    #--- Histogram: a single dummy point for now ---
    histogram_fig = figure(background_fill_color="#fafafa", **panel_size)
    histogram_fig.circle(1,1)

    #Timeline in the first row, map and histogram side by side in the second
    plots = layout([
        [timeline_fig],
        [map_fig, histogram_fig],
    ])
    doc.add_root(plots)

In [16]:
#Display the combined application by passing the bkapp function to bokeh's show
show(bkapp)

In [56]:
#Convert the first timestamp to a standard library datetime
#`datetime` is the class imported with `from datetime import date,datetime`, so
#`datetime.datetime(...)` raises AttributeError; the pandas Timestamp converts itself instead
TMH_data['created_at_CET'].iloc[0].to_pydatetime()

In [207]:
%%time
#Time sorting by creation date; the sorted frame is only displayed, TMH_data is not reassigned
TMH_data.sort_values(by='created_at_CET')

Wall time: 1.33 s


Unnamed: 0,created_at_CET,latitude,longitude,Overall.score,Anger,Confusion,Disgust,Fear,Happiness,Sadness,Shame,Surprise,x_mercator,y_mercator
0,2015-01-01 01:00:03,47.458,8.548,0,0,0,0,0,0,0,0,0,951559.0,6017154.4
1,2015-01-01 01:00:09,46.160,8.777,0,0,0,0,0,0,0,0,0,977051.2,5806026.5
2,2015-01-01 01:00:14,46.360,6.929,3,0,0,0,0,3,0,0,0,771332.8,5838228.4
3,2015-01-01 01:00:22,46.382,6.242,0,0,0,0,0,0,0,0,0,694856.3,5841777.7
4,2015-01-01 01:00:34,46.160,8.777,0,0,0,0,0,0,0,0,0,977051.2,5806026.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1115888,2018-09-06 16:51:13,46.799,8.232,0,0,0,0,0,0,0,0,0,916382.0,5909327.2
1115889,2018-09-06 16:53:31,46.200,6.146,0,0,0,0,0,0,0,0,0,684169.6,5812457.5
1115890,2018-09-06 16:53:53,46.231,6.103,0,0,0,0,0,0,0,0,0,679382.9,5817444.7
1115891,2018-09-06 16:54:32,47.050,7.283,0,0,0,0,0,0,0,0,0,810739.9,5950239.2


In [213]:
%%time
#Time sorting by creation date; the sorted frame is only displayed, TMH_data is not reassigned
TMH_data.sort_values(by='created_at_CET')

Wall time: 174 ms


Unnamed: 0,created_at_CET,latitude,longitude,Overall.score,Anger,Confusion,Disgust,Fear,Happiness,Sadness,Shame,Surprise,x_mercator,y_mercator
0,2015-01-01 01:00:03,47.458,8.548,0,0,0,0,0,0,0,0,0,951559.0,6017154.4
1,2015-01-01 01:00:09,46.160,8.777,0,0,0,0,0,0,0,0,0,977051.2,5806026.5
2,2015-01-01 01:00:14,46.360,6.929,3,0,0,0,0,3,0,0,0,771332.8,5838228.4
3,2015-01-01 01:00:22,46.382,6.242,0,0,0,0,0,0,0,0,0,694856.3,5841777.7
4,2015-01-01 01:00:34,46.160,8.777,0,0,0,0,0,0,0,0,0,977051.2,5806026.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1115888,2018-09-06 16:51:13,46.799,8.232,0,0,0,0,0,0,0,0,0,916382.0,5909327.2
1115889,2018-09-06 16:53:31,46.200,6.146,0,0,0,0,0,0,0,0,0,684169.6,5812457.5
1115890,2018-09-06 16:53:53,46.231,6.103,0,0,0,0,0,0,0,0,0,679382.9,5817444.7
1115891,2018-09-06 16:54:32,47.050,7.283,0,0,0,0,0,0,0,0,0,810739.9,5950239.2


In [210]:
#Parse created_at_CET into datetimes using the fixed '%Y-%m-%d %H:%M:%S' format
TMH_data['created_at_CET'] = pd.to_datetime(TMH_data['created_at_CET'], format='%Y-%m-%d %H:%M:%S')

In [231]:
#Range from the timestamp of the first row to that of the last row (0-1 evaluates to -1)
Range1d(start=TMH_data.iloc[0]['created_at_CET'],end=TMH_data.iloc[0-1]['created_at_CET'])

In [230]:
#Look up the creation timestamp of the first row
TMH_data.iloc[0]['created_at_CET']

Timestamp('2015-01-01 01:00:03')