### Imports

In [1]:
import numpy as np
import pandas as pd
from pyproj import Transformer
from datetime import date,datetime

In [26]:
#Import general library
import bokeh

#Import to handle data parsed to bokeh plots
from bokeh.models import ColumnDataSource

#Import to handle bokeh notebook
from bokeh.io import output_notebook

#Import for the background of the map plot
from bokeh.tile_providers import CARTODBPOSITRON, get_provider

#Import to create plot, show plot, and output plot
from bokeh.plotting import figure, output_file, show

#Import to handle layout of the bokeh plots
from bokeh.layouts import column, row, layout

#Importing the RangeTool used in the Timeline plot
from bokeh.models import RangeTool

#Import to set the range of the Timeline
from bokeh.models import Range1d

#Import for the button and datepicker widget
from bokeh.models import Button, DatePicker

#Import for callback on an event
from bokeh.events import ButtonClick

#Import palette for the color mapper used in geo_map
from bokeh.palettes import Turbo256
palette = Turbo256

#Import log_cmap to support the color mapper for the geo_map
from bokeh.transform import log_cmap, linear_cmap

#Importing the select tools for bokeh plots
from bokeh.models import LassoSelectTool, BoxSelectTool

#Import the tools for the color bar
from bokeh.models import ColorBar, NumeralTickFormatter

output_notebook()

### Assumptions

In [3]:
#screen resolution
screen_height = 680
screen_length = 1208

### Data import and preparation

In [4]:
#Columns that we want to keep from the data we load in
import_columns = ['created_at_CET','latitude','longitude','Overall.score','Anger','Confusion',
                  'Disgust','Fear','Happiness','Sadness','Shame','Surprise']

#Load the data into the python environment
twitter_data = pd.read_csv("D:/Master Project/complete_swiss_dataset.csv",encoding = "ISO-8859-15",low_memory=False)

#Select only the columns that are relevant
#Timeline Map Histogram data (TMH_data)
TMH_data = twitter_data[import_columns].sort_values(by='created_at_CET',ignore_index=True)

#Round all entries in dataframe
#Mainly to remove specific gps locations of users
TMH_data = TMH_data.round(3)

#Format the string input to datetime
TMH_data['created_at_CET'] = pd.to_datetime(TMH_data['created_at_CET'], format='%Y-%m-%d %H:%M:%S')

#Sort the data by the date at creation
TMH_data = TMH_data.sort_values(by='created_at_CET')

#Removing the entire dataset now that the selection has been made
twitter_data = None

In [5]:
#Declare transformer to convert from epsg4326 to epsg 3856 (web mercator)
transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857")

#Transform the coordinates from epsg 4326 to epsg 3857
mercator_transform = transformer.transform(TMH_data['latitude'].values,
                                           TMH_data['longitude'].values)

#
TMH_data['x_mercator'] = mercator_transform[0].round(1)
TMH_data['y_mercator'] = mercator_transform[1].round(1)

### Timeline bokeh plot

In [6]:
%%time

#Declare a variable to keep track of the current day and year
current_day = (TMH_data['created_at_CET'][0]).dayofyear
current_year = (TMH_data['created_at_CET'][0]).year

#Declare a counter for the frequency on a given day
freq_counter = 0
#Declare a list to store the frequencies on each day
freq_list = []

for date in TMH_data['created_at_CET']:
    if(current_day == date.dayofyear):
        freq_counter += 1
    else:
        
        freq_list.append((datetime.strptime(str(current_year)+str(current_day),"%Y%j"),
                          freq_counter))
        
        if(current_year == date.year):
            current_day = date.dayofyear
        else:
            current_year = date.year
            current_day = date.dayofyear
        freq_counter = 1
        
freq_list.append((datetime.strptime(str(current_year)+str(current_day),"%Y%j"),
                          freq_counter))

freq_list = np.array(freq_list).T

Wall time: 2.75 s


In [117]:
def timeline(doc):
    
    #Create the Timeline figure
    Timeline = figure(background_fill_color="#fafafa",
                      x_axis_type='datetime',
                      #y_axis_type=None,
                      tools="pan,box_zoom,wheel_zoom,reset",
                      toolbar_location="above",
                      plot_width=int(screen_length*0.7),
                      plot_height=int(screen_height*0.3))
    
    #Load the data for the timeline into a ColumnDataSource for the plot
    source = ColumnDataSource(data=dict(date=freq_list[0], freq=freq_list[1]))
    
    #Plot a line on the graph
    Timeline.line('date','freq',source=source)
    
    #Declare a button which will update the values in Map according to the time period
    button = Button(label="Update Time Period", button_type="success",
                   width = int(screen_length*0.05),
                   height = int(screen_height*0.075))

    #Callback to handle the changing x range of the range tool
    #Will update the corresponding start and end dates in the datepickers
    #datepickers: start_date_input and end_date_input
    def range_tool_to_datepicker_handler(attr,old,new):

        #If the starting value of the interval is a float then the user has moved the range
        if(type(range_tool.x_range.start) == float):
            
            #Extract the start value of the interval and convert it into correct format for datepicker
            new_start_date = datetime.fromtimestamp(range_tool.x_range.start / 1e3).date()
            
            #Check whether the date is less than the minimum of of the data
            if(new_start_date >= min_date_interval):
                #If the start date is later than the global minimum then set it as the new date
                start_date_input.value = new_start_date
            else:
                #If the date is before the global minimum then take the global minimum as the start of the range
                start_date_input.value = min_date_interval

        #If the ending value of the interval is a float then the user has moved the range
        if(type(range_tool.x_range.end) == float):
            
            #Extract the end value of the interval and convert it into correct format for datepicker
            new_end_date = datetime.fromtimestamp(range_tool.x_range.end / 1e3).date()
            
            #Check whether the date is less than the maximum of of the data
            if(new_end_date <= max_date_interval):
                #If the new end date is before the global maximum
                #Set the end date to the new end date
                end_date_input.value = new_end_date
            else:
                #If the value is after the global maximum
                #then set the value to the global maximum
                end_date_input.value = max_date_interval

    #Callback for when the datepicker of starting date changes
    def date_picker_to_range_tool_handler_start(attr,old,new):
        range_tool.x_range.start = datetime.fromisoformat(start_date_input.value).timestamp() * 1e3
        
    #Callback for when the datepicker of ending date changes
    def date_picker_to_range_tool_handler_end(attr,old,new):
        range_tool.x_range.end = datetime.fromisoformat(end_date_input.value).timestamp() * 1e3
    
    
    #button.on_click(date_picker_to_range_tool_handler)
    
    #Set the global max and min for the date ranges
    min_date_interval = TMH_data.iloc[0]['created_at_CET'].date()
    max_date_interval = TMH_data.iloc[-1]['created_at_CET'].date()
    
    #Create a datepicker for the start date
    start_date_input = DatePicker(title='Start date', value=min_date_interval,
                            min_date=min_date_interval, max_date=max_date_interval,
                            width = int(screen_length*0.1),
                            height = int(screen_height*0.075))
    
    #Link the datepicker to the callback for when the value changes
    start_date_input.on_change('value',date_picker_to_range_tool_handler_start)
    
    #Create a datepicker for the end date
    end_date_input = DatePicker(title='End date', value=max_date_interval,
                          min_date=min_date_interval, max_date=max_date_interval,
                          width = int(screen_length*0.1),
                          height = int(screen_height*0.075))
    
    #Link the datepicker to the callback for when the value changes
    end_date_input.on_change('value',date_picker_to_range_tool_handler_end)
    
    #Declare a RangeTool for the selection of the date ranges
    range_tool = RangeTool(x_range=Range1d(start=TMH_data.iloc[0]['created_at_CET'],
                                           end=TMH_data.iloc[-1]['created_at_CET']))
    range_tool.overlay.fill_color = "navy"
    range_tool.overlay.fill_alpha = 0.1
    
    #Link the range_tool to the callbacks for its interval
    range_tool.x_range.on_change('start',range_tool_to_datepicker_handler)
    range_tool.x_range.on_change('end',range_tool_to_datepicker_handler)
    
    #Add the range_tool to the Timeline plot
    Timeline.add_tools(range_tool)
    Timeline.toolbar.active_multi = range_tool
    
    #Create the layout for the Timeline plot
    plots = layout([
        [Timeline,[start_date_input,end_date_input,button]],
    ])
    #Add the plot to doc
    doc.add_root(plots)

In [118]:
show(timeline)

### Map bokeh plot

In [113]:
#Copy the relevant data from the dataframe
Map_data = TMH_data[['x_mercator','y_mercator']].sort_values(by=['x_mercator','y_mercator']).copy()

#Reset the index for the calculation of densities
Map_data = Map_data.set_index(np.arange(0,len(Map_data)))

#Drop all the duplciates, which keeps the first occuring entry in the dataframe
Map_data = Map_data.drop_duplicates()

#Declare an array the length of the coordinates without duplicates
#Will represent the density at each coordinate
density = np.ones(len(Map_data))

#Loop the array with all the coordinates
for i in range(0,len(Map_data)-1):
    density[i] = Map_data.index[i+1] - Map_data.index[i]
    
#Add the column to the Map_data
Map_data['density'] = density

In [114]:
def geoMap(doc):
    #load the background of the map plot
    tile_provider = get_provider(CARTODBPOSITRON)

    #Create the Map figure
    # range bounds supplied in web mercator coordinates
    geo_map = figure(x_range=(650000, 1200000), y_range=(5700000, 6100000),
                 x_axis_type="mercator", y_axis_type="mercator",
                 plot_width=int(screen_length*0.4),
                 plot_height=int(screen_height*0.6))
    geo_map.add_tools(LassoSelectTool())
    geo_map.add_tools(BoxSelectTool())
    
    #Add the background to the map
    geo_map.add_tile(tile_provider)
    
    source = ColumnDataSource(Map_data)
    
    color_mapper = log_cmap(field_name = 'density', palette = palette,
                               low = Map_data['density'].min(),
                               high = Map_data['density'].max())
    
    #Defines color bar
    color_bar = ColorBar(color_mapper=color_mapper['transform'], 
                     formatter = NumeralTickFormatter(format='0.0[0000]'), 
                     label_standoff = 13, width=8, location=(0,0))# Set color_bar location
    
    geo_map.add_layout(color_bar, 'right')
    
    geo_map.circle(x='x_mercator', y='y_mercator', size=1, color=color_mapper, alpha=0.5, source=source)
    
    #Declare a button which will update the values in Map according to the time period
    map_update_button = Button(label="Update Time Period Map", button_type="success",
                               width = int(screen_length*0.05),
                               height = int(screen_height*0.075))
    
    #Set the global max and min for the date ranges
    min_date_interval = TMH_data.iloc[0]['created_at_CET'].date()
    max_date_interval = TMH_data.iloc[-1]['created_at_CET'].date()
    
    #Create a datepicker for the start date
    start_date_input = DatePicker(title='Start date', value=min_date_interval,
                            min_date=min_date_interval, max_date=max_date_interval,
                            width = int(screen_length*0.1),
                            height = int(screen_height*0.075))
    
    #Create a datepicker for the end date
    end_date_input = DatePicker(title='End date', value=max_date_interval,
                          min_date=min_date_interval, max_date=max_date_interval,
                          width = int(screen_length*0.1),
                          height = int(screen_height*0.075))
    
    def temp():
        print(source.selected.indices)
    
    map_update_button.on_click(temp)
    
    plots = layout([
        [geo_map],
        [start_date_input,end_date_input,map_update_button],
        
    ])
    doc.add_root(plots)

In [115]:
show(geoMap)

### Histogram bokeh plot

In [101]:
%%time
Map_data = TMH_data[['x_mercator','y_mercator']].sort_values(by=['x_mercator','y_mercator']).copy()

Wall time: 268 ms


In [46]:
%%time
Map_data = Map_data.set_index(np.arange(0,len(Map_data)))

Wall time: 18 ms


In [102]:
%%time
Map_data = Map_data.drop_duplicates()

Wall time: 77 ms


In [104]:
#Declare an array the length of the coordinates without duplicates
#Will represent the density at each coordinate
density = np.ones(len(Map_data))

#Loop the array with all the coordinates
for i in range(0,len(Map_data)-1):
    density[i] = Map_data.index[i+1] - Map_data.index[i]
    
#Add the column to the Map_data
Map_data['density'] = density

In [84]:
%%time
density.sort()

Wall time: 999 µs


In [111]:
Map_data.iloc[[75220, 75221, 75235, 75236, 75489, 75582, 75661, 75815, 75816, 75830, 75841, 75842, 75869, 75876, 75877, 75882, 75889, 75890, 75891, 75897, 75898, 75899, 75905, 75913, 75923, 75934, 75944, 75961, 75968, 75969, 75970, 75971, 75977, 75978, 75980, 75981, 75982, 75984, 75985, 75986, 75987, 75992, 75993, 75994, 76001, 76002, 76003, 76006, 76007, 76008, 76009, 76010, 76011, 76012, 76024, 76025, 76026, 76027, 76040, 76041, 76077, 76078, 76088, 76089, 76120, 76151, 76159]]['density']

10362     178230.0
188592   -187826.0
324339   -220169.0
104170    284826.0
97461      -6010.0
            ...   
393994    252081.0
646075   -598966.0
14717     125363.0
786769   -759815.0
850494   -379462.0
Name: density, Length: 67, dtype: float64

In [87]:
density[-100]

894.0

In [18]:
count = 0
while(count < len(test2)):
    test2.iloc[count]['test_density'] = None
    count = count + int(3*np.random.uniform())

In [33]:
test2

Unnamed: 0,x_mercator,y_mercator,test_density
547313,-3197095.8,4668810.0,
66586,-2449585.4,9388979.0,
254259,-986290.7,4655423.8,91.0
278343,-963581.5,5315625.6,
1108542,-961689.1,5221539.4,82.0
...,...,...,...
877772,5350460.0,3417842.8,52.0
548225,5515992.1,3137846.8,
958063,5519665.6,2918426.3,
16982,5527569.3,3124346.3,25.0


In [20]:
%%time
TMH_data.iloc[[123,124,11,23]]

Wall time: 999 µs


Unnamed: 0,created_at_CET,latitude,longitude,Overall.score,Anger,Confusion,Disgust,Fear,Happiness,Sadness,Shame,Surprise,x_mercator,y_mercator
123,2015-01-01 01:45:40,46.188,6.137,0,0,0,0,0,0,0,0,0,683167.7,5810527.7
124,2015-01-01 01:45:41,47.563,7.599,0,0,0,0,0,0,0,0,0,845916.8,6034459.1
11,2015-01-01 01:02:01,47.055,8.315,3,0,0,0,0,3,0,0,0,925621.6,5951056.1
23,2015-01-01 01:05:33,47.525,7.558,4,0,0,0,0,4,0,0,0,841352.7,6028192.4


### Adding all the bokeh plots together

In [21]:
def bkapp(doc):
    
    #Create the Timeline figure
    Timeline = figure(background_fill_color="#fafafa",
                      x_axis_type='datetime',
                      y_axis_type=None,
                      tools="",toolbar_location=None,
                      plot_width=int(screen_length*0.8),
                      plot_height=int(screen_height*0.2))
    
    #range_tool = RangeTool(x_range=Range1d(start=TMH_data.iloc[0]['created_at_CET'],end=TMH_data.iloc[0-1]['created_at_CET']))
    range_tool = RangeTool(x_range=Range1d(1,3))
    range_tool.overlay.fill_color = "navy"
    range_tool.overlay.fill_alpha = 0.2
    
    Timeline.add_tools(range_tool)
    Timeline.toolbar.active_multi = range_tool
    
    Timeline.circle([1,2,3,4,5,6],[1,1,1,1,1,1])

    #load the background of the map plot
    tile_provider = get_provider(CARTODBPOSITRON)

    #Create the Map figure
    # range bounds supplied in web mercator coordinates
    Map = figure(x_range=(650000, 1200000), y_range=(5700000, 6100000),
                 x_axis_type="mercator", y_axis_type="mercator",
                 plot_width=int(screen_length*0.4),
                 plot_height=int(screen_height*0.6))
    
    #Add the background to the map
    Map.add_tile(tile_provider)
    
    source = ColumnDataSource(data=dict(longitude=TMH_data['x_mercator'][0:1000], latitude=TMH_data['y_mercator'][0:1000]))

    Map.circle(x='longitude', y='latitude', size=2, color="black", alpha=0.7, source=source)
    
    #Create the Histogram figure
    Histogram = figure(background_fill_color="#fafafa",
                       plot_width=int(screen_length*0.4),
                       plot_height=int(screen_height*0.6))
    
    Histogram.circle(1,1)
    

    plots = layout([
        [Timeline],
        [Map,Histogram],
        
    ])
    doc.add_root(plots)

In [22]:
show(bkapp)

In [23]:
datetime.datetime(TMH_data['created_at_CET'][0])



AttributeError: type object 'datetime.datetime' has no attribute 'datetime'

In [None]:
%%time
TMH_data.sort_values(by='created_at_CET')

In [None]:
%%time
TMH_data.sort_values(by='created_at_CET')

In [None]:
TMH_data['created_at_CET'] = pd.to_datetime(TMH_data['created_at_CET'], format='%Y-%m-%d %H:%M:%S')

In [None]:
Range1d(start=TMH_data.iloc[0]['created_at_CET'],end=TMH_data.iloc[0-1]['created_at_CET'])

In [None]:
TMH_data.iloc[0]['created_at_CET']

[74898, 74971, 74972, 75011, 75069, 75124, 75152, 75192, 75228, 75244, 75262, 75381, 75518, 75543, 75552, 75553, 75580, 75581, 75644, 75660, 75730, 75734, 75735, 75746, 75754, 75773, 75793, 75803, 75814, 75839, 75840, 75881, 75888, 75917, 75931, 75932, 75942, 75960, 75976, 76127, 76135]
