## Load Libraries

In [69]:
import numpy as np
import pandas as pd
import geopandas as gpd
import json
import calendar

from bokeh.io import show, output_file, curdoc
from bokeh.layouts import column, row, widgetbox, gridplot
from bokeh.models import (ColumnDataSource, RangeTool, Panel, 
                          Tabs, CDSView, ColorBar, CustomJS, 
                          CustomJSFilter,GeoJSONDataSource, HoverTool,
                          LinearColorMapper, Slider, Select,
                          BoxZoomTool, PanTool, LinearColorMapper,
                          WheelZoomTool, SaveTool, LassoSelectTool,
                          CategoricalColorMapper, ResetTool)
from bokeh.models.widgets import (Slider, Select, TextInput, 
                                  Div, CheckboxGroup, RangeSlider, 
                                  Tabs, DataTable, TableColumn)
from bokeh.plotting import figure, output_file, show
from bokeh.models import DatetimeTickFormatter
from datetime import datetime
from bokeh.models import BoxZoomTool
from bokeh.models import Legend
from bokeh.core.properties import value
from bokeh.palettes import brewer, Spectral5, Pastel1_7, YlGnBu, Set3
from geopy.geocoders import Nominatim


## Load Data

In [3]:
DFU = pd.read_csv("Uber.csv")

In [4]:
DF = pd.read_csv("TaxiS.csv")

## Prepare Data

In [5]:
#add Type to each dataframe
DF['Type'] = 'Taxi'
DFU['Type'] = 'Uber'

In [14]:
#split date and time from datetime
DFU['Time'] = pd.to_datetime(DFU['Date/Time']).dt.time
DFU['Date'] = pd.to_datetime(DFU['Date/Time']).dt.date
DFU['Date']=pd.to_datetime(DFU['Date'])

In [15]:
#split date and time from datetime
DF['Time'] = pd.to_datetime(DF['pickup_datetime']).dt.time
DF['Date'] = pd.to_datetime(DF['pickup_datetime']).dt.date
DF['Date']=pd.to_datetime(DF['Date'])

In [16]:
#extract Hours from Time
DFU.loc[:, 'Hour'] = DFU['Time'].apply(lambda x: int(str(x)[:2]))
DF.loc[:, 'Hour'] = DFU['Time'].apply(lambda x: int(str(x)[:2]))

In [17]:
#extract Months from the Date
DF['Month'] = DF['Date'].dt.month
DF['Month'] = DF['Month'].apply(lambda x: calendar.month_abbr[x])
DFU['Month'] = DFU['Date'].dt.month
DFU['Month'] = DFU['Month'].apply(lambda x: calendar.month_abbr[x])

In [18]:
#extract days from the Date
DFU['day_of_week'] = DFU['Date'].dt.day_name()
DF['day_of_week'] = DF['Date'].dt.day_name()

In [19]:
#delete white space in the columns
DFU.columns = DFU.columns.str.strip()
DF.columns = DF.columns.str.strip()

## Plot Tab1

In [20]:
#Caculate total trips for Uber and Taxi for each day
DFUb1 = pd.DataFrame({'Total' : DFU.groupby( ["Date","Type"] ).size()}).reset_index()
DF1 = pd.DataFrame({'Total' : DF.groupby( ["Date","Type"] ).size()}).reset_index()

In [21]:
#set up x range
x = np.arange(1,184)
DFUb1['Days'] = x
DF1['Days']=x
dates = DFUb1['Days']
source1 = ColumnDataSource(data=dict(date=dates, total_uber=DFUb1['Total']))
source2 = ColumnDataSource(data=dict(date=dates, total_taxi=DF1['Total']))

In [25]:
#start plotting line graph
L = figure(plot_height=300, plot_width=800, tools="xpan,box_zoom,reset",
           x_axis_location="below", title='Taxi vs Uber in 2014',
           x_range=(dates[0], dates[182]))

U1 = L.line('date', 'total_uber', source=source1, line_color='black', alpha=0.8)
T1 = L.line('date', 'total_taxi', source=source2, line_color='gold', alpha=0.8)
L.xaxis.axis_label = 'Days'
L.yaxis.axis_label = 'Total'
L.left[0].formatter.use_scientific = False
L.xaxis.major_tick_line_color = None  
L.xaxis.minor_tick_line_color = None  
L.yaxis.major_tick_line_color = None  
L.yaxis.minor_tick_line_color = None  
legend = Legend(items=[("Uber", [U1]),
                       ("Taxi" , [T1])],
                       location="center",
                      click_policy="hide")
L.add_layout(legend, 'right')

select = figure(title="Drag the middle and edges of the selection box to change the range above",
                plot_height=130, plot_width=800, y_range=L.y_range,
                y_axis_type=None, tools="", toolbar_location=None)

range_tool = RangeTool(x_range=L.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

zoom_overlay = L.select_one(BoxZoomTool).overlay
zoom_overlay.line_color = "black"
zoom_overlay.line_width = 3
zoom_overlay.line_dash = "dashed"
zoom_overlay.fill_color = None

select.line('date', 'total_uber', source=source1, line_color='black')
select.line('date', 'total_taxi', source=source2, line_color='yellow')
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
select.toolbar.active_multi = range_tool

Line = (column(L,select))
#show(Line)

In [28]:
# prepare data for histogram graph
def month(x):
    if x<31:
        return 1
    elif x<61:
        return 2
    elif x<91:
        return 3
    elif x<121:
        return 4
    elif x<151:
        return 5
    elif x<181:
        return 6
    else:
        pass

DFUb1['month'] = [month(x) for x in DFUb1['Days']]
DFUb1 = DFUb1.dropna(subset=['month'])

DF1['month'] = [month(x) for x in DF1['Days']]
DF1 = DF1.dropna(subset=['month'])

DFUb2 = pd.DataFrame({'total' : DFUb1.groupby(['month', 'Type'])['Total'].agg('sum')}).reset_index()
DFUb2['pct_change'] = DFUb2['total'].pct_change()
DFUb2['pct_change'] = DFUb2['pct_change'] * 100
DFUb2 = DFUb2.dropna(subset=['pct_change'])

DF2 = pd.DataFrame({'total' : DF1.groupby(['month', 'Type'])['Total'].agg('sum')}).reset_index()
DF2['pct_change'] = DF2['total'].pct_change()
DF2['pct_change'] = DF2['pct_change'] * 100
DF2 = DF2.dropna(subset=['pct_change'])
DFUb2['Month'] = ['May','June','July','August','September']
DF2['Month'] = ['May','June','July','August','September']
Uber = list(DFUb2['pct_change'])
Taxi = list(DF2['pct_change'])
months = ['September','August','July','June','May']
type1 = ['Uber']
type2 = ['Taxi']

dct = {'months' : months,
       'Uber'   : Uber,
       'Taxi'   : Taxi}

In [29]:
#start plotting histogram graph
Lh = figure(y_range=months, plot_height=250, x_range=(-25, 25), title="Percentage Change by Month", 
           toolbar_location=None)

Lh.hbar_stack(type1, y='months', height=0.9, color='black', source=ColumnDataSource(dct), legend=value("Uber"))
Lh.hbar_stack(type2, y='months', height=0.9, color='gold', source=ColumnDataSource(dct), legend=value("Taxi"))
Lh.legend.location='top_left'
Lh.xaxis.axis_label = 'Percentage'
Lh.yaxis.axis_label = 'Months'

#show(Lh)

In [30]:
layouttab1 = row(Line, Lh)

In [31]:
tab1 = Panel(child=layouttab1, title = 'Overview')

## Plot Tab2

In [36]:
#prepare uber data for tab2
DFUb3 = pd.DataFrame({'pickups': DFU.groupby(['day_of_week', 'Hour']).size()}).reset_index()

def make_dataset(list1):
    by_day = pd.DataFrame(columns = ['day_of_week','Hour','pickups','color'])
    
    for i, day_name in enumerate(list1):
        subset = DFUb3[DFUb3['day_of_week'] == day_name]
        subset['color'] = Pastel1_7[i]
        by_day = by_day.append(subset)
        
    return by_day
days1 = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
DFUFinal = make_dataset(days1)
days = DFUFinal['day_of_week']
days1 = list(set(DFUFinal['day_of_week']))
day_of_week = Select(title="Day", options=days1, value="All")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [37]:
source = ColumnDataSource(data=dict(days=days, hour=DFUFinal['Hour'], pickups=DFUFinal['pickups'], color=DFUFinal['color'] ))

#blank plot with correct labels
h = figure(plot_width = 700, plot_height = 700, 
                  title = 'Uber Demand',
                  x_axis_label = 'Hours', y_axis_label = 'Pickups')
        
#quad glyphs to create a histogram
h.quad(source = source, bottom = 0, top = 'pickups', left = 'hour', right = 'hour',
               color = 'color', fill_alpha = 0.1, legend = 'days',
               hover_fill_alpha = 0.1, line_color = 'color', line_width = 22, line_alpha = 0.5)
    
#title 
h.title.align = 'center'
h.title.text_font_size = '20pt'
h.title.text_font = 'serif'

#axis titles
h.xaxis.axis_label_text_font_size = '14pt'
h.xaxis.axis_label_text_font_style = 'bold'
h.yaxis.axis_label_text_font_size = '14pt'
h.yaxis.axis_label_text_font_style = 'bold'

#tick labels
h.xaxis.major_label_text_font_size = '12pt'
h.yaxis.major_label_text_font_size = '12pt'
h.yaxis.minor_tick_line_color = None
h.xgrid.grid_line_color = None
h.ygrid.grid_line_color = None
      
h.legend.location = "top_left"
h.background_fill_color = "gainsboro"
h.background_fill_alpha = 0.3
h.outline_line_width = 3
h.outline_line_alpha = 0.5
h.outline_line_color = "black"

Uber = h

In [38]:
#prepare taxi data for tab 2
DFb2 = pd.DataFrame({'pickups': DF.groupby(['day_of_week', 'Hour']).size()}).reset_index()
def make_dataset(list1):
    by_day = pd.DataFrame(columns = ['day_of_week','Hour','pickups','color'])
    
    for i, day_name in enumerate(list1):
        subset = DFb2[DFb2['day_of_week'] == day_name]
        subset['color'] = Pastel1_7[i]
        by_day = by_day.append(subset)
        
    return by_day

days2 = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
DFFinal1 = make_dataset(days2)
days = DFFinal1['day_of_week']
days2 = list(set(DFFinal1['day_of_week']))
day_of_week1 = Select(title="Day", options=days1, value="All")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [39]:
source1 = ColumnDataSource(data=dict(days=days, hour=DFFinal1['Hour'], pickups=DFFinal1['pickups'], color=DFFinal1['color'] ))

#blank plot with correct labels
h1 = figure(plot_width = 700, plot_height = 700, 
                  title = 'Taxi Demand',
                  x_axis_label = 'Hours', y_axis_label = 'Pickups')
#quad glyphs to create a histogram
h1.quad(source = source1, bottom = 0, top = 'pickups', left = 'hour', right = 'hour',
               color = 'color', fill_alpha = 0.1, legend = 'days',
               hover_fill_alpha = 0.1, line_color = 'color', line_width = 22, line_alpha = 0.5)
    
#title 
h1.title.align = 'center'
h1.title.text_font_size = '20pt'
h1.title.text_font = 'serif'

#axis titles
h1.xaxis.axis_label_text_font_size = '14pt'
h1.xaxis.axis_label_text_font_style = 'bold'
h1.yaxis.axis_label_text_font_size = '14pt'
h1.yaxis.axis_label_text_font_style = 'bold'

#tick labels
h1.xaxis.major_label_text_font_size = '12pt'
h1.yaxis.major_label_text_font_size = '12pt'
h1.yaxis.minor_tick_line_color = None
h1.xgrid.grid_line_color = None
h1.ygrid.grid_line_color = None
      
h1.legend.location = "top_left"
h1.background_fill_color = "gainsboro"
h1.background_fill_alpha = 0.3
h1.outline_line_width = 3
h1.outline_line_alpha = 0.5
h1.outline_line_color = "black"

Taxi = h1

In [40]:
#add extra information 
t1 = {'Category': ['Base Fare ($)', 'Minimum Fare ($)', '$ per minute', '$ per mile', 'Additional Charges', 'Order Medium'], 
     'Uber': ['2.55', '7.00', '0.35', '1.75', 'Surge Charging during High Demand', 'Uber App Only'], 
     'Taxi': ['2.50', '2.50', '0.50', '2.50', 'Improvement Surcharge, Overnight Surcharge, Rush Hour Surcharge', 'Hail or Telephone']}
table = pd.DataFrame(data=t1)

In [41]:
source2 = ColumnDataSource(table)
columns = [TableColumn(field="Category", title="Category"),
           TableColumn(field="Uber", title="Uber"), 
           TableColumn(field="Taxi", title="Taxi")]
ptable = DataTable(source=source2, columns=columns, width=1200, height=280, index_position=None)

In [44]:
layouttab2 = gridplot([Uber, Taxi, ptable], sizing_mode= "scale_width", ncols=2)

In [None]:
tab2 = Panel(child=layouttab2, title = '24/7 Availability')

## Plot Tab3

In [46]:
#import New York Shapefile
NYS = gpd.read_file(gpd.datasets.get_path('nybb'))

In [47]:
#check the coordinate system of New York Shapefile and convert it to EPSG4326
NYS.crs

{'init': 'epsg:2263'}

In [48]:
NYS = NYS.to_crs(epsg=4326)

  return _prepare_from_string(" ".join(pjargs))


In [49]:
#start plotting the map
geosource = GeoJSONDataSource(geojson = NYS.to_json())
#create figure object.
m = figure(title = 'Availability of Uber within 24 Hours in New York', 
           plot_height = 650 ,
           plot_width = 650, 
           toolbar_location = 'below',
           x_range = (-74.60, -73.38),
           y_range = (40.44, 40.94),
           tools = 'pan, wheel_zoom, box_zoom, reset'
          )
m.xgrid.grid_line_color = None
m.ygrid.grid_line_color = None
m.background_fill_color = "black"
#add patch renderer to figure.
area = m.patches('xs','ys', source = geosource,
                   fill_color = 'black',
                   line_color = 'white', 
                   line_width = 0.5, 
                   fill_alpha = 1)
#create hover tool
m.add_tools(HoverTool(renderers = [area],
                      tooltips = [('Borough','@BoroName')]))

In [50]:
#assign the coordinate system to dataframe
DFU_crs = {'init': 'epsg:4326'}

In [51]:
#prepare data for the plot
points = gpd.points_from_xy(DFU.Lon, DFU.Lat)
DFU_geo = gpd.GeoDataFrame(DFU, crs=DFU_crs, geometry=points)
# Get x and y coordinates
DFU_geo['x'] = [geometry.x for geometry in DFU_geo['geometry']]
DFU_geo['y'] = [geometry.y for geometry in DFU_geo['geometry']]
m_df = DFU_geo.drop('geometry', axis = 1).copy()
sitesource = ColumnDataSource(m_df)

In [52]:
#make a slider object to toggle the hour shown
slider = Slider(title = 'Hour', 
                start = 0, end = 24, 
                step = 1, value = 1)

In [53]:
#this callback triggers the filter when the slider changes
callback = CustomJS(args = dict(source=sitesource), 
                    code = """source.change.emit();""")
slider.js_on_change('value', callback)
#creates custom filter that selects the rows of the hour based on the value in the slider
custom_filter = CustomJSFilter(args = dict(slider = slider, 
                                           source = sitesource), 
                               code = """
var indices = [];
// iterate through rows of data source and see if each satisfies some constraint
for (var i = 0; i < source.get_length(); i++){
 if (source.data['Hour'][i] == slider.value){
 indices.push(true);
 } else {
 indices.push(false);
 }
}
return indices;
""")
#uses custom_filter to determine which set of sites are visible
view = CDSView(source = sitesource, filters = [custom_filter])

In [59]:
#plot data on the map based on hour in slider
sites = m.circle('x', 'y', source = sitesource, color = 'red',  
                 size = 1, alpha = 0.3, view = view)
# Make a column layout of widgetbox(slider) and plot, and add it to the current document
layout1 = column(m, widgetbox(slider))

In [55]:
d2 = {'Borough': ['The Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island'], 
      'Per Capita Income 2013-17($)': ['19,721', '29,928', '69,529', '28,814', '33,922'], 
      'Population estimate 2018': ['1,432,132', '2,582,830', '1,628,701', '2,278,906', '476,179'],
      '% Working Age 2018 (18-65 years)': ['62.4','62.0','69.2','64.2','62.0'],
      'Total Retail Sales per capita 2012 ($)': ['4,880', '8,003', '27,200', '7,481', '8,106']
     }
table2 = pd.DataFrame(data=d2)

In [56]:
source = ColumnDataSource(table2)
columns = [TableColumn(field="Borough", title="Borough"),
           TableColumn(field="Per Capita Income 2013-17($)", title="Per Capita Income 2013-17($)"),
           TableColumn(field="Population estimate 2018", title="Population estimate 2018"), 
           TableColumn(field="% Working Age 2018 (18-65 years)", title="% Working Age 2018 (18-65 years)"),
           TableColumn(field="Total Retail Sales per capita 2012 ($)", title="Total Retail Sales per capita 2012 ($)")]
data_table = DataTable(source=source, columns=columns, width=1200, height=280, index_position=None)

In [60]:
layouttab3 = row(layout1, data_table)

In [None]:
tab3 = Panel(child=layouttab3, title = 'Geospatial Analysis')

## Plot Tab4

In [64]:
#prepare data for the plot
DF2 = pd.DataFrame({'Total' : DF.groupby( ["Month","pickup_longitude","pickup_latitude","Type"] ).size()}).reset_index()
DFU1 = pd.DataFrame({'Total' : DFU.groupby( ["Month","Lon", "Lat","Type"] ).size()}).reset_index()

#Taxi - Tourism
DFT1 = DF2.loc[(DF2["pickup_longitude"]>-73.9868) & (DF2['pickup_longitude']<-73.9848) & (DF2['pickup_latitude'] > 40.7562) & (DF2['pickup_latitude'] < 40.7582) ]
DFT1 = DFT1.copy()
DFT1['Landmark'] = 'Times Square'
DFT2 = DF2.loc[(DF2["pickup_longitude"]>-73.9972) & (DF2['pickup_longitude']<-73.9952) & (DF2['pickup_latitude'] > 40.7154) & (DF2['pickup_latitude'] < 40.7174) ]
DFT2 = DFT2.copy()
DFT2['Landmark'] = 'Chinatown'
DFT3 = DF2.loc[(DF2["pickup_longitude"]>-73.9632) & (DF2['pickup_longitude']<-73.9612) & (DF2['pickup_latitude'] > 40.7777) & (DF2['pickup_latitude'] < 40.7797) ]
DFT3 = DFT3.copy()
DFT3['Landmark'] = 'Metropolitan Museum'
DFT4 = DF2.loc[(DF2["pickup_longitude"]<-73.9749) & (DF2['pickup_longitude']>-73.9759) & (DF2['pickup_latitude'] > 40.7613) & (DF2['pickup_latitude'] < 40.7633) ]
DFT4 = DFT4.copy()
DFT4['Landmark'] = 'Trump Tower'
DFT5 = DF2.loc[(DF2["pickup_longitude"]>-73.9742) & (DF2['pickup_longitude']<-73.9722) & (DF2['pickup_latitude'] > 40.7804) & (DF2['pickup_latitude'] < 40.74504) ]
DFT5 = DFT5.copy()
DFT5['Landmark'] = 'Central Park West'
DFT6 = DF2.loc[(DF2["pickup_longitude"]>-73.9866) & (DF2['pickup_longitude']<-73.9846) & (DF2['pickup_latitude'] > 40.7474) & (DF2['pickup_latitude'] < 40.7494) ]
DFT6 = DFT6.copy()
DFT6['Landmark'] = 'Empire State Building'
DFT = (pd.concat([DFT1, DFT2, DFT3, DFT4, DFT5, DFT6], sort=True))
DFTF = pd.DataFrame({'Overall' : DFT.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFTF['Function'] = 'Tourism'

#Taxi - Business
DFB1 = DF2.loc[(DF2["pickup_longitude"]>-73.9763) & (DF2['pickup_longitude']<-73.9743) & (DF2['pickup_latitude'] > 40.7506) & (DF2['pickup_latitude'] < 40.7526) ]
DFB1 = DFB1.copy()
DFB1['Landmark'] = 'Chrysler Building'
DFB2 = DF2.loc[(DF["pickup_longitude"]>-73.9777) & (DF2['pickup_longitude']<-73.9757) & (DF2['pickup_latitude'] > 40.7544) & (DF2['pickup_latitude'] < 40.7564) ]
DFB2 = DFB2.copy()
DFB2['Landmark'] = 'JP Morgan Chase'
DFB3 = DF2.loc[(DF2["pickup_longitude"]>-74.2337) & (DF2['pickup_longitude']<-74.2317) & (DF2['pickup_latitude'] > 40.6494) & (DF2['pickup_latitude'] < 40.6514) ]
DFB3 = DFB3.copy()
DFB3['Landmark'] = '200 Park Avenue'
DFB4 = DF2.loc[(DF2["pickup_longitude"]>-74.0154) & (DF2['pickup_longitude']<-74.0134) & (DF2['pickup_latitude'] > 40.7137) & (DF2['pickup_latitude'] < 40.7157) ]
DFB4 = DFB4.copy()
DFB4['Landmark'] = 'Goldman Sachs'
DFB5 = DF2.loc[(DF2["pickup_longitude"]>-73.9865) & (DF2['pickup_longitude']<-73.9845) & (DF2['pickup_latitude'] > 40.7589) & (DF2['pickup_latitude'] < 40.7609) ]
DFB5 = DFB5.copy()
DFB5['Landmark'] = 'Morgan Stanley'
DFB6 = DF2.loc[(DF2["pickup_longitude"]>-74.0079) & (DF2['pickup_longitude']<-74.0059) & (DF2['pickup_latitude'] > 40.7038) & (DF2['pickup_latitude'] < 40.7058) ]
DFB6 = DFB6.copy()
DFB6['Landmark'] = 'Wall Street'
DFB = (pd.concat([DFB1, DFB2, DFB3, DFB4, DFB5, DFB6], sort=True))
DFBF = pd.DataFrame({'Overall' : DFB.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFBF['Function'] = 'Business'

#Taxi - Education
DFE1 = DF2.loc[(DF2["pickup_longitude"]>-73.9831) & (DF2['pickup_longitude']<-73.9811) & (DF2['pickup_latitude'] > 40.7523) & (DF2['pickup_latitude'] < 40.7543) ]
DFE1 = DFE1.copy()
DFE1['Landmark'] = 'New York Public Library'
DFE2 = DF2.loc[(DF2["pickup_longitude"]>-73.9972) & (DF2['pickup_longitude']<-73.9952) & (DF2['pickup_latitude'] > 40.7282) & (DF2['pickup_latitude'] < 40.7302) ]
DFE2 = DFE2.copy()
DFE2['Landmark'] = 'NYU'
DFE3 = DF2.loc[(DF2["pickup_longitude"]>-73.9627) & (DF2['pickup_longitude']<-73.9607) & (DF2['pickup_latitude'] > 40.8069) & (DF2['pickup_latitude'] < 40.8089) ]
DFE3 = DFE3.copy()
DFE3['Landmark'] = 'Columbia University'
DFE4 = DF2.loc[(DF2["pickup_longitude"]>-73.9839) & (DF2['pickup_longitude']<-73.9819) & (DF2['pickup_latitude'] > 40.7729) & (DF2['pickup_latitude'] < 40.7749) ]
DFE4 = DFE4.copy()
DFE4['Landmark'] = 'The Julliard School'
DFE = (pd.concat([DFE1, DFE2, DFE3, DFE4], sort=True))
DFEF = pd.DataFrame({'Overall' : DFE.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFEF['Function'] = 'Education'

#Taxi - Nightlife
DFN1 = DF2.loc[(DF2["pickup_longitude"]>-73.7512) & (DF2['pickup_longitude']<-73.7492) & (DF2['pickup_latitude'] > 40.6539) & (DF2['pickup_latitude'] < 40.6559) ]
DFN1 = DFN1.copy()
DFN1['Landmark'] = 'Palace Theatre'
DFN2 = DF2.loc[(DF2["pickup_longitude"]>-73.9887) & (DF2['pickup_longitude']<-73.9867) & (DF2['pickup_latitude'] > 40.7201) & (DF2['pickup_latitude'] < 40.7221) ]
DFN2 = DFN2.copy()
DFN2['Landmark'] = 'Ludlow Street'
DFN3 = DF2.loc[(DF2["pickup_longitude"]>-75.2277) & (DF2['pickup_longitude']<-75.2257) & (DF2['pickup_latitude'] > 43.1000) & (DF2['pickup_latitude'] < 43.1020) ]
DFN4 = DF2.loc[(DF2["pickup_longitude"]>-74.0016) & (DF2['pickup_longitude']<-73.0094) & (DF2['pickup_latitude'] > 40.7394) & (DF2['pickup_latitude'] < 40.7319) ]
DFN4 = DFN4.copy()
DFN4['Landmark'] = 'MacDougal Street'
DFN5 = DF2.loc[(DF2["pickup_longitude"]>-73.9852) & (DF2['pickup_longitude']<-73.9832) & (DF2['pickup_latitude'] > 40.7715) & (DF2['pickup_latitude'] < 40.7735) ]
DFN5 = DFN5.copy()
DFN5['Landmark'] = 'Lincoln Centre for Performing Arts'
DFN = (pd.concat([DFN1, DFN2, DFN3, DFN4, DFN5], sort=True))
DFNF = pd.DataFrame({'Overall' : DFN.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFNF['Function'] = 'Nightlife'
DF_tab4 = (pd.concat([DFNF,DFEF,DFBF,DFTF], sort=True))

#Uber - Tourism
DFUT1 = DFU1.loc[(DFU1["Lon"]>-73.9868) & (DFU1['Lon']<-73.9848) & (DFU1['Lat'] > 40.7562) & (DFU1['Lat'] < 40.7582) ]
DFUT1 = DFUT1.copy()
DFUT1['Landmark'] = 'Times Square'
DFUT2 = DFU1.loc[(DFU1["Lon"]>-73.9972) & (DFU1['Lon']<-73.9952) & (DFU1['Lat'] > 40.7154) & (DFU1['Lat'] < 40.7174) ]
DFUT2 = DFUT2.copy()
DFUT2['Landmark'] = 'Chinatown'
DFUT3 = DFU1.loc[(DFU1["Lon"]>-73.9632) & (DFU1['Lon']<-73.9612) & (DFU1['Lat'] > 40.7777) & (DFU1['Lat'] < 40.7797) ]
DFUT3 = DFUT3.copy()
DFUT3['Landmark'] = 'Metropolitan Museum'
DFUT4 = DFU1.loc[(DFU1["Lon"]>-73.9749) & (DFU1['Lon']<-73.9759) & (DFU1['Lat'] > 40.7613) & (DFU1['Lat'] < 40.7633) ]
DFUT4 = DFUT4.copy()
DFUT4['Landmark'] = 'Trump Tower'
DFUT5 = DFU1.loc[(DFU1["Lon"]>-73.9742) & (DFU1['Lon']<-73.9722) & (DFU1['Lat'] > 40.7804) & (DFU1['Lat'] < 40.74504) ]
DFUT5 = DFUT5.copy()
DFUT5['Landmark'] = 'Central Park West'
DFUT6 = DFU1.loc[(DFU1["Lon"]>-73.9866) & (DFU1['Lon']<-73.9846) & (DFU1['Lat'] > 40.7474) & (DFU1['Lat'] < 40.7494) ]
DFUT6 = DFUT6.copy()
DFUT6['Landmark'] = 'Empire State Building'
DFUT = (pd.concat([DFUT1, DFUT2, DFUT3, DFUT4, DFUT5, DFUT6], sort=True))
DFUTF = pd.DataFrame({'Overall' : DFUT.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFUTF['Function'] ='Tourism'

#Uber - Business
DFUB1 = DFU1.loc[(DFU1["Lon"]>-73.9763) & (DFU1['Lon']<-73.9743) & (DFU1['Lat'] > 40.7506) & (DFU1['Lat'] < 40.7526) ]
DFUB1 = DFUB1.copy()
DFUB1['Landmark'] = 'Chrysler Building'
DFUB2 = DFU1.loc[(DFU1["Lon"]>-73.9777) & (DFU1['Lon']<-73.9757) & (DFU1['Lat'] > 40.7544) & (DFU1['Lat'] < 40.7564) ]
DFUB2 = DFUB2.copy()
DFUB2['Landmark'] = 'JP Morgan Chase'
DFUB3 = DFU1.loc[(DFU1["Lon"]>-74.2337) & (DFU1['Lon']<-74.2317) & (DFU1['Lat'] > 40.6494) & (DFU1['Lat'] < 40.6514) ]
DFUB3 = DFUB3.copy()
DFUB3['Landmark'] = '200 Park Avenue'
DFUB4 = DFU1.loc[(DFU1["Lon"]>-74.0154) & (DFU1['Lon']<-74.0134) & (DFU1['Lat'] > 40.7137) & (DFU1['Lat'] < 40.7157) ]
DFUB4 = DFUB4.copy()
DFUB4['Landmark'] = 'Goldman Sachs'
DFUB5 = DFU1.loc[(DFU1["Lon"]>-73.9865) & (DFU1['Lon']<-73.9845) & (DFU1['Lat'] > 40.7589) & (DFU1['Lat'] < 40.7609) ]
DFUB5 = DFUB5.copy()
DFUB5['Landmark'] = 'Morgan Stanley'
DFUB6 = DFU1.loc[(DFU1["Lon"]>-74.0079) & (DFU1['Lon']<-74.0059) & (DFU1['Lat'] > 40.7038) & (DFU1['Lat'] < 40.7058) ]
DFUB6 = DFUB6.copy()
DFUB6['Landmark'] = 'Wall Street'
DFUB = (pd.concat([DFUB1, DFUB2, DFUB3, DFUB4, DFUB5, DFUB6], sort=True))
DFUBF = pd.DataFrame({'Overall' : DFUB.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFUBF['Function'] = 'Business'

#Uber - Education
DFUE1 = DFU1.loc[(DFU1["Lon"]>-73.9831) & (DFU1['Lon']<-73.9811) & (DFU1['Lat'] > 40.7523) & (DFU1['Lat'] < 40.7543) ]
DFUE1 = DFUE1.copy()
DFUE1['Landmark'] = 'New York Public Library'
DFUE2 = DFU1.loc[(DFU1["Lon"]>-73.9972) & (DFU1['Lon']<-73.9952) & (DFU1['Lat'] > 40.7282) & (DFU1['Lat'] < 40.7302) ]
DFUE2 = DFUE2.copy()
DFUE2['Landmark'] = 'NYU'
DFUE3 = DFU1.loc[(DFU1["Lon"]>-73.9627) & (DFU1['Lon']<-73.9607) & (DFU1['Lat'] > 40.8069) & (DFU1['Lat'] < 40.8089) ]
DFUE3 = DFUE3.copy()
DFUE3['Landmark'] = 'Columbia University'
DFUE4 = DFU1.loc[(DFU1["Lon"]>-73.9839) & (DFU1['Lon']<-73.9819) & (DFU1['Lat'] > 40.7729) & (DFU1['Lat'] < 40.7749) ]
DFUE4 = DFUE4.copy()
DFUE4['Landmark'] = 'The Julliard School'
DFUE = (pd.concat([DFUE1, DFUE2, DFUE3, DFUE4], sort=True))
DFUEF = pd.DataFrame({'Overall' : DFUE.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFUEF['Function'] = 'Education'

#Uber - Nightlife
DFUN1 = DFU1.loc[(DFU1["Lon"]>-73.7512) & (DFU1['Lon']<-73.7492) & (DFU1['Lat'] > 40.6539) & (DFU1['Lat'] < 40.6559) ]
DFUN1 = DFUN1.copy()
DFUN1['Landmark'] = 'Palace Theatre'
DFUN2 = DFU1.loc[(DFU1["Lon"]>-73.9887) & (DFU1['Lon']<-73.9867) & (DFU1['Lat'] > 40.7201) & (DFU1['Lat'] < 40.7221) ]
DFUN2 = DFUN2.copy()
DFUN2['Landmark'] = 'Ludlow Street'
DFUN3 = DFU1.loc[(DFU1["Lon"]>-75.2277) & (DFU1['Lon']<-75.2257) & (DFU1['Lat'] > 43.1000) & (DFU1['Lat'] < 43.1020) ]
DFUN3 = DFUN3.copy()
DFUN3['Landmark'] = 'Bleecker Street'
DFUN4 = DFU1.loc[(DFU1["Lon"]>-73.9101) & (DFU1['Lon']<-73.9081) & (DFU1['Lat'] > 40.6794) & (DFU1['Lat'] < 40.6814) ]
DFUN4 = DFUN4.copy()
DFUN4['Landmark'] = 'MacDougal Street'
DFUN5 = DFU1.loc[(DFU1["Lon"]>-73.9852) & (DFU1['Lon']<-73.9832) & (DFU1['Lat'] > 40.7715) & (DFU1['Lat'] < 40.7735) ]
DFUN5 = DFUN5.copy()
DFUN5['Landmark'] = 'Lincoln Centre for Performing Arts'
DFUN = (pd.concat([DFUN1, DFUN2, DFUN3, DFUN4, DFUN5], sort=True))
DFUNF = pd.DataFrame({'Overall' : DFUN.groupby( ["Month","Landmark","Type"] ).size()}).reset_index()
DFUNF['Function'] = 'Nightlife'
DFU_tab4= (pd.concat([DFUNF,DFUEF,DFUBF,DFUTF], sort=True))
DF_tab4['Overall']=DF_tab4['Overall'].div(18.17)
DF_tab4.Overall = DF_tab4.Overall.round()

In [70]:
#start plotting
DFU_tab4.loc[DFU_tab4['Function'] == 'Education', 'Color'] = 'brown'
DFU_tab4.loc[DFU_tab4['Function'] == 'Tourism', 'Color'] = 'aqua'
DFU_tab4.loc[DFU_tab4['Function'] == 'Business', 'Color'] = 'limegreen'
DFU_tab4.loc[DFU_tab4['Function'] == 'Nightlife', 'Color'] = 'hotpink'
function = DFU_tab4['Function']
landmark = DFU_tab4['Landmark']
month = DFU_tab4['Month']
source = ColumnDataSource(data=dict(color=DFU_tab4['Color'], month=month, 
                                    function=function, landmark=landmark, 
                                    total_uber=DFU_tab4['Overall'], total_taxi=DF_tab4['Overall']))

hover = HoverTool(tooltips=[
    ("Function", "@function"),
    ("Landmark", "@landmark"),
    ("Month", "@month"),
])
TOOLS = [
    hover, BoxZoomTool(), LassoSelectTool(), WheelZoomTool(), PanTool(),
    ResetTool(), SaveTool()
]

s = figure(
    plot_height=600,
    plot_width=700,
    title="Manhattan Taxi vs Uber",
    tools=TOOLS,
    x_axis_label="Taxi",
    y_axis_label="Uber",
    toolbar_location="above",
    y_range=(0, 360))

s.circle(
    y="total_uber",
    x="total_taxi",
    source=source,
    fill_color='color',
    size=7,
    alpha=0.4,
    legend="function")

#show(s)

In [72]:
layouttab4 = (s)
tab4 = Panel(child=layouttab4, title = 'Manhattan Hotspots')

In [None]:
tabs = Tabs(tabs=[tab1,tab2,tab3,tab4])
curdoc().add_root(tabs)