In [1]:
# import neccessary packages
import folium
import geopandas as gpd
import pickle
import pandas as pd
from shapely.geometry import Point
import calendar


In [2]:
import jupyter_dash 

In [3]:
# Reading in cleaned and merged dataset - trip + fare 
merged_dataset = pd.read_pickle(r'merged_dataset.pkl')

In [4]:
# Completing some final cleaning before visualisation:
#rename
merged_dataset = merged_dataset.rename(columns = {"pickup_day_of_week_x": "Weekday",
                                  "vendor_id_x":"Vendor",
                                  "passenger_count": "Passengers"
                                                 })

#Clean passengers:
merged_dataset["Weekday"] = merged_dataset["Weekday"].apply(lambda x: 'weekday' if x in {1,2,3,4,5} else 'weekend')

#clean to passengers - turn into boolean flag:
merged_dataset = merged_dataset[(merged_dataset["Passengers"] != 0) & (merged_dataset["Passengers"] != 208)]
merged_dataset["Passengers"] = merged_dataset["Passengers"].apply(lambda x: 'single ride' if x == 1 else 'shared ride')

#clean vendor
merged_dataset["Vendor"] = merged_dataset["Vendor"].apply(lambda x: 'Creative Mobile Technologies' if x == 'CMT' else 'VeriFone')


In [5]:
taxi_zones = gpd.read_file("data/shapefiles/taxi_zones.shp").to_crs({'init': 'epsg:4326'})
taxi_zones = taxi_zones.drop(['Shape_Area', 'Shape_Leng', 'OBJECTID'], axis=1)

  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [6]:
# turn taxi datasets into geodataframes
taxi_fare_trip = gpd.GeoDataFrame(merged_dataset, crs="epsg:4326",geometry=gpd.points_from_xy(merged_dataset.pickup_longitude, merged_dataset.pickup_latitude))

In [7]:
# Spatial Join by geometry, where co-ordinates of pick-up location is within the boundary of taxi zones
joined = gpd.sjoin(
        taxi_zones,
        taxi_fare_trip, 
        how='left'    )

  warn(


In [8]:
# Check to visualise outcome of join:
print(joined.shape)
print(taxi_fare_trip.shape)

(1589234, 32)
(1621917, 28)


In [9]:
def create_choropleth(dataset, target_variable, group_variable):
  mymap = folium.Map(location=[40.75055690397181, -73.97406181019522], zoom_start=11,tiles=None)
  folium.TileLayer('CartoDB positron',name="Dark Map",control=False).add_to(mymap)
  myscale = (dataset[target_variable].quantile((0,0.1,0.75,0.9,0.98,1))).tolist()

  # capturing the return of folium.Choropleth()
  cp = folium.Choropleth(
      geo_data=dataset,
      name='Choropleth',
      data=dataset,
      columns=[group_variable,target_variable],
      key_on=f"feature.properties.{group_variable}",
      fill_color='YlGnBu',
      threshold_scale=myscale,
      fill_opacity=1,
      line_opacity=0.2,
      legend_name=f'Proportion % of {target_variable} by {group_variable}',
      smooth_factor=0
    ).add_to(mymap)

  folium.GeoJsonTooltip([target_variable, group_variable]).add_to(cp.geojson)
    
  folium.LayerControl().add_to(mymap)

  return mymap

In [10]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash
import pyproj
# pyproj.Proj("+init=epsg:4326")

#
all_options = {
    'No filter':list(['None']),
    'Passengers': list(set(i for i in joined["Passengers"] if pd.notna(i))),
    'Weekday': list(set(i for i in joined["Weekday"] if pd.notna(i))),
    'Vendor': list(set(i for i in joined["Vendor"] if pd.notna(i))),
}

# Build App

app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1("New York City Taxi Dashboard"),
    
    ##Dash_HTML component where the map is displayed within an Iframe. Not included in the callback, but on the layout page
    html.Iframe(id='map', srcDoc=None, width='100%', height='400'), 
        
    html.Label([
        "Choose statistic:",
        dcc.Dropdown(
            id='target', clearable=False,
            value='number_of_trips', options=['trip_distance', 'trip_time_in_secs', 'fare_amount', 'number_of_trips'])
    ], style={'width': '48%', 'display': 'inline-block', 'fontSize': '16px'}),
    html.Label([
        "Group by:",
        dcc.Dropdown(
            id='groupby', clearable=False,
            value='borough', options=['zone', 'borough'])
    ], style={'width': '48%', 'display': 'inline-block', 'fontSize': '16px'}),

    html.Hr(),

    html.Label([
        "Filter by:",
    dcc.RadioItems(
        list(all_options.keys()),
        'No filter',
        id='column-radio',
    ) ], style={'width': '48%', 'display': 'inline-block', 'fontSize': '16px'}),
    html.Label([
        "Select value:",
    dcc.RadioItems( id='values-radio')], style={'width': '48%', 'display': 'inline-block', 'fontSize': '16px'}),

    html.Hr(),
    
    dcc.Graph(id = 'live-graph'),
    
#    dcc.Graph(id = 'graph-multivariate', style={'width': '48%', 'display': 'inline-block', 'fontSize': '16px'})
    
])
@app.callback(
    Output('values-radio', 'options'),
    Input('column-radio', 'value'))
def set_cities_options(selected_country):
    return [{'label': i, 'value': i} for i in all_options[selected_country]]


@app.callback(
    Output('values-radio', 'value'),
    Input('values-radio', 'options'))
def set_cities_value(available_options):
    return available_options[0]['value']


@app.callback(dash.dependencies.Output('map', 'srcDoc'),
                [dash.dependencies.Input('target', 'value'),
                dash.dependencies.Input('groupby', 'value'),
                dash.dependencies.Input('column-radio', 'value'),
                dash.dependencies.Input('values-radio', 'value')
                ]
    )

def update_folium_map( target, groupby, filter_column, filter_value):
    """  
        From a dataset, create a folium choropleth map, filtering dataset by parameters, then grouping by groupby variable in map
        and using target as measure
    """
    
    dataset = joined.copy()

    if (filter_column and filter_value) and filter_column != 'No filter':
            dataset = dataset[dataset[filter_column] == filter_value]
            
    aggregated = dataset.groupby([groupby])
    counts = aggregated.size().to_frame(name='number_of_trips')
    prepped = (counts.join(aggregated.agg({'trip_distance':'mean'}))
        .join(aggregated.agg({'trip_time_in_secs':'mean'}))
        .join(aggregated.agg({'fare_amount':'mean'}))
        .join(aggregated.agg({'total_amount': 'mean'}))
        .reset_index() )
    merged = taxi_zones.merge(prepped, left_on=groupby, right_on=groupby)  

    m = create_choropleth(merged,  target, groupby)
    
    #Save the map in a .html file
    m.save("mymapnew.html")
    return open('mymapnew.html', 'r').read()


@app.callback(
    Output('live-graph', 'figure'),
    [ dash.dependencies.Input('target', 'value'),
                dash.dependencies.Input('groupby', 'value'),
                dash.dependencies.Input('column-radio', 'value'),
                dash.dependencies.Input('values-radio', 'value') ]
)
def update_histogram(target, groupby, filter_column, filter_value):
    dataset = joined.copy() 
    if filter_column == 'No filter':
        filter_column = groupby
    if target == 'number_of_trips':
        target = 'fare_amount'
    barchart = px.histogram(
        data_frame=dataset,
        x=target,
        color=filter_column,
        opacity=1,
        barmode='group')
    return barchart

# @app.callback(
#     Output('graph-multivariate', 'figure'),
#     [ dash.dependencies.Input('target', 'value'),
#                 dash.dependencies.Input('groupby', 'value'),
#                 dash.dependencies.Input('column-radio', 'value'),
#                 dash.dependencies.Input('values-radio', 'value') ]
# )
# def update_histogram(target, groupby, filter_column, filter_value):
#     dataset = joined.copy()
#     if filter_column == 'No filter':
#         filter_column = groupby
#     if target == 'number_of_trips':
#         target = 'fare_amount'
#     barchart = px.bar(
#         data_frame=dataset,
#         x=filter_column,
#         y=target)
#     return barchart

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


In [11]:
# Run app and display result inline in the notebook
app.run_server(mode='external', port=8221)

Dash app running on http://127.0.0.1:8221/
