In [1]:
import os

import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py

# Enable plotly's offline notebook rendering. `py` is already `plotly.offline`,
# so call the function on it directly (same access path as `py.iplot` below)
# instead of reaching through the `plotly.offline.offline` implementation module.
py.init_notebook_mode(connected=True)

## Loading the data

In [2]:
def load_data(city):
    """Read the two CSV files that describe one city.

    Parameters
    ----------
    city : str
        Name of the sub-directory under ``city/`` that holds the data files.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(locations, pickup_times)`` read from ``city/<city>/locations.csv``
        and ``city/<city>/pickup_times.csv`` respectively.
    """
    city_dir = 'city/' + city + '/'
    # Read in a fixed order so the returned tuple is (locations, pickup_times).
    frames = tuple(
        pd.read_csv(city_dir + file_name)
        for file_name in ('locations.csv', 'pickup_times.csv')
    )
    return frames
    

In [3]:
# Load the Helsinki data set; `locations` and `pickup_times` are read as
# notebook-level globals by the cells below.
locations, pickup_times = load_data(city='helsinki')

## Calculating location medians

In [5]:
import warnings
# Install a catch-all filter with the 'always' action so that every warning is
# shown each time it is raised, rather than only on its first occurrence.
warnings.filterwarnings('always')

In [41]:
def locations_median(day, start_hour, end_hour, pickup_times_data=None, locations_data=None):
    """Median pickup time per location inside a time window, joined with coordinates.

    Parameters
    ----------
    day : str
        Calendar day, e.g. ``'2019-01-13'``.
    start_hour, end_hour : str
        Clock times such as ``'15:00'``. Together with ``day`` they form the
        UTC bounds ``<day>T<hour>Z``. The start bound is exclusive and the end
        bound is inclusive.
    pickup_times_data : pandas.DataFrame, optional
        Frame with ``location_id``, ``iso_8601_timestamp`` and ``pickup_time``
        columns. Defaults to the notebook-level ``pickup_times``.
    locations_data : pandas.DataFrame, optional
        Frame with a ``location_id`` column (plus whatever columns should be
        carried along). Defaults to the notebook-level ``locations``.

    Returns
    -------
    pandas.DataFrame
        Outer join on ``location_id`` of the per-location medians (column
        ``median``) with ``locations_data``. Locations without any pickup in
        the window get ``NaN`` as their median.
    """
    if pickup_times_data is None:
        pickup_times_data = pickup_times
    if locations_data is None:
        locations_data = locations

    window_start = pd.to_datetime(day + 'T' + start_hour + 'Z', utc=True)
    window_end = pd.to_datetime(day + 'T' + end_hour + 'Z', utc=True)

    # Compare real datetimes, not strings: lexicographically
    # '2019-01-13T15:00:30Z' sorts *before* '2019-01-13T15:00Z' (':' < 'Z'),
    # so a string comparison shifts the window whenever the column carries
    # seconds while the bounds do not.
    # NOTE(review): assumes every value in the column uses the same ISO 8601
    # layout; mixed layouts may need an explicit `format=` -- confirm on the data.
    timestamps = pd.to_datetime(pickup_times_data['iso_8601_timestamp'], utc=True)
    in_window = (timestamps > window_start) & (timestamps <= window_end)

    # reset_index() turns the group key into an ordinary column, so
    # 'location_id' is never both an index level and a column label (the
    # merge ambiguity error introduced with pandas 0.24).
    location_medians = (
        pickup_times_data[in_window]
        .groupby('location_id')['pickup_time']
        .median()
        .rename('median')
        .reset_index()
    )

    return location_medians.merge(locations_data, on='location_id', how='outer')

In [42]:
# Example window: Sunday 2019-01-13, 15:00-16:00 (bounds are built as UTC 'Z' times).
example_location_medians = locations_median(
    day='2019-01-13',
    start_hour='15:00',
    end_hour='16:00',
)

In [43]:
# Preview the first five rows of the joined medians/coordinates frame.
example_location_medians.head(n=5)

Unnamed: 0,location_id,median,longitude,latitude
0,1,22.0,24.941583,60.17087
1,2,29.0,24.934201,60.169802
2,3,16.5,24.938493,60.166408
3,4,29.0,24.940467,60.167048
4,5,20.0,24.938793,60.167518


## Visualizing location medians

In [17]:
# Mapbox token passed to the map layout. Set the MAPBOX_ACCESS_TOKEN environment
# variable to supply your own token without editing the notebook.
# NOTE(review): the fallback below is a credential committed to the notebook;
# rotate it and drop the fallback once the environment variable is in place.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiYW50aWthc3N0dWRlbnQiLCJhIjoiY2pyZXZmbGp5MDczaDN5bGNmZHg5bDVnbSJ9.fFX_URUh20VXDmCwUhRSpw',
)

In [18]:
def formation_location_text(location_data):
    """Build one hover label per row of ``location_data``.

    Parameters
    ----------
    location_data : pandas.DataFrame
        Frame with ``location_id`` and ``median`` columns.

    Returns
    -------
    list of str
        Labels of the form ``'id: <location_id> Md: <median>'``, in row order.
        An empty frame yields an empty list.
    """
    # Read the two columns directly rather than via iterrows(): iterrows()
    # packs each row into a single-dtype Series, which turns integer ids into
    # floats ('id: 1.0') when the other columns are floats.
    ids = location_data['location_id'].tolist()
    medians = location_data['median'].tolist()

    return [
        'id: ' + str(location_id) + ' Md: ' + str(median)
        for location_id, median in zip(ids, medians)
    ]


def visualize_location_medians(location_data,
                               title='Helsinki restaurant median pickup times',
                               center_lat=60.17, center_lon=24.94, zoom=10):
    """Plot location medians as colour-coded markers on a Mapbox map.

    Parameters
    ----------
    location_data : pandas.DataFrame
        Frame with ``longitude``, ``latitude``, ``location_id`` and ``median``
        columns; ``median`` drives the marker colour.
    title : str, optional
        Figure title. Defaults to the original Helsinki title.
    center_lat, center_lon : float, optional
        Initial map centre. Defaults to the original Helsinki centre.
    zoom : int or float, optional
        Initial Mapbox zoom level.

    Returns
    -------
    None
        The figure is rendered inline with ``py.iplot``.
    """
    markers = go.Scattermapbox(
        lon=location_data.longitude,
        lat=location_data.latitude,
        mode='markers',
        # Hover label per marker: 'id: <location_id> Md: <median>'.
        text=formation_location_text(location_data),
        marker=dict(
            size=10,
            color=location_data['median'],
            colorbar=dict(
                title='Median (Min)',
                dtick=2,
            ),
            colorscale='RdBu',
        ),
    )

    layout = go.Layout(
        autosize=True,
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=center_lat,
                lon=center_lon,
            ),
            pitch=0,
            zoom=zoom,
        ),
        title=title,
    )

    fig = go.Figure(data=[markers], layout=layout)
    py.iplot(fig)
    


In [19]:
# Render the example window's medians on the map.
visualize_location_medians(location_data=example_location_medians)

## Location medians to csv

In [20]:
def export_to_csv(locations, filename):
    """Write the ``locations`` DataFrame to ``filename`` as CSV, omitting the index.

    Parameters
    ----------
    locations : pandas.DataFrame
        Frame to export.
    filename : str
        Destination path of the CSV file.
    """
    locations.to_csv(path_or_buf=filename, index=False)

In [21]:
# Persist only the id/median pair of the example window; coordinates are left out.
export_to_csv(
    locations=example_location_medians[['location_id', 'median']],
    filename='example-location-medians.csv',
)