In [21]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation warnings raised by the plotting libraries.
warnings.filterwarnings("ignore")

# read in the data (note that this is a subsample)
# No index column is specified, so the index stored in the file is loaded
# as an ordinary column (it appears as "Unnamed: 0" in the output below).
df = pd.read_csv("sample.csv")

# Show the last five rows as a quick sanity check of the load.
df.tail()

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,...,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,PickupCell,DropoffCell
99995,2,4/12/15 22:55,4/12/15 23:03,1,0.75,-73.99437,40.746239,1,N,-73.980774,...,2,6.5,0.5,0.5,0.0,0.0,0.3,7.8,25:69,27:68
99996,1,4/12/15 22:55,4/12/15 23:08,1,2.4,-73.968346,40.759735,1,N,-73.969879,...,2,11.0,0.5,0.5,0.0,0.0,0.3,12.3,27:64,24:60
99997,1,4/12/15 22:55,4/12/15 23:01,1,0.8,-73.993484,40.742168,1,N,-73.98439,...,1,6.0,0.5,0.5,1.45,0.0,0.3,8.75,25:69,26:67
99998,2,4/12/15 22:55,4/12/15 23:17,1,4.73,-73.984993,40.747929,1,N,-73.981552,...,1,18.5,0.5,0.5,3.96,0.0,0.3,23.76,26:68,33:76
99999,2,4/12/15 22:55,4/12/15 22:59,2,0.8,-73.975731,40.751968,1,N,-73.981247,...,1,4.5,0.5,0.5,1.16,0.0,0.3,6.96,27:66,27:68


In [3]:
# Base folium map, centred on the median pickup location.
import folium
COORDS = ['pickup_latitude', 'pickup_longitude']
# The "50%" row of describe() holds the per-column median, in COORDS order
# (latitude first, longitude second).
Mcoords = df[COORDS].describe().loc["50%"].values
nycM = folium.Map(tiles="Stamen Terrain", location=Mcoords, zoom_start=10)

# Write the map to an HTML file, then display it as the cell output.
nycM.save('foliumBaseMap.html')
nycM

In [4]:
from bokeh.plotting import figure, show

# tile providers are the underlying map 
from bokeh.tile_providers import get_provider, Vendors

# To display Bokeh plots inside Jupyter, output_notebook() has to be called.
from bokeh.io import reset_output, output_notebook
# Reset Bokeh's output state first so that re-running this cell starts clean
# (presumably clears any earlier output_file/output_notebook setting -- confirm
# against the Bokeh docs for the installed version).
reset_output()
output_notebook()
# The cell output below should report "BokehJS 1.4.0 successfully loaded."

In [5]:
def lat2mercer(coords):
    """
    Project latitudes onto the Mercator y axis.

    Parameters
    ----------
    coords : iterable of numbers
        Latitudes in degrees.

    Returns
    -------
    list
        One projected y value per input latitude, in input order.
    """
    # Sphere radius used by the projection (6378137, i.e. metres).
    radius = 6378137
    return [
        np.log(np.tan((90 + degrees) * np.pi/360.0)) * radius
        for degrees in coords
    ]

def lon2mercer(coords):
    """
    Project longitudes onto the Mercator x axis.

    Parameters
    ----------
    coords : iterable of numbers
        Longitudes in degrees.

    Returns
    -------
    list
        One projected x value per input longitude, in input order.
    """
    # Sphere radius used by the projection (6378137, i.e. metres).
    radius = 6378137
    return [degrees * (radius * np.pi/180.0) for degrees in coords]

In [9]:
# Bbox = bounding box of the pickup coordinates.
# Row 0 holds the column minima and row 1 the maxima; columns follow COORDS,
# so index 0 is latitude and index 1 is longitude.
Bbox = df[COORDS].describe().loc[['min','max']].values
yRange = sorted(corner[0] for corner in Bbox)
xRange = sorted(corner[1] for corner in Bbox)

# Draw the same extent once per tile vendor so the base maps can be compared.
for mapType in Vendors:
    # Figure limited to the bounding box, expressed in Mercator coordinates.
    p = figure(
        x_range=lon2mercer(xRange),
        y_range=lat2mercer(yRange),
        x_axis_type="mercator",
        y_axis_type="mercator",
    )
    # Label the figure with the vendor name and lay its tiles underneath.
    p.title.text = mapType
    p.add_tile(get_provider(mapType))

    # display
    show(p)

In [10]:
# Tile layer used as the base map for the pickup figure.
TILE = get_provider("STAMEN_TERRAIN_RETINA")

# Empty figure spanning the pickup bounding box in Mercator coordinates;
# the pickup points themselves are added in a later cell.
pPickup = figure(
    x_range=lon2mercer(xRange),
    y_range=lat2mercer(yRange),
    x_axis_type="mercator",
    y_axis_type="mercator",
)
pPickup.title.text = "Pickups in NYC"
pPickup.add_tile(TILE)

In [14]:
# Project the pickup coordinates to Mercator so they line up with the tiles.
# lon2mercer/lat2mercer take any iterable and return a list of the same
# length, so each column is passed in one call instead of wrapping every
# value in a one-element list inside Series.apply. The resulting lists are
# assigned positionally, giving the same column values as before.
df['pickupX'] = lon2mercer(df['pickup_longitude'])
df['pickupY'] = lat2mercer(df['pickup_latitude'])

# Plot one semi-transparent blue circle per pickup on the pPickup figure.
# NOTE: each run of this cell adds another circle layer to pPickup; re-run
# the cell that creates pPickup first to avoid stacking duplicates.
pPickup.circle(x='pickupX', y='pickupY',
         size=5, fill_color="blue", fill_alpha=0.5,
         source=df[['pickupX','pickupY']])




In [17]:
# Render the pickup figure (tiles plus any glyphs added so far) in the notebook.
show(pPickup)

In [22]:
# Persist df in its current state to a Feather file. Which columns end up in
# the file depends on the cells run since df was last loaded (e.g. whether
# pickupX/pickupY have been added).
df.to_feather('sample.feather')