# Interactive NYC Taxi Fare Duration Prediction Project Deployment

In [1]:
from math import radians, cos, sin, asin, sqrt, atan2
import numpy as np
import pandas as pd
import pickle
from sklearn.externals import joblib
from geopy.geocoders import Nominatim
import shapefile as shp
from shapely.geometry import Point, Polygon
from ipyleaflet import Map, Marker, Polyline

## Geo Location - Manhattan Definition and distance calculation

`The raw boundaries (lat/long) are unreliable. Let's take the NYC center coordinates (40.7127, -74.0059) and use as boundaries roughly 1 degree (approx. 69 miles) in each direction.`

In [2]:
# Bounding box used as the area of interest: about one degree on every side
# of the NYC center coordinates used for the map below.
lat_min, lat_max = 39.7, 41.7
long_min, long_max = -75.0, -73.0

# Closed rectangle: the first corner is repeated at the end to close the outline.
line = Polyline(
    locations=[[[
        [lat_min, long_min],
        [lat_min, long_max],
        [lat_max, long_max],
        [lat_max, long_min],
        [lat_min, long_min],
    ]]],
    color="green",
    fill_color="green",
)

# Map centred on NYC with the bounding box drawn on top; the bare `m` on the
# last line renders the widget as the cell output.
m = Map(center=(40.7127, -74.0059), zoom=7)
m.add_layer(line)
m

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

In [3]:
# Open the shapefile; shape 0 of this reader is later converted into the
# Manhattan polygon by polygon_create(sf, 0).
shp_path = "./data/NYC_BB/geo_export_8cc1e1df-036a-4a38-ae67-63772ec4de2a.shp"
sf = shp.Reader(shp_path)

In [4]:
def haver_dist_in_miles(lat1, lat2, long1, long2):
    """
    Great-circle (haversine) distance in miles between two points given
    in decimal degrees.

    Note the argument order: both latitudes first, then both longitudes
    (lat1, lat2, long1, long2).
    """
    earth_radius_miles = 3956

    # Work in radians from here on.
    phi1, phi2 = radians(lat1), radians(lat2)
    lam1, lam2 = radians(long1), radians(long2)

    delta_lat = phi2 - phi1
    delta_long = lam2 - lam1

    # Haversine term followed by the central angle between the two points.
    hav = sin(delta_lat / 2) ** 2 + cos(phi1) * cos(phi2) * sin(delta_long / 2) ** 2
    central_angle = 2 * asin(sqrt(hav))

    return central_angle * earth_radius_miles

In [5]:
def polygon_create(sf, id):
    """
    Build a Shapely Polygon from one shape of an open shapefile.

    Parameters
    ----------
    sf : shapefile reader
        Object whose ``shape(id)`` returns a shape exposing ``points``
        (a sequence of coordinate pairs).
    id : int
        Index of the shape to convert. The name shadows the builtin ``id``
        but is kept so existing keyword callers keep working.

    Returns
    -------
    Polygon
        Polygon whose ring is the shape's points in file order, using the
        first value of each point as x and the second as y.
    """
    shape_ex = sf.shape(id)

    # Plain (x, y) float pairs: the previous version wrapped every coordinate
    # in a one-element NumPy array, which is not the scalar-pair form the
    # Polygon constructor documents.
    coords = [(float(point[0]), float(point[1])) for point in shape_ex.points]

    # NOTE(review): every point of the shape goes into a single ring. If the
    # shape consists of several parts (e.g. separate islands) this would join
    # them into one outline -- TODO confirm against the shapefile.
    return Polygon(coords)

In [6]:
# Polygon built from shape 0 of the shapefile; nyc_trip_estimation uses it to
# decide whether a geocoded point lies within Manhattan.
manh_poly = polygon_create(sf, 0)

## Loading and testing saved ML models

In [7]:
# The three models below are used by estimate_nyc_taxi_fare_duration.
# Loading a pickled file can execute arbitrary code, so only load model files
# that come from a trusted source.
# NOTE(review): `joblib` is imported from `sklearn.externals` at the top of the
# notebook; that alias is not available in newer scikit-learn releases --
# confirm the pinned version.

# Load the fare model from a pickled file
fare_model = joblib.load('./models/fare_model.pkl')

# Load the duration model from a pickled file
dur_model = joblib.load('./models/dur_model.pkl')

# Load the distance model from a pickled file
dis_model = joblib.load('./models/dis_model.pkl')

## Prediction functions with Geo Location

In [8]:
def estimate_nyc_taxi_fare_duration(pickup_longitude,
                                    pickup_latitude,
                                    dropoff_longitude,
                                    dropoff_latitude,
                                    RateCodeID=1,
                                    passenger_count=1,
                                    in_man=1,
                                    out_man=0):
    """
    Print the predicted distance, duration and fare for one trip.

    The trip is described by its pickup/dropoff coordinates (decimal
    degrees) plus the rate code, passenger count and the two Manhattan
    flags. The hour and weekday features are taken from the current local
    clock shifted back by two hours, and the haversine distance is derived
    from the coordinates.

    Parameters
    ----------
    pickup_longitude, pickup_latitude : float
        Pickup position.
    dropoff_longitude, dropoff_latitude : float
        Dropoff position.
    RateCodeID : int, default 1
        Rate code fed to the models.
    passenger_count : int, default 1
        Number of passengers fed to the models.
    in_man : int, default 1
        1 when both ends of the trip are inside Manhattan, else 0.
    out_man : int, default 0
        1 when both ends of the trip are outside Manhattan, else 0.

    Returns
    -------
    None
        The result is printed, not returned.
    """
    import datetime as dt

    # Shift a single timestamp instead of computing `hour - 2`: this keeps the
    # hour within 0-23 and rolls the weekday back together with it when the
    # shift crosses midnight.
    # NOTE(review): the 2-hour offset is presumably the difference between the
    # notebook host's local time and New York time -- TODO confirm.
    shifted_now = dt.datetime.now() - dt.timedelta(hours=2)
    hour = shifted_now.hour
    weekday = shifted_now.weekday()

    haver_dist = haver_dist_in_miles(pickup_latitude, dropoff_latitude,
                                     pickup_longitude, dropoff_longitude)

    # Single-row frame; `columns` pins the feature order handed to the models.
    data_input = pd.DataFrame(
        {
            'passenger_count': passenger_count,
            'pickup_longitude': pickup_longitude,
            'pickup_latitude': pickup_latitude,
            'RateCodeID': RateCodeID,
            'dropoff_longitude': dropoff_longitude,
            'dropoff_latitude': dropoff_latitude,
            'weekday': weekday,
            'hour': hour,
            'in_man': in_man,
            'out_man': out_man,
            'haver_dist': haver_dist
        },
        index=[0],
        columns=[
            'passenger_count', 'pickup_longitude', 'pickup_latitude',
            'RateCodeID', 'dropoff_longitude', 'dropoff_latitude', 'weekday',
            'hour', 'in_man', 'out_man', 'haver_dist'
        ])

    fare_pred = fare_model.predict(data_input)
    dur_pred = dur_model.predict(data_input)
    dis_pred = dis_model.predict(data_input)
    print("\nTrip has {} miles and will last {} minutes, with a basic cost od ${}".format(
        round(dis_pred[0], 2), round(dur_pred[0], 1), round(fare_pred[0], 2)))

In [9]:
def nyc_trip_estimation(start_loc, end_loc, passenger_count=1):
    """
    Geocode two free-text locations, print the trip estimate and return
    the resolved coordinates.

    Both locations are resolved with Nominatim, tested against
    ``manh_poly`` to derive the ``in_man`` / ``out_man`` flags, and a rate
    code is chosen:

    * 2 ('JFK') when the pickup is inside ``manh_poly`` and ``end_loc`` is
      exactly ``'JFK'``;
    * 3 ('EWR') when ``end_loc`` is exactly ``'EWR'``;
    * 1 ('Standard') otherwise.

    Parameters
    ----------
    start_loc, end_loc : str
        Pickup and dropoff descriptions passed to the geocoder.
    passenger_count : int, default 1
        Forwarded to ``estimate_nyc_taxi_fare_duration``.

    Returns
    -------
    tuple
        ``(start_latitude, start_longitude, end_latitude, end_longitude)``.

    Raises
    ------
    ValueError
        If either location cannot be geocoded.
    """
    geolocator = Nominatim(user_agent="mjrovai")
    start_point = geolocator.geocode(start_loc)
    end_point = geolocator.geocode(end_loc)

    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on `.longitude` further down.
    if start_point is None:
        raise ValueError(
            "Could not geocode pickup location: {!r}".format(start_loc))
    if end_point is None:
        raise ValueError(
            "Could not geocode dropoff location: {!r}".format(end_loc))

    # Shapely points take (x, y), i.e. (longitude, latitude).
    start_in_man = Point(start_point.longitude,
                         start_point.latitude).within(manh_poly)
    end_in_man = Point(end_point.longitude,
                       end_point.latitude).within(manh_poly)

    in_man = 1 if (start_in_man and end_in_man) else 0
    out_man = 1 if (not start_in_man and not end_in_man) else 0

    if in_man:
        trip = 'inside Manhattan'
    elif out_man:
        trip = 'outside Manhattan'
    else:
        trip = 'partially in Manhattan'

    # Rate code selection relies on the literal strings 'JFK' / 'EWR'.
    if start_in_man and end_loc == 'JFK':
        RateCodeID, Code = 2, 'JFK'
    elif end_loc == 'EWR':
        RateCodeID, Code = 3, 'EWR'
    else:
        RateCodeID, Code = 1, 'Standard'

    estimate_nyc_taxi_fare_duration(
        start_point.longitude,
        start_point.latitude,
        end_point.longitude,
        end_point.latitude,
        RateCodeID=RateCodeID,
        passenger_count=passenger_count,
        in_man=in_man,
        out_man=out_man)

    print(
        "[Trip Info]: Trip {}; Rate Code: {}"
        .format(trip, Code))

    return (start_point.latitude, start_point.longitude,
            end_point.latitude, end_point.longitude)

In [10]:
def plot_map(plat, plon, dlat, dlon):
    """
    Create a map centred halfway between pickup and dropoff, with one
    draggable marker on each.

    Returns the tuple (map, pickup marker, dropoff marker).
    """
    center_lat = (plat + dlat) / 2
    center_lon = (plon + dlon) / 2
    trip_map = Map(center=(center_lat, center_lon))

    # The dropoff marker is drawn half-transparent to tell the two apart.
    pickup_marker = Marker(
        location=[plat, plon], draggable=True, opacity=1, title='Pickup')
    dropoff_marker = Marker(
        location=[dlat, dlon], draggable=True, opacity=0.5, title='Dropoff')

    trip_map.add_layer(pickup_marker)
    trip_map.add_layer(dropoff_marker)

    return trip_map, pickup_marker, dropoff_marker

---
## Predictions

In [20]:
def predict_trip(start_loc, end_loc):
    """
    Print the estimate for a trip between two named locations and return
    a map showing its pickup and dropoff markers.
    """
    pickup_lat, pickup_lon, dropoff_lat, dropoff_lon = nyc_trip_estimation(
        start_loc, end_loc)
    # Only the map is handed back; the two marker objects are not needed here.
    trip_map, _pickup_marker, _dropoff_marker = plot_map(
        pickup_lat, pickup_lon, dropoff_lat, dropoff_lon)
    return trip_map

In [21]:
# Demo: geocode two landmark names, print the estimate and display the map
# returned by predict_trip as the cell output.
start_loc = "Empire State Building"
end_loc = "Brooklyn Bridge"

predict_trip(start_loc, end_loc)


Trip has 5.31 miles and will last 25.6 minutes, with a basic cost od $20.88
[Trip Info]: Trip partially in Manhattan; Rate Code: Standard


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

- Google: 20 to 24 minutes and 3.7 to 5.2 miles
- TaxiFareFinder: $21.25, 5.1 miles and 14 min

In [41]:
# Demo: trip between two landmarks, both given as free-text names for the
# geocoder.
start_loc = "Columbia University"
end_loc = "Empire State Building"

predict_trip(start_loc, end_loc)


Trip has 8.49 miles and will last 28.6 minutes, with a basic cost od $25.4
[Trip Info]: Trip inside Manhattan; Rate Code: Standard


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

- Google: 28 to 41 minutes; 5 to 7 miles
- TaxiFareFinder: $26.43, 5.8 miles and 23 minutes

In [42]:
# Demo: end_loc is exactly 'EWR', which makes nyc_trip_estimation pick
# RateCodeID 3.
start_loc = "Carnegie Hall"
end_loc = "EWR"

predict_trip(start_loc, end_loc)


Trip has 18.74 miles and will last 44.2 minutes, with a basic cost od $68.5
[Trip Info]: Trip partially in Manhattan; Rate Code: EWR


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

- Google: 34 to 38 minutes; 16.8 to 20.1 miles
- TaxiFareFinder: $54.04, 17 miles and 33 minutes

In [43]:
# Demo: end_loc is 'JFK', but RateCodeID 2 is only chosen when the pickup
# geocodes inside manh_poly; otherwise the standard code (1) is used.
start_loc = "Hoboken NJ"
end_loc = "JFK"

predict_trip(start_loc, end_loc)


Trip has 18.95 miles and will last 78.3 minutes, with a basic cost od $64.56
[Trip Info]: Trip outside Manhattan; Rate Code: Standard


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

- Google: 62 to 67 minutes and 19.2 to 33.8 miles
- TaxiFareFinder: $67.45, 21.7 miles and 59 min

In [44]:
# Demo: JFK rate-code case -- RateCodeID 2 is selected when the pickup lies
# inside manh_poly and end_loc is exactly 'JFK'.
start_loc = "Empire State Building"
end_loc = "JFK"

predict_trip(start_loc, end_loc)


Trip has 17.01 miles and will last 67.0 minutes, with a basic cost od $52.0
[Trip Info]: Trip partially in Manhattan; Rate Code: JFK


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

- Google: 43 to 52 minutes and 15.2 to 20.4 miles
- TaxiFareFinder: $52.00 (special Fare), 17.6 miles and 42 min

In [45]:
# Demo: street addresses instead of landmark names; both strings are passed
# to the geocoder exactly as written (including the trailing space in end_loc).
start_loc = "152 W 10th St New York, NY"
end_loc = "249 E 65th St, New York, NY "

predict_trip(start_loc, end_loc)


Trip has 3.7 miles and will last 32.2 minutes, with a basic cost od $20.17
[Trip Info]: Trip partially in Manhattan; Rate Code: Standard


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

- Google: 29 to 31 min and 3.5 to 3.6 miles
- TaxiFareFinder: $20.35. 3.5 miles and 21 min

Verify the fare at *TaxiFareFinder*:
https://www.taxifarefinder.com/main.php?city=NY

In [12]:
# Demo: another landmark-to-landmark trip, run through predict_trip.
start_loc = "Empire State Building"
end_loc = "World Trade Center"

predict_trip(start_loc, end_loc)


Trip has 4.12 miles and will last 22.0 minutes, with a basic cost od $17.73
[Trip Info]: Trip inside Manhattan; Rate Code: Standard


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

In [26]:
# Demo: hotel name as pickup, landmark as dropoff.
start_loc = "New York Marriott Downtown"
end_loc = "Empire State Building"

predict_trip(start_loc, end_loc)


Trip has 4.8 miles and will last 28.0 minutes, with a basic cost od $21.57
[Trip Info]: Trip inside Manhattan; Rate Code: Standard


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

## Getting  Fare/Duration directly from Map

In [11]:
def get_mark_locations():
    """
    Create a map with two draggable markers placed diagonally around a
    fixed centre point, 0.01 degrees away in both latitude and longitude.

    Returns the tuple (map, pickup marker, dropoff marker).
    """
    offset = 0.01
    center = [40.7227, -74.0039]
    center_lat, center_lon = center

    nyc_map = Map(center=center)

    # Pickup starts below/left of the centre, dropoff above/right of it.
    pickup_marker = Marker(
        location=[center_lat - offset, center_lon - offset],
        draggable=True, opacity=1, title='Pickup')
    dropoff_marker = Marker(
        location=[center_lat + offset, center_lon + offset],
        draggable=True, opacity=0.5, title='Dropoff')

    nyc_map.add_layer(pickup_marker)
    nyc_map.add_layer(dropoff_marker)

    return nyc_map, pickup_marker, dropoff_marker

In [12]:
# Build the interactive map; `mp` (pickup) and `md` (dropoff) are draggable
# markers whose locations are read by the cells below.
# Note: this rebinds `m`, which earlier held the bounding-box map.
m, mp, md = get_mark_locations()
m

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

In [18]:
# Show the current marker positions as [latitude, longitude]. The values
# depend on where the markers were dragged on the map above, so they are not
# reproducible from code alone.
print ("Pickup location: ", mp.location)
print ("Dropoff location: ", md.location)

Pickup location:  [40.709548030891845, -74.01469945907594]
Dropoff location:  [40.748785032897864, -73.98560285568239]


In [16]:
# Estimate the trip between the two markers. Marker locations are stored as
# [latitude, longitude], while the function expects longitude first, hence the
# swapped indices.
# NOTE(review): in_man/out_man are hard-coded here rather than derived from
# manh_poly, so they are only right while both markers stay inside Manhattan.
estimate_nyc_taxi_fare_duration(mp.location[1],
                                    mp.location[0],
                                    md.location[1],
                                    md.location[0],
                                    RateCodeID=1,
                                    passenger_count=1,
                                    in_man=1,
                                    out_man=0)


Trip has 4.64 miles and will last 28.3 minutes, with a basic cost od $21.27
