# NYC Taxi data

## Import cuxfilter

In [None]:
from cuxfilter.charts import bokeh, cudatashader
import cuxfilter
from bokeh import palettes
from cuxfilter.layouts import double_feature

import cudf

In [None]:
# Directory that holds the datasets; change it if you downloaded them elsewhere.
DATA_DIR = "/home/ajay/data"

## Download required datasets

In [None]:
# Run cuxfilter's sample-data check for the 'nyc_taxi' dataset against DATA_DIR.
# NOTE(review): per the section heading this downloads the data when needed;
# confirm against the cuxfilter documentation.
from cuxfilter.sampledata import datasets_check

datasets_check(
    "nyc_taxi",
    base_dir=DATA_DIR,
)

## Preprocess the data

In [None]:
!pip install pyproj

cudf_df = cudf.read_csv('./data/nyc_taxi.csv')

from pyproj import Proj, transform

inProj = Proj(init='epsg:4326') # Latitude and longitudes
outProj = Proj(init='epsg:3857') # 2D projected points

cudf_df['dropoff_x'], cudf_df['dropoff_y'] = transform(inProj, outProj, cudf_df['dropoff_longitude'].to_array(), cudf_df['dropoff_latitude'].to_array()) # Apply transformation

cudf_df = cudf_df.drop(['dropoff_latitude', 'dropoff_longitude'], axis=1)
cudf_df = cudf_df.dropna(axis=0)


cudf_df = cudf_df[(cudf_df.dropoff_x > -8239910.23) & (cudf_df.dropoff_x < -8229529.24) & (cudf_df.dropoff_y > 4968481.34) & (cudf_df.dropoff_y < 4983152.92)] # Filter over Manhattan


cudf_df.head()

## Read the dataset

In [None]:
# Build the cuxfilter DataFrame from the preprocessed cudf DataFrame.
cux_df = cuxfilter.DataFrame.from_dataframe(
    cudf_df,
)

## Define charts

In [None]:
# Map tiles for the geo scatter chart: bokeh's 'CARTODBPOSITRON' provider.
from bokeh.tile_providers import get_provider as gp

tile_provider = gp(
    "CARTODBPOSITRON",
)

> Uncomment the lines below and replace MAPBOX_TOKEN with your Mapbox access token string if you want to use Mapbox map tiles. A token can be created for free here: https://www.mapbox.com/help/define-access-token/

In [None]:
#from cuxfilter.assets.custom_tiles import get_provider, Vendors
#tile_provider = get_provider(Vendors.MAPBOX_LIGHT, access_token=MAPBOX_TOKEN)

In [None]:
# chart1: geo scatter of the projected drop-off points, aggregated by count
# and drawn over the selected map tiles. The x/y ranges use the same bounds
# as the Manhattan filter applied during preprocessing.
chart1 = cuxfilter.charts.cudatashader.scatter_geo(
    x="dropoff_x",
    y="dropoff_y",
    aggregate_fn="count",
    tile_provider=tile_provider,
    x_range=(-8239910.23, -8229529.24),
    y_range=(4968481.34, 4983152.92),
)

# chart2: bar chart over the 'passenger_count' column with data_points=9.
chart2 = cuxfilter.charts.bokeh.bar(
    "passenger_count",
    data_points=9,
)

## Create a dashboard object

In [None]:
# Combine both charts into one dashboard using the double_feature layout
# and the dark theme.
d = cux_df.dashboard(
    [chart1, chart2],
    layout=double_feature,
    theme=cuxfilter.themes.dark,
    title="NYC TAXI DATASET",
)

In [None]:
# Dashboard object: as the last expression of the cell, the notebook renders
# its representation.
d

## Starting the dashboard

1. `d.show('url you want the dashboard to run at')` – runs it as a remote dashboard.

2. `d.app('10.110.47.43:8888')` – runs it within the notebook. If you are using Jupyter remotely, use this line instead of the other one, and replace the URL with the current notebook URL.

In [None]:
# Start the dashboard. Replace the address below with the URL (including the
# port) you want the dashboard to run at, e.g.:
# d.show(url='replace this by url you want the dashboard to run at(including port)')
d.show(
    "10.110.47.43:8889",
)

## Export the queried data into a dataframe

In [None]:
# Export the dashboard's queried data and keep it for further analysis.
# NOTE(review): the section heading says the result is a dataframe -- confirm
# the exact return type against the cuxfilter documentation.
queried_df = d.export()