# Exploratory notebook for the final project

In [4]:
# general imports
import datetime
import itertools  # itertools handles the cycling
import json
import os
import warnings
from pprint import pprint

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import shapefile as shp

warnings.filterwarnings("ignore")

# bokeh imports
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, FactorRange,GMapOptions,Dropdown,CDSView, BooleanFilter
from bokeh.models import Legend, HoverTool

from bokeh.core.properties import value
from bokeh.transform import factor_cmap
# select a palette
from bokeh.palettes import Spectral3
from bokeh.palettes import Category20b_13 as palette
from bokeh.palettes import Category20b_14 as palette2
from bokeh.transform import dodge
from bokeh.io import reset_output, show
from bokeh.plotting import gmap

# sklearn imports
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn import datasets, metrics, model_selection, svm
from sklearn.metrics import classification_report

sns.set(style='darkgrid', palette='muted', color_codes=True)

# Magic command useful jupyter notebook
%matplotlib inline

#Set plot size.
plt.rcParams['figure.figsize'] = [13,7]

#Set font size
plt.rcParams.update({'font.size':16})

In [6]:
# Load the DOHMH restaurant inspection results CSV from the working directory.
df = pd.read_csv('DOHMH_New_York_City_Restaurant_Inspection_Results.csv')

In [7]:
# List the column names available in the inspection dataset.
df.columns

Index(['CAMIS', 'DBA', 'BORO', 'BUILDING', 'STREET', 'ZIPCODE', 'PHONE',
       'CUISINE DESCRIPTION', 'INSPECTION DATE', 'ACTION', 'VIOLATION CODE',
       'VIOLATION DESCRIPTION', 'CRITICAL FLAG', 'SCORE', 'GRADE',
       'GRADE DATE', 'RECORD DATE', 'INSPECTION TYPE', 'Latitude', 'Longitude',
       'Community Board', 'Council District', 'Census Tract', 'BIN', 'BBL',
       'NTA'],
      dtype='object')

In [8]:
# Number of non-null entries in each column.
df.count()

CAMIS                    402052
DBA                      401681
BORO                     402052
BUILDING                 401788
STREET                   402050
ZIPCODE                  396568
PHONE                    402035
CUISINE DESCRIPTION      402052
INSPECTION DATE          402052
ACTION                   400764
VIOLATION CODE           396492
VIOLATION DESCRIPTION    393097
CRITICAL FLAG            393097
SCORE                    385289
GRADE                    203739
GRADE DATE               201077
RECORD DATE              402052
INSPECTION TYPE          400764
Latitude                 401630
Longitude                401630
Community Board          396148
Council District         396160
Census Tract             396160
BIN                      394401
BBL                      401630
NTA                      396148
dtype: int64

In [9]:
# Number of missing values in each column.
df.isnull().sum()

CAMIS                         0
DBA                         371
BORO                          0
BUILDING                    264
STREET                        2
ZIPCODE                    5484
PHONE                        17
CUISINE DESCRIPTION           0
INSPECTION DATE               0
ACTION                     1288
VIOLATION CODE             5560
VIOLATION DESCRIPTION      8955
CRITICAL FLAG              8955
SCORE                     16763
GRADE                    198313
GRADE DATE               200975
RECORD DATE                   0
INSPECTION TYPE            1288
Latitude                    422
Longitude                   422
Community Board            5904
Council District           5892
Census Tract               5892
BIN                        7651
BBL                         422
NTA                        5904
dtype: int64

In [10]:
# Preview GRADE next to SCORE for the first 50 rows.
df[['GRADE', 'SCORE']].head(50)

Unnamed: 0,GRADE,SCORE
0,A,13.0
1,B,19.0
2,,75.0
3,A,12.0
4,A,11.0
5,,10.0
6,,52.0
7,A,11.0
8,A,7.0
9,A,9.0


In [11]:
# Columns used to identify, locate and score a restaurant.
location_columns = [
    'CAMIS', 'CUISINE DESCRIPTION', 'STREET', 'SCORE',
    'Latitude', 'Longitude', 'DBA', 'BORO',
]
# Keep only rows where all of those columns are populated.
# Note: despite the name, no de-duplication happens here, only dropna().
df_unique = df[location_columns].dropna()

In [12]:
# Average SCORE for each distinct combination of the identifying columns;
# as_index=False keeps the keys as ordinary columns in the result.
restaurant_keys = [
    'CAMIS', 'CUISINE DESCRIPTION', 'STREET',
    'Latitude', 'Longitude', 'DBA', 'BORO',
]
scores_by_restaurant = df_unique.groupby(restaurant_keys, as_index=False)
df_loc = scores_by_restaurant['SCORE'].mean()

In [13]:
# Display the aggregated frame (pandas elides the middle rows in the rendering).
df_loc

Unnamed: 0,CAMIS,CUISINE DESCRIPTION,STREET,Latitude,Longitude,DBA,BORO,SCORE
0,30075445,Bakery,MORRIS PARK AVE,40.848231,-73.855972,MORRIS PARK BAKE SHOP,Bronx,10.600000
1,30112340,Hamburgers,FLATBUSH AVENUE,40.662652,-73.962081,WENDY'S,Brooklyn,19.809524
2,30191841,Irish,WEST 57 STREET,40.767326,-73.984310,DJ REYNOLDS PUB AND RESTAURANT,Manhattan,10.888889
3,40356018,American,STILLWELL AVENUE,40.579920,-73.982090,RIVIERA CATERERS,Brooklyn,11.125000
4,40356483,Delicatessen,AVENUE U,40.620112,-73.906989,WILKEN'S FINE FOOD,Brooklyn,13.000000
...,...,...,...,...,...,...,...,...
25977,50103853,American,CLARKSON AVE,40.655755,-73.944580,D BUILDING CAFE,Brooklyn,2.000000
25978,50103876,Pizza/Italian,METROPOLITAN AVE,40.712469,-73.895242,FONTANA PIZZERIA,Queens,25.000000
25979,50103991,Pizza,TRINITY PL,40.709494,-74.011813,LAZZARO,Manhattan,2.000000
25980,50104160,Café/Coffee/Tea,ORCHARD ST,40.717976,-73.990296,SUNDAY TO SUNDAY,Manhattan,2.000000


In [14]:
# Distinct cuisine labels present in df_loc.
# `cusines` (sic) is the spelling the later plotting cells rely on.
cusines = df_loc['CUISINE DESCRIPTION'].unique()

In [15]:
# Show the cuisine labels as a plain Python list.
cusines.tolist()

['Bakery',
 'Hamburgers',
 'Irish',
 'American',
 'Delicatessen',
 'Ice Cream, Gelato, Yogurt, Ices',
 'Hotdogs',
 'Jewish/Kosher',
 'Chinese',
 'Sandwiches/Salads/Mixed Buffet',
 'Caribbean',
 'Donuts',
 'Bagels/Pretzels',
 'Continental',
 'Pizza',
 'Soul Food',
 'Pizza/Italian',
 'Steak',
 'Italian',
 'Polish',
 'Latin (Cuban, Dominican, Puerto Rican, South & Central American)',
 'German',
 'French',
 'Spanish',
 'Café/Coffee/Tea',
 'Seafood',
 'Tex-Mex',
 'Bottled beverages, including water, sodas, juices, etc.',
 'Mexican',
 'Japanese',
 'Greek',
 'Thai',
 'Indian',
 'Mediterranean',
 'Russian',
 'Eastern European',
 'Chicken',
 'Ethiopian',
 'Barbecue',
 'Middle Eastern',
 'Korean',
 'Egyptian',
 'English',
 'Pancakes/Waffles',
 'Other',
 'Chinese/Cuban',
 'Asian',
 'Portuguese',
 'Indonesian',
 'Armenian',
 'Turkish',
 'Moroccan',
 'Hawaiian',
 'Vegetarian',
 'Filipino',
 'Juice, Smoothies, Fruit Salads',
 'Brazilian',
 'Vietnamese/Cambodian/Malaysia',
 'Soups & Sandwiches',
 'Af

In [16]:
# Hand-picked subset of cuisines to plot. This rebinds `cusines`, replacing the
# full array of labels computed from df_loc in the earlier cell.
cusines = [
    "Bakery", "Hamburgers", "Irish", "American", "Delicatessen",
    "Ice Cream, Gelato, Yogurt, Ices",
    "Hotdogs", "Jewish/Kosher", "Chinese",
    "Sandwiches/Salads/Mixed Buffet",
    "Caribbean", "Donuts", "Bagels/Pretzels", "Continental",
    "Pizza", "Soul Food", "Pizza/Italian", "Steak", "Italian",
]

In [17]:
# For each distinct (Latitude, Longitude) pair, keep the first cuisine and name found.
# NOTE(review): the preview of this frame shows a (0.0, 0.0) pair, which looks like a
# placeholder for missing coordinates — confirm and consider filtering it out.
test = df_loc.groupby(['Latitude','Longitude']).first()[['CUISINE DESCRIPTION', 'DBA']]

In [18]:
# Preview the first rows of the per-coordinate frame.
test.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,CUISINE DESCRIPTION,DBA
Latitude,Longitude,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,0.0,Continental,THE GEORGIAN SUITE KITCHEN
40.508069,-74.228282,American,SOUTH SHORE SWIMMING CLUB
40.509115,-74.246612,Mexican,DON CESAR
40.509175,-74.246464,Pizza/Italian,MANGIA
40.509829,-74.24761,American,B & E NEIGHBORHOOD PUB


In [19]:
# Look up the options accepted by GMapOptions.
# NOTE(review): interactive help output is exploration residue; consider removing
# this cell before sharing the notebook.
help(GMapOptions)

Help on class GMapOptions in module bokeh.models.map_plots:

class GMapOptions(MapOptions)
 |  GMapOptions(*args, **kwargs)
 |  
 |  Options for ``GMapPlot`` objects.
 |  
 |  Method resolution order:
 |      GMapOptions
 |      MapOptions
 |      bokeh.model.Model
 |      bokeh.core.has_props.HasProps
 |      bokeh.util.callback_manager.PropertyCallbackManager
 |      bokeh.util.callback_manager.EventCallbackManager
 |      builtins.object
 |  
 |  Data descriptors defined here:
 |  
 |  map_type
 |      The `map type`_ to use for the ``GMapPlot``.
 |      
 |      .. _map type: https://developers.google.com/maps/documentation/javascript/reference#MapTypeId
 |  
 |  scale_control
 |      Whether the Google map should display its distance scale control.
 |  
 |  styles
 |      A JSON array of `map styles`_ to use for the ``GMapPlot``. Many example styles can
 |      `be found here`_.
 |      
 |      .. _map styles: https://developers.google.com/maps/documentation/javascript/reference#

In [28]:
# Interactive Google map of restaurants with one togglable glyph layer per cuisine.
# Every layer shares a single ColumnDataSource; a CDSView with a BooleanFilter
# selects the rows that belong to each cuisine.
source = ColumnDataSource(df_loc)

circle = {}  # cuisine label -> glyph renderer drawn for it
items = []   # (label, [renderer]) pairs consumed by the custom Legend
colors = itertools.cycle(palette)  # repeats the palette if it runs out of colours

map_options = GMapOptions(lat=40.788231, lng=-73.955972, map_type="roadmap", zoom=11)
reset_output()
output_notebook()

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is read from the environment so that it is never stored in the notebook;
# set GOOGLE_MAPS_API_KEY before starting Jupyter (a missing key raises KeyError).
# NOTE(review): a key used to be hard-coded here — treat it as leaked and rotate it.
google_api_key = os.environ["GOOGLE_MAPS_API_KEY"]

p = gmap(google_api_key, map_options, title="New York", plot_width=1100, plot_height=700)

for cuisine, color in zip(cusines, colors):
    # Row mask for this cuisine, computed on the one relevant column and handed
    # to BooleanFilter as plain Python bools.
    booleans = (df_loc['CUISINE DESCRIPTION'] == cuisine).tolist()
    view1 = CDSView(source=source, filters=[BooleanFilter(booleans)])
    # muted=True: every layer starts in the muted state and is toggled from the
    # legend (click_policy="mute" below). muted_alpha=False is kept from the
    # original — presumably it acts as alpha 0, i.e. hidden while muted; confirm.
    circle[cuisine] = p.circle(
        x="Longitude", y="Latitude", size=7, fill_alpha=0.8,
        source=source, view=view1,
        muted_alpha=False, muted=True, color=color,
    )
    items.append((cuisine, [circle[cuisine]]))

# Custom legend placed outside the plot area, to the right.
legend = Legend(items=items, location=(0, 10), click_policy="mute")
p.add_layout(legend, 'right')

# Hover tooltip showing the restaurant name (DBA column).
hover = HoverTool()
hover.tooltips = [
    ('Name', '@DBA'),
]
p.add_tools(hover)

show(p)


In [None]:
# Interactive Google map of restaurants with one togglable glyph layer per cuisine.
# Each cuisine gets its own ColumnDataSource holding only its rows.
circle = {}  # cuisine label -> glyph renderer drawn for it
items = []   # (label, [renderer]) pairs consumed by the custom Legend
colors = itertools.cycle(palette)  # repeats the palette if it runs out of colours

map_options = GMapOptions(lat=40.848231, lng=-73.855972, map_type="roadmap", zoom=11)
reset_output()
output_notebook()

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is read from the environment so that it is never stored in the notebook;
# set GOOGLE_MAPS_API_KEY before starting Jupyter (a missing key raises KeyError).
# NOTE(review): a key used to be hard-coded here — treat it as leaked and rotate it.
google_api_key = os.environ["GOOGLE_MAPS_API_KEY"]

# Hover tooltip showing the restaurant name (DBA column).
TOOLTIPS = [
    ("Name", "@DBA")
]

p = gmap(google_api_key, map_options, title="New York", plot_width=1100, plot_height=700)
p.add_tools(HoverTool(tooltips=TOOLTIPS))

for cuisine, color in zip(cusines, colors):
    source = ColumnDataSource(df_loc[df_loc['CUISINE DESCRIPTION'] == cuisine])
    # muted=True: every layer starts in the muted state and is toggled from the
    # legend (click_policy="mute" below). muted_alpha=False is kept from the
    # original — presumably it acts as alpha 0, i.e. hidden while muted; confirm.
    circle[cuisine] = p.circle(
        x="Longitude", y="Latitude", size=7, fill_alpha=0.8,
        source=source,
        muted_alpha=False, muted=True, color=color,
    )
    items.append((cuisine, [circle[cuisine]]))

# Custom legend placed outside the plot area, to the right.
legend = Legend(items=items, location=(0, 10), click_policy="mute")
p.add_layout(legend, 'right')

show(p)



In [None]:
# Categorical bar chart with one togglable vbar layer per entry of `focuscrimes`.
# NOTE(review): `df_hour` and `focuscrimes` are not defined in any visible cell of
# this notebook, and the source built from df_loc is read through a 'Time' column
# that the displayed df_loc does not have. This cell looks carried over from another
# analysis and would presumably fail on Restart & Run All — confirm before keeping it.
source = ColumnDataSource(df_loc)
colors = itertools.cycle(palette)

# x-axis factors: the index labels of df_hour rendered as strings.
hours = [str(elem) for elem in df_hour.index.to_list()]

output_notebook()

p = figure(x_range = FactorRange(factors = hours),width=1000, height=400)


bar ={} # column name -> vbar renderer
items = [] # (label, [renderer]) pairs for the custom legend
# Draw one bar layer per (column name, colour) pair.
for indx,i in enumerate(zip(focuscrimes,colors)):
    bar[i[0]] = p.vbar(x='Time', top=i[0],width = 0.9,source= source,muted_alpha = False, muted = True,color=i[1]) 
    # i[0] is the source column that supplies the bar heights (top=...);
    # i[1] is the colour paired with it.
    items.append((i[0], [bar[i[0]]])) # register the layer with the legend
legend = Legend(items=items, location=(0,20),click_policy="mute") # clicking an entry toggles muting
p.add_layout(legend, 'right') # place the legend outside the plot, on the right
# NOTE(review): the figure is built but never passed to show() in this cell.
### if you read the guide, it will make sense

In [None]:
# Debug aid: print each (cuisine, colour) pairing and then the cuisine on its own.
for position, pair in enumerate(zip(cusines, colors)):
    print(position, pair)
    print(pair[0])

## API tryout

In [None]:
import requests
import pandas as pd
import folium

In [None]:
# Request New York restaurants from the API at opentable.herokuapp.com,
# asking for 100 results per page, and keep the decoded JSON for inspection.
params = {'city': 'New York', 'per_page': '100'}
# A timeout stops the cell from hanging indefinitely if the service does not answer.
r = requests.get('http://opentable.herokuapp.com/api/restaurants', params=params, timeout=30)
# Fail loudly on an HTTP error status instead of trying to decode an error page.
r.raise_for_status()
json_response = r.json()
json_response

In [None]:
# Turn the 'restaurants' entry of the JSON response into a DataFrame and display it.
data = pd.DataFrame.from_dict(json_response['restaurants'])
data

In [None]:
def create_restdata(city='New York', per_page=100, last_page=17, timeout=30):
    """Download restaurant listings page by page and return them as one DataFrame.

    Pages 1..last_page are requested from the API at opentable.herokuapp.com and
    the 'restaurants' entry of each JSON response becomes one chunk of the result.
    Calling the function with no arguments requests the same pages as before
    (New York, 100 per page, pages 1 through 17).

    Parameters
    ----------
    city : str
        Value sent as the ``city`` query parameter.
    per_page : int or str
        Value sent as the ``per_page`` query parameter.
    last_page : int
        Number of the last page to request (inclusive). Values below 1 request
        nothing and yield an empty DataFrame.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        All downloaded rows with a fresh 0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If any page answers with an HTTP error status.
    """
    url = 'http://opentable.herokuapp.com/api/restaurants'
    frames = []
    for page in range(1, last_page + 1):
        params = {'city': city, 'per_page': per_page, 'page': page}
        r = requests.get(url, params=params, timeout=timeout)
        # Stop on an HTTP error instead of trying to decode an error page.
        r.raise_for_status()
        frames.append(pd.DataFrame.from_dict(r.json()['restaurants']))
    if not frames:
        # pd.concat refuses an empty list, so return an empty frame explicitly.
        return pd.DataFrame()
    # Concatenate once (not inside the loop) and renumber the rows so that
    # index labels are unique across pages.
    return pd.concat(frames, ignore_index=True)

In [None]:
# Download the restaurant listings.
# NOTE(review): this rebinds `df`, which earlier cells used for the inspection data;
# after this point `df` refers to the output of create_restdata().
df = create_restdata()

In [None]:
# Save the downloaded listings to CSV without writing the index column.
df.to_csv('NYrestaurants.csv', index=False)

In [None]:
# Reload the listings from the CSV written by the previous cell.
df = pd.read_csv('NYrestaurants.csv')

In [None]:
# Print the price value and name of every restaurant, one per line.
for _, restaurant in df.iterrows():
    price, name = restaurant['price'], restaurant['name']
    print(price, name)

In [None]:
# Create a folium map centred on [40.77, -73.96] with an initial zoom of 13.
mapz = folium.Map([40.77, -73.96], zoom_start=13)

In [None]:
# Render the map.
mapz

In [None]:
# Add a marker, with the restaurant name as popup, for every row whose price equals 4.
subdf = df[df['price'] == 4]

for _, row in subdf.iterrows():
    # Use the row yielded by iterrows() directly: looking it up again through
    # subdf.loc[index] is redundant and returns several rows when labels repeat.
    folium.Marker(
        [row['lat'], row['lng']],
        popup=row['name'],
    ).add_to(mapz)
# NOTE: markers are added to the existing `mapz` object, so re-running this cell
# without re-creating the map stacks duplicate markers.
mapz