In [1]:
from census import Census
from config import (api_key, gkey)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Census & gmaps API Keys

c = Census(api_key, year=2019)


# Configure gmaps
gmaps.configure(api_key=gkey)

print(api_key)
print(gkey)

704c072b5bb03ba8ab6e2f758756a33931eebc8d
AIzaSyA0RYZHOmd0A6AagliHke2J4YmG0bj93ik


In [2]:
gkey

'AIzaSyA0RYZHOmd0A6AagliHke2J4YmG0bj93ik'

In [3]:
locations_df = pd.read_csv("Cleaned.csv")
locations_df.head()

Unnamed: 0.1,Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,Michelin Star Count
0,0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...,1
1,1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
2,2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
3,3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
4,4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...,1


In [4]:
locations_df.rename(columns = {"Michelin Star Count":"michelin_star_count"}, inplace = True)

In [5]:
regionsUS_df = locations_df.loc[(locations_df['region'] == "California") | (locations_df['region'] == "New York City") | (locations_df['region'] == "Chicago") | (locations_df['region'] == "Washington DC")]
regionsUS_df

Unnamed: 0.1,Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,michelin_star_count
12,12,Chez TJ,2019,37.394680,-122.08044,South San Francisco,California,94041,Contemporary,$$$$,https://guide.michelin.com/us/en/california/so...,1
13,13,Protégé,2019,37.427853,-122.14362,South San Francisco,California,94301,Contemporary,$$$,https://guide.michelin.com/us/en/california/so...,1
14,14,Madera,2019,37.420140,-122.21151,San Francisco,California,94025,Contemporary,$$$,https://guide.michelin.com/us/en/california/sa...,1
15,15,The Village Pub,2019,37.428970,-122.25178,San Francisco,California,94062,Contemporary,$$$,https://guide.michelin.com/us/en/california/sa...,1
16,16,Plumed Horse,2019,37.256480,-122.03537,South San Francisco,California,95070,Contemporary,$$$$,https://guide.michelin.com/us/en/california/so...,1
...,...,...,...,...,...,...,...,...,...,...,...,...
681,22,Per Se,2019,40.768280,-73.98292,New York,New York City,10019,Contemporary,$$$$,https://guide.michelin.com/us/en/new-york-stat...,3
682,23,Le Bernardin,2019,40.761770,-73.98223,New York,New York City,10019,Seafood,$$$$,https://guide.michelin.com/us/en/new-york-stat...,3
683,24,Eleven Madison Park,2019,40.741700,-73.98712,New York,New York City,10010,Contemporary,$$$$,https://guide.michelin.com/us/en/new-york-stat...,3
684,25,Chef's Table at Brooklyn Fare,2019,40.688720,-73.98581,New York,New York City,10018,Contemporary,$$$$,https://guide.michelin.com/us/en/new-york-stat...,3


In [6]:
US_url = regionsUS_df["url"].tolist()

In [7]:
restaurant_name = locations_df["name"].tolist()

In [8]:
#International map - displays Restaurant Name
marker_locations = locations_df[['latitude', 'longitude']]

# Create a marker_layer using the poverty list to fill the info box
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations,
    info_box_content=[f"Restaurant Name: {name}" for name in restaurant_name])
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))

In [9]:
#US Only - displays URL from CSV
US_marker_locations = regionsUS_df[['latitude', 'longitude']]

# Create a marker_layer using the poverty list to fill the info box
fig = gmaps.figure()
US_markers = gmaps.marker_layer(US_marker_locations,
    info_box_content=[f"Michelin Guide URL: {url}" for url in US_url])
fig.add_layer(US_markers)
fig

Figure(layout=FigureLayout(height='420px'))

In [10]:
# regionsUS_df.head()

# def make_clickable(val):
#     return f'<a target="_blank" href="{val}">{val}</a>'

# regionsUS_df.style.format({'url': make_clickable})

In [11]:
int_star_count = locations_df.michelin_star_count.astype(float)

In [12]:
fig = gmaps.figure()

heat_layer_int = gmaps.heatmap_layer(marker_locations, weights=int_star_count, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)

fig.add_layer(heat_layer_int)


fig

Figure(layout=FigureLayout(height='420px'))

In [13]:
star_count = regionsUS_df.michelin_star_count.astype(float)

In [14]:
#US-only, heat map and restaurant markers

fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(US_marker_locations, weights=star_count, 
                                 dissipating=False, max_intensity=100,
                                 point_radius = 1)

fig.add_layer(heat_layer)
fig.add_layer(US_markers)

fig

Figure(layout=FigureLayout(height='420px'))

In [15]:
#use Heat Map from gold-star project per Alexander

In [16]:
unique_cuisines = locations_df.cuisine.unique().tolist()
unique_cuisines

['Creative',
 'Classic cuisine',
 'Market cuisine',
 'Modern cuisine',
 'Japanese',
 'Vegetarian',
 'Contemporary',
 'Indian',
 'Korean',
 'Californian',
 'American',
 'International',
 'Moroccan',
 'Thai',
 'Chinese',
 'Fusion',
 'Italian',
 'French',
 'Asian',
 'Steakhouse',
 'Mexican',
 'Gastropub',
 'Danish',
 'Classic French',
 'Finnish',
 'Mediterranean',
 'Seafood',
 'Cantonese',
 'Dim Sum',
 'Teppanyaki',
 'Sushi',
 'Shanghainese',
 'Noodles and congee',
 'Innovative',
 'Cantonese Roast Meats',
 'Taizhou',
 'Sichuan',
 'European contemporary',
 'Scandinavian',
 'Austrian',
 'Spanish',
 'Modern British',
 'modern',
 'Asian influences',
 'creative',
 'French contemporary',
 'Korean contemporary',
 'Temple cuisine',
 'Street Food',
 'Fujian',
 'Australian',
 'Italian contemporary',
 'Barbecue',
 'Peranakan',
 'Meats and grills',
 'Taiwanese',
 'Hang Zhou',
 'Asian contemporary',
 'Thai Contemporary',
 'Southern Thai',
 'European',
 'Creative British',
 'Traditional British',
 'Reg

In [17]:
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name",
                                      "zip code tabulation area": "zipCode"})

# Add in Employment Rate (Employment Count / Population)
census_pd["Unemployment Rate"] = 100 * \
    census_pd["Unemployment Count"].astype(
        int) / census_pd["Population"].astype(int)


#Final Dataframe
census_pd = census_pd[["zipCode", "Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Unemployment Count", "Unemployment Rate"]]

census_pd.head()

Unnamed: 0,zipCode,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Unemployment Count,Unemployment Rate
0,601,ZCTA5 00601,17113.0,41.9,14361.0,7493.0,10552.0,1968.0,11.500029
1,602,ZCTA5 00602,37751.0,42.9,16807.0,9694.0,18653.0,1420.0,3.76149
2,603,ZCTA5 00603,47081.0,42.1,16049.0,11259.0,23691.0,3197.0,6.790425
3,606,ZCTA5 00606,6392.0,44.3,12119.0,6093.0,4185.0,169.0,2.64393
4,610,ZCTA5 00610,26686.0,42.7,19898.0,10572.0,12204.0,1029.0,3.855954


In [18]:
regionsUS_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 202 entries, 12 to 689
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Unnamed: 0           202 non-null    int64  
 1   name                 202 non-null    object 
 2   year                 202 non-null    int64  
 3   latitude             202 non-null    float64
 4   longitude            202 non-null    float64
 5   city                 202 non-null    object 
 6   region               202 non-null    object 
 7   zipCode              202 non-null    object 
 8   cuisine              202 non-null    object 
 9   price                202 non-null    object 
 10  url                  202 non-null    object 
 11  michelin_star_count  202 non-null    int64  
dtypes: float64(2), int64(3), object(7)
memory usage: 20.5+ KB


In [19]:
mergedcensus_df = pd.merge(regionsUS_df, census_pd, on = "zipCode", how = "inner")
mergedcensus_df.head()

Unnamed: 0.1,Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,michelin_star_count,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Unemployment Count,Unemployment Rate
0,12,Chez TJ,2019,37.39468,-122.08044,South San Francisco,California,94041,Contemporary,$$$$,https://guide.michelin.com/us/en/california/so...,1,ZCTA5 94041,14572.0,34.6,139818.0,81705.0,815.0,361.0,2.477354
1,13,Protégé,2019,37.427853,-122.14362,South San Francisco,California,94301,Contemporary,$$$,https://guide.michelin.com/us/en/california/so...,1,ZCTA5 94301,16582.0,44.9,159148.0,110423.0,1153.0,357.0,2.152937
2,14,Madera,2019,37.42014,-122.21151,San Francisco,California,94025,Contemporary,$$$,https://guide.michelin.com/us/en/california/sa...,1,ZCTA5 94025,43392.0,38.3,161827.0,85240.0,3103.0,843.0,1.942754
3,15,The Village Pub,2019,37.42897,-122.25178,San Francisco,California,94062,Contemporary,$$$,https://guide.michelin.com/us/en/california/sa...,1,ZCTA5 94062,28423.0,45.0,178480.0,95838.0,1885.0,420.0,1.477677
4,16,Plumed Horse,2019,37.25648,-122.03537,South San Francisco,California,95070,Contemporary,$$$$,https://guide.michelin.com/us/en/california/so...,1,ZCTA5 95070,31196.0,50.2,193617.0,94484.0,1455.0,527.0,1.689319


In [20]:
mergedcensus_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 180 entries, 0 to 179
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Unnamed: 0           180 non-null    int64  
 1   name                 180 non-null    object 
 2   year                 180 non-null    int64  
 3   latitude             180 non-null    float64
 4   longitude            180 non-null    float64
 5   city                 180 non-null    object 
 6   region               180 non-null    object 
 7   zipCode              180 non-null    object 
 8   cuisine              180 non-null    object 
 9   price                180 non-null    object 
 10  url                  180 non-null    object 
 11  michelin_star_count  180 non-null    int64  
 12  Name                 180 non-null    object 
 13  Population           180 non-null    float64
 14  Median Age           180 non-null    float64
 15  Household Income     180 non-null    flo

In [21]:
mergedcensus_df.price.value_counts()

$$$$    118
$$$      45
$$       17
Name: price, dtype: int64

In [22]:
mergedcensus_df.columns

Index(['Unnamed: 0', 'name', 'year', 'latitude', 'longitude', 'city', 'region',
       'zipCode', 'cuisine', 'price', 'url', 'michelin_star_count', 'Name',
       'Population', 'Median Age', 'Household Income', 'Per Capita Income',
       'Poverty Count', 'Unemployment Count', 'Unemployment Rate'],
      dtype='object')

In [23]:
#regression

In [24]:
feature = mergedcensus_df[['Population', 'Median Age', 'Household Income', 'Per Capita Income',
       'Poverty Count', 'Unemployment Count', 'Unemployment Rate', 'michelin_star_count']]
target = mergedcensus_df["price"]

reg = LogisticRegression()
reg.fit(np.array(feature), np.array(target))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

In [25]:
predicted = reg.predict(feature)
actual = target
probs = reg.predict_proba(feature)

In [26]:
predicted

array(['$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$', '$$', '$$',
       '$$', '$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',
       '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$', '$$$$',

In [27]:
actual

0      $$$$
1       $$$
2       $$$
3       $$$
4      $$$$
       ... 
175    $$$$
176    $$$$
177    $$$$
178    $$$$
179    $$$$
Name: price, Length: 180, dtype: object

In [28]:
probs

array([[0.07574935, 0.28126144, 0.6429892 ],
       [0.03103115, 0.20415998, 0.76480887],
       [0.13178075, 0.30858274, 0.55963651],
       [0.07642574, 0.25900651, 0.66456776],
       [0.09859288, 0.29648585, 0.60492127],
       [0.11192692, 0.30738191, 0.58069117],
       [0.2214439 , 0.35416454, 0.42439156],
       [0.11995433, 0.34186283, 0.53818285],
       [0.26237597, 0.34147989, 0.39614414],
       [0.26237597, 0.34147989, 0.39614414],
       [0.26237597, 0.34147989, 0.39614414],
       [0.26237557, 0.34147866, 0.39614577],
       [0.26237557, 0.34147866, 0.39614577],
       [0.06833885, 0.20077151, 0.73088964],
       [0.06833885, 0.20077151, 0.73088964],
       [0.06833885, 0.20077151, 0.73088964],
       [0.06833885, 0.20077151, 0.73088964],
       [0.01073561, 0.14847727, 0.84078712],
       [0.01073551, 0.14847533, 0.84078916],
       [0.09714326, 0.2902883 , 0.61256844],
       [0.09714326, 0.2902883 , 0.61256844],
       [0.09714326, 0.2902883 , 0.61256844],
       [0.

In [29]:
print(confusion_matrix(actual, predicted))

[[  1   0  16]
 [  0   0  45]
 [  3   2 113]]


In [30]:
print(classification_report(actual, predicted))

              precision    recall  f1-score   support

          $$       0.25      0.06      0.10        17
         $$$       0.00      0.00      0.00        45
        $$$$       0.65      0.96      0.77       118

    accuracy                           0.63       180
   macro avg       0.30      0.34      0.29       180
weighted avg       0.45      0.63      0.52       180

