# **Restaurant Star Ratings**

## **Imports**

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
%matplotlib inline
from wordcloud import WordCloud
import re
import nltk
from nltk.corpus import stopwords

#Sklearn preprocessing
from sklearn import preprocessing,set_config
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder,StandardScaler,LabelEncoder
#Scipy
from scipy import stats
from scipy.stats import norm

#Sklearn Models
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn import preprocessing, set_config
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer,make_column_selector,make_column_transformer
from sklearn.linear_model import LogisticRegression
import warnings
from sklearn.decomposition import PCA

# Geospatial map
import folium
import pandas as pd
import geopandas as gpd
from folium.plugins import MarkerCluster

# Warnings 
import warnings 
warnings.filterwarnings("ignore")

# Configuring diagrams
set_config(display = 'diagram')

## **Import Data**

In [2]:
SR1 = pd.read_csv('F:\Coding Dojo\Data\Resturant Start Ratings\one-star-michelin-restaurants.csv') 

In [3]:
SR2 = pd.read_csv('F:/Coding Dojo/Data/Resturant Start Ratings/two-stars-michelin-restaurants.csv')

In [4]:
SR3 = pd.read_csv('F:/Coding Dojo/Data/Resturant Start Ratings/three-stars-michelin-restaurants.csv')

In [5]:
SR1.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...
1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...


In [6]:
SR2.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,SENNS.Restaurant,2019,47.83636,13.06389,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
1,Ikarus,2019,47.79536,13.00695,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Mraz & Sohn,2019,48.23129,16.37637,Wien,Austria,1200,Creative,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
3,Konstantin Filippou,2019,48.21056,16.37996,Wien,Austria,1010,Modern cuisine,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
4,Silvio Nickol Gourmet Restaurant,2019,48.20558,16.37693,Wien,Austria,1010,Modern cuisine,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...


In [7]:
SR3.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Amador,2019,48.25406,16.35915,Wien,Austria,1190,Creative,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
1,Manresa,2019,37.22761,-121.98071,South San Francisco,California,95030,Contemporary,$$$$,https://guide.michelin.com/us/en/california/so...
2,Benu,2019,37.78521,-122.39876,San Francisco,California,94105,Asian,$$$$,https://guide.michelin.com/us/en/california/sa...
3,Quince,2019,37.79762,-122.40337,San Francisco,California,94133,Contemporary,$$$$,https://guide.michelin.com/us/en/california/sa...
4,Atelier Crenn,2019,37.79835,-122.43586,San Francisco,California,94123,Contemporary,$$$$,https://guide.michelin.com/us/en/california/sa...


In [8]:
# add a 'Star Rating' Column to each Dataframe 
SR1['Star Rating'] = 1
SR2['Star Rating'] = 2
SR3['Star Rating'] = 3

In [9]:
# Combine all dataframes 
sr = pd.concat([SR1, SR2, SR3], ignore_index = True)

In [10]:
sr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 695 entries, 0 to 694
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         695 non-null    object 
 1   year         695 non-null    int64  
 2   latitude     695 non-null    float64
 3   longitude    695 non-null    float64
 4   city         693 non-null    object 
 5   region       695 non-null    object 
 6   zipCode      501 non-null    object 
 7   cuisine      695 non-null    object 
 8   price        519 non-null    object 
 9   url          695 non-null    object 
 10  Star Rating  695 non-null    int64  
dtypes: float64(2), int64(2), object(7)
memory usage: 59.9+ KB


In [11]:
sr.describe()

Unnamed: 0,year,latitude,longitude,Star Rating
count,695.0,695.0,695.0,695.0
mean,2018.943885,37.260124,4.417,1.261871
std,0.23031,18.025875,81.095189,0.54527
min,2018.0,-23.634005,-123.719444,1.0
25%,2019.0,25.040718,-73.98292,1.0
50%,2019.0,40.72713,-0.14957,1.0
75%,2019.0,51.508962,100.578855,1.0
max,2019.0,63.43626,127.10257,3.0


In [12]:
sr.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,Star Rating
0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...,1
1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...,1


In [13]:
sr.isna().sum()

name             0
year             0
latitude         0
longitude        0
city             2
region           0
zipCode        194
cuisine          0
price          176
url              0
Star Rating      0
dtype: int64

In [14]:
def drop_columns(sr):
    # Check if 'zipcode' and 'price' columns exist in the dataframe
    if 'zipcode' in sr.columns and 'price' in sr.columns:
        # Drop the specified columns
        dcsr = sr.drop(['zipcode', 'price'], axis=1)
        print("Columns 'zipcode' and 'price' dropped successfully.")
    else:
        print("Columns 'zipcode' and 'price' not found.")
# Example usage:
# Assuming 'your_dataframe' is the name of your DataFrame
drop_columns(sr)        

Columns 'zipcode' and 'price' not found.


In [15]:
# Filter for Michelin star restaurants in New York
msny = sr[(sr['city'] == 'New York')]

In [16]:
print (msny)

                              name  year   latitude  longitude      city  \
204                      Del Posto  2019  40.743270  -74.00770  New York   
205      Le Grill de Joël Robuchon  2019  40.742897  -74.00770  New York   
206                       L'Appart  2019  40.711903  -74.01544  New York   
207                          Okuda  2019  40.743793  -74.00633  New York   
208                         Wallsé  2019  40.735380  -74.00814  New York   
..                             ...   ...        ...        ...       ...   
680                           Masa  2019  40.768550  -73.98335  New York   
681                         Per Se  2019  40.768280  -73.98292  New York   
682                   Le Bernardin  2019  40.761770  -73.98223  New York   
683            Eleven Madison Park  2019  40.741700  -73.98712  New York   
684  Chef's Table at Brooklyn Fare  2019  40.688720  -73.98581  New York   

            region zipCode       cuisine price  \
204  New York City   10011       Ital

In [17]:
msny.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,Star Rating
204,Del Posto,2019,40.74327,-74.0077,New York,New York City,10011.0,Italian,$$$$,https://guide.michelin.com/us/en/new-york-stat...,1
205,Le Grill de Joël Robuchon,2019,40.742897,-74.0077,New York,New York City,,French,$$$$,https://guide.michelin.com/us/en/new-york-stat...,1
206,L'Appart,2019,40.711903,-74.01544,New York,New York City,10281.0,French,$$$$,https://guide.michelin.com/us/en/new-york-stat...,1
207,Okuda,2019,40.743793,-74.00633,New York,New York City,,Japanese,$$$$,https://guide.michelin.com/us/en/new-york-stat...,1
208,Wallsé,2019,40.73538,-74.00814,New York,New York City,10014.0,Austrian,$$$$,https://guide.michelin.com/us/en/new-york-stat...,1


In [18]:
nyc_rsr = msny

In [19]:
# Save the filtered dataset to a new CSV file
nyc_rsr.to_csv('nyc_rsr', index=False)

In [20]:
# Function to retrive list of Restaurant names from Michelin Star dataset to filter out of Inspections data
def compile_unique_restaurant_names(nyc_rsr, name):
    """
    Compiles a list of unique restaurant names from a given dataset.
Args:
    - dataset_filename (str): The filename of the dataset containing restaurant names.
    - restaurant_name_column (str): The name of the column in the dataset that contains restaurant names.

    Returns:
    - List of unique restaurant names.
    """
    # Get a list of unique restaurant names
    unique_names = nyc_rsr[name].unique()

    return unique_names
    

In [43]:
#Names list
names_list = compile_unique_restaurant_names(nyc_rsr, 'name')

In [44]:
sorted_names = sorted(names_list)
for name in sorted_names:
    print(name)

Agern
Ai Fiori
Aldea
Aquavit
Aska
Atera
Atomix
Babbo
Bar Uchū
Blanca
Blue Hill
Bouley at Home
Bâtard
Café Boulud
Café China
Carbone
Casa Enríque
Casa Mono
Caviar Russe
Chef's Table at Brooklyn Fare
Claro
Contra
Cote
Daniel
Del Posto
Eleven Madison Park
Faro
Gabriel Kreuther
Gotham Bar and Grill
Gramercy Tavern
Hirohisa
Ichimura at Uchū
Jean-Georges
Jeju Noodle Bar
Jewel Bako
Jungsik
Junoon
Kajitsu
Kanoyama
Ko
Kosaka
Kyo Ya
L'Appart
L'Atelier de Joël Robuchon
Le Bernardin
Le Coucou
Le Grill de Joël Robuchon
Marea
Masa
Meadowsweet
Nix
NoMad
Noda
Okuda
Oxomoco
Per Se
Peter Luger
Satsuki
Sushi Amane
Sushi Ginza Onodera
Sushi Inoue
Sushi Nakazawa
Sushi Noz
Sushi Yasuda
Tempura Matsui
The Clocktower
The Finch
The Modern
The Musket Room
The River Café
Tuome
Uncle Boons
Wallsé
ZZ's Clam Bar


In [24]:
# Loading NYC Restaurant Data
rd =pd.read_csv('rd')

In [25]:
rd.head()

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,INSPECTION TYPE,Latitude,Longitude,Council District,Cleaned Restaurant Name
0,41408361,BEKY BAKERY AND COFFEE SHOP,Queens,3778,103 STREET,11368.0,Spanish,01/06/2020,No violations were recorded at the time of thi...,,,Not Applicable,,,,Administrative Miscellaneous / Initial Inspection,40.752738,-73.864188,21.0,BEKY BAKERY AND COFFEE SHOP
1,50124500,EAT OFF BEAT,Manhattan,75,9 AVENUE,10011.0,Other,09/21/2023,Establishment re-opened by DOHMH.,,,Not Applicable,0.0,Z,09/21/2023,Pre-permit (Operational) / Reopening Inspection,40.741869,-74.004713,3.0,EAT OFF BEAT
2,50142175,UVA NEXT DOOR,Manhattan,1484,2 AVENUE,10075.0,,01/01/1900,,,,Not Applicable,,,,,40.772284,-73.955805,5.0,UVA NEXT DOOR
3,50107467,CHANCHITOS CAFE,Brooklyn,176,ROCKAWAY AVENUE,11233.0,,01/01/1900,,,,Not Applicable,,,,,40.677091,-73.910865,41.0,CHANCHITOS CAFE
4,50104755,FLIK INTERNATIONAL CORP.,Manhattan,599,LEXINGTON AVENUE,10022.0,,01/01/1900,,,,Not Applicable,,,,,40.758268,-73.971054,4.0,FLIK INTERNATIONAL CORP.


In [26]:
rd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 208029 entries, 0 to 208028
Data columns (total 20 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   CAMIS                    208029 non-null  int64  
 1   DBA                      207562 non-null  object 
 2   BORO                     208029 non-null  object 
 3   BUILDING                 207653 non-null  object 
 4   STREET                   208023 non-null  object 
 5   ZIPCODE                  205315 non-null  float64
 6   CUISINE DESCRIPTION      205791 non-null  object 
 7   INSPECTION DATE          208029 non-null  object 
 8   ACTION                   205791 non-null  object 
 9   VIOLATION CODE           204640 non-null  object 
 10  VIOLATION DESCRIPTION    204640 non-null  object 
 11  CRITICAL FLAG            208029 non-null  object 
 12  SCORE                    198416 non-null  float64
 13  GRADE                    102267 non-null  object 
 14  GRAD

In [28]:
# Create a base map
m = folium.Map(location=[sr['latitude'].mean(), sr['longitude'].mean()], zoom_start=12)

# Add markers for each restaurant
for index, row in sr.iterrows():
    folium.Marker([row['latitude'], row['longitude']], popup=row['name']).add_to(m)

# Display the map
m