# **Restaurant Star Ratings**

## **Imports**

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
%matplotlib inline
from wordcloud import WordCloud
import re
import nltk
from nltk.corpus import stopwords

#Sklearn preprocessing
from sklearn import preprocessing,set_config
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder,StandardScaler,LabelEncoder
#Scipy
from scipy import stats
from scipy.stats import norm

#Sklearn Models
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn import preprocessing, set_config
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer,make_column_selector,make_column_transformer
from sklearn.linear_model import LogisticRegression
import warnings
from sklearn.decomposition import PCA

# Geospatial map
import folium
import pandas as pd
import geopandas as gpd
from folium.plugins import MarkerCluster

# Warnings 
import warnings 
warnings.filterwarnings("ignore")

# Configuring diagrams
set_config(display = 'diagram')

## **Import Data**

In [2]:
SR1 = pd.read_csv('F:\Coding Dojo\Data\Resturant Start Ratings\one-star-michelin-restaurants.csv') 

In [3]:
SR2 = pd.read_csv('F:/Coding Dojo/Data/Resturant Start Ratings/two-stars-michelin-restaurants.csv')

In [4]:
SR3 = pd.read_csv('F:/Coding Dojo/Data/Resturant Start Ratings/three-stars-michelin-restaurants.csv')

In [5]:
SR1.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...
1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...


In [6]:
SR2.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,SENNS.Restaurant,2019,47.83636,13.06389,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
1,Ikarus,2019,47.79536,13.00695,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Mraz & Sohn,2019,48.23129,16.37637,Wien,Austria,1200,Creative,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
3,Konstantin Filippou,2019,48.21056,16.37996,Wien,Austria,1010,Modern cuisine,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
4,Silvio Nickol Gourmet Restaurant,2019,48.20558,16.37693,Wien,Austria,1010,Modern cuisine,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...


In [7]:
SR3.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Amador,2019,48.25406,16.35915,Wien,Austria,1190,Creative,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
1,Manresa,2019,37.22761,-121.98071,South San Francisco,California,95030,Contemporary,$$$$,https://guide.michelin.com/us/en/california/so...
2,Benu,2019,37.78521,-122.39876,San Francisco,California,94105,Asian,$$$$,https://guide.michelin.com/us/en/california/sa...
3,Quince,2019,37.79762,-122.40337,San Francisco,California,94133,Contemporary,$$$$,https://guide.michelin.com/us/en/california/sa...
4,Atelier Crenn,2019,37.79835,-122.43586,San Francisco,California,94123,Contemporary,$$$$,https://guide.michelin.com/us/en/california/sa...


In [8]:
# add a 'Star Rating' Column to each Dataframe 
SR1['Star Rating'] = 1
SR2['Star Rating'] = 2
SR3['Star Rating'] = 3

In [9]:
# Combine all dataframes 
sr = pd.concat([SR1, SR2, SR3], ignore_index = True)

In [10]:
sr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 695 entries, 0 to 694
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         695 non-null    object 
 1   year         695 non-null    int64  
 2   latitude     695 non-null    float64
 3   longitude    695 non-null    float64
 4   city         693 non-null    object 
 5   region       695 non-null    object 
 6   zipCode      501 non-null    object 
 7   cuisine      695 non-null    object 
 8   price        519 non-null    object 
 9   url          695 non-null    object 
 10  Star Rating  695 non-null    int64  
dtypes: float64(2), int64(2), object(7)
memory usage: 59.9+ KB


In [11]:
sr.describe()

Unnamed: 0,year,latitude,longitude,Star Rating
count,695.0,695.0,695.0,695.0
mean,2018.943885,37.260124,4.417,1.261871
std,0.23031,18.025875,81.095189,0.54527
min,2018.0,-23.634005,-123.719444,1.0
25%,2019.0,25.040718,-73.98292,1.0
50%,2019.0,40.72713,-0.14957,1.0
75%,2019.0,51.508962,100.578855,1.0
max,2019.0,63.43626,127.10257,3.0


In [12]:
sr.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url,Star Rating
0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...,1
1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...,1
4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...,1


In [13]:
# Create a base map
m = folium.Map(location=[sr['latitude'].mean(), sr['longitude'].mean()], zoom_start=12)

# Add markers for each restaurant
for index, row in sr.iterrows():
    folium.Marker([row['latitude'], row['longitude']], popup=row['name']).add_to(m)

# Display the map
m

In [14]:
sr.isna().sum()

name             0
year             0
latitude         0
longitude        0
city             2
region           0
zipCode        194
cuisine          0
price          176
url              0
Star Rating      0
dtype: int64

In [23]:
def drop_columns(sr):
    # Check if 'zipcode' and 'price' columns exist in the dataframe
    if 'zipcode' in sr.columns and 'price' in sr.columns:
        # Drop the specified columns
        dcsr = sr.drop(['zipcode', 'price'], axis=1)
        print("Columns 'zipcode' and 'price' dropped successfully.")
    else:
        print("Columns 'zipcode' and 'price' not found.")
# Example usage:
# Assuming 'your_dataframe' is the name of your DataFrame
drop_columns(sr)        

Columns 'zipcode' and 'price' not found.


In [24]:
# Check to ensure columns are dropped 
sr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 695 entries, 0 to 694
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         695 non-null    object 
 1   year         695 non-null    int64  
 2   latitude     695 non-null    float64
 3   longitude    695 non-null    float64
 4   city         693 non-null    object 
 5   region       695 non-null    object 
 6   zipCode      501 non-null    object 
 7   cuisine      695 non-null    object 
 8   price        519 non-null    object 
 9   url          695 non-null    object 
 10  Star Rating  695 non-null    int64  
dtypes: float64(2), int64(2), object(7)
memory usage: 59.9+ KB


In [25]:
# Filter out rows with missing coordinates
sr = sr.dropna(subset=['latitude', 'longitude'])

# Load a GeoDataFrame with world countries borders
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Create a GeoDataFrame from the restaurant data
geo_data = gpd.GeoDataFrame(sr, geometry=gpd.points_from_xy(sr.longitude, sr.latitude))

# Merge the two GeoDataFrames
merged_data = world.merge(geo_data, how='left', left_on='name', right_on='name')

# Create a base map
m = folium.Map(location=[sr['latitude'].mean(), sr['longitude'].mean()], zoom_start=2)

# Add a choropleth layer
folium.Choropleth(
    geo_data=merged_data,
    name='choropleth',
    data=geo_data,
    columns=['name', 'Star Rating'],
    key_on='feature.properties.name',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Michelin Star Rating'
).add_to(m)

# Add markers for each restaurant
marker_cluster = MarkerCluster().add_to(m)
for idx, row in SR1.iterrows():
    folium.Marker([row['latitude'], row['longitude']], popup=row['name']).add_to(marker_cluster)

# Display the map
m

ValueError: Cannot render objects with any missing geometries:          pop_est      continent                      name iso_a3  gdp_md_est  \
0       889953.0        Oceania                      Fiji    FJI        5496   
1     58005463.0         Africa                  Tanzania    TZA       63177   
2       603253.0         Africa                 W. Sahara    ESH         907   
3     37589262.0  North America                    Canada    CAN     1736425   
4    328239523.0  North America  United States of America    USA    21433226   
..           ...            ...                       ...    ...         ...   
172    6944975.0         Europe                    Serbia    SRB       51475   
173     622137.0         Europe                Montenegro    MNE        5542   
174    1794248.0         Europe                    Kosovo    -99        7926   
175    1394973.0  North America       Trinidad and Tobago    TTO       24269   
176   11062113.0         Africa                  S. Sudan    SSD       11998   

                                            geometry_x  year  latitude  \
0    MULTIPOLYGON (((180.00000 -16.06713, 180.00000...   NaN       NaN   
1    POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...   NaN       NaN   
2    POLYGON ((-8.66559 27.65643, -8.66512 27.58948...   NaN       NaN   
3    MULTIPOLYGON (((-122.84000 49.00000, -122.9742...   NaN       NaN   
4    MULTIPOLYGON (((-122.84000 49.00000, -120.0000...   NaN       NaN   
..                                                 ...   ...       ...   
172  POLYGON ((18.82982 45.90887, 18.82984 45.90888...   NaN       NaN   
173  POLYGON ((20.07070 42.58863, 19.80161 42.50009...   NaN       NaN   
174  POLYGON ((20.59025 41.85541, 20.52295 42.21787...   NaN       NaN   
175  POLYGON ((-61.68000 10.76000, -61.10500 10.890...   NaN       NaN   
176  POLYGON ((30.83385 3.50917, 29.95350 4.17370, ...   NaN       NaN   

     longitude city region zipCode cuisine price  url  Star Rating geometry_y  
0          NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
1          NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
2          NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
3          NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
4          NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
..         ...  ...    ...     ...     ...   ...  ...          ...        ...  
172        NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
173        NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
174        NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
175        NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  
176        NaN  NaN    NaN     NaN     NaN   NaN  NaN          NaN       None  

[177 rows x 17 columns]

In [26]:
sr.isna().sum()

name             0
year             0
latitude         0
longitude        0
city             2
region           0
zipCode        194
cuisine          0
price          176
url              0
Star Rating      0
dtype: int64