# Exploratory Michelin Star

In [1]:
#import the dependencies
import pandas as pd

In [2]:
#create a list of files --> will be used in the loop to import
file_names = ["../michelin/one-star-michelin-restaurants.csv",
              "../michelin/two-stars-michelin-restaurants.csv",
              "../michelin/three-stars-michelin-restaurants.csv"]

# "../michelin/two-stars-michelin-restaurants.csv"
# "../michelin/three-stars-michelin-restaurants.csv" 

In [3]:
#create an empty DF that will eventually hold the 3 files (perm-DF)
michelin_df = pd.DataFrame()
michelin_df

In [4]:
#import the data
#for loop to cycle through each file

for i in file_names:
    #import the data
    michelin_data = pd.read_csv(i)
    
    #clear the temp df
    temp_df = pd.DataFrame()
    
    #add file to DF
    temp_df = pd.DataFrame(michelin_data)
    
    #create a column to add the # of stars
    temp_df['stars'] = (file_names.index(i)+1)
        
    #append temp df to michelin_df
    michelin_df = michelin_df.append(temp_df, ignore_index=True)
# michelin_df.head()
michelin_df = michelin_df.drop(['url'], axis=1)

In [5]:
michelin_df['region'].unique().tolist()

['Austria',
 'California',
 'Chicago',
 'Croatia',
 'Czech Republic',
 'Denmark',
 'Finland',
 'Greece',
 'Hong Kong',
 'Hungary',
 'Macau',
 'Norway',
 'New York City',
 'Poland',
 'Ireland',
 'Rio de Janeiro',
 'Sao Paulo',
 'South Korea',
 'Singapore',
 'Sweden',
 'Taipei',
 'Thailand',
 'Washington DC',
 'United Kingdom']

In [6]:
#filter the dataset to only include The british Island countries
michelin_df = michelin_df.loc[(michelin_df['region'] == "United Kingdom")| 
                                    (michelin_df['region'] == "Ireland")]

In [7]:
michelin_df.head()
# michelin_df.dtypes

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,stars
261,Aniar,2019,53.27112,-9.057078,Gaillimh/Galway,Ireland,,Creative,,1
262,Loam,2019,53.27384,-9.044447,Gaillimh/Galway,Ireland,,Creative,,1
263,Wild Honey Inn,2019,53.03151,-9.2911,Lios Dúin Bhearna/Lisdoonvarna,Ireland,,Classic cuisine,,1
264,Chestnut,2019,51.562588,-9.460854,Ballydehob,Ireland,,Modern cuisine,,1
265,Mews,2019,51.48285,-9.37234,Baltimore,Ireland,,Modern cuisine,,1


In [8]:
#truncate latitude to n decimal places
lat_list = michelin_df['latitude'].tolist()

lat_list_trunc = []

for x in lat_list:
    n=2
    multiplier = 10**n
    value = int(x*(10**n))
    lat_list_trunc.append(value)

michelin_df['lat_trunc'] = lat_list_trunc

In [9]:
#truncate longitude to n decimal places
long_list = michelin_df['longitude'].tolist()

long_list_trunc = []

for x in long_list:
    n=2
    multiplier = 10**n
    value = int(x*(10**n))
    long_list_trunc.append(value)

michelin_df['long_trunc'] = long_list_trunc

In [10]:
#create a location key

michelin_df['lat_trunc']=michelin_df['lat_trunc'].astype(str)
michelin_df['long_trunc']=michelin_df['long_trunc'].astype(str)
michelin_df['location_key'] = michelin_df[['lat_trunc', 'long_trunc']].agg('_'.join, axis=1)



In [11]:
#remove string words
words_to_remove = ['Restaurant', 'restaurant', 'The', 'the', 'bar', 'Bar', 'Inn']

for word in words_to_remove:
    michelin_df['restaurant_name_clean'] = michelin_df['name'].str.replace(word,"")

In [12]:
michelin_df.tail(10)

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,stars,lat_trunc,long_trunc,location_key,restaurant_name_clean
654,Dinner by Heston Blumenthal,2019,51.50208,-0.16011,Hyde Park,United Kingdom,SW1X 7LA,Traditional British,,2,5150,-16,5150_-16,Dinner by Heston Blumenthal
655,Umu,2019,51.5113,-0.14455,Mayfair,United Kingdom,W1J 6LX,Japanese,,2,5151,-14,5151_-14,Umu
656,Sketch (The Lecture Room & Library),2019,51.51287,-0.14136,Mayfair,United Kingdom,W1S 2XG,Modern French,,2,5151,-14,5151_-14,Sketch (The Lecture Room & Library)
657,Greenhouse,2019,51.50769,-0.14926,Mayfair,United Kingdom,W1J 5NY,Creative,,2,5150,-14,5150_-14,Greenhouse
658,Claude Bosi at Bibendum,2019,51.49341,-0.169089,Chelsea,United Kingdom,SW3 6RD,French,,2,5149,-16,5149_-16,Claude Bosi at Bibendum
690,Fat Duck,2019,51.50828,-0.70232,Bray,United Kingdom,SL6 2AQ,Creative,,3,5150,-70,5150_-70,Fat Duck
691,Waterside Inn,2019,51.50773,-0.70121,Bray,United Kingdom,SL6 2AT,Classic French,,3,5150,-70,5150_-70,Waterside
692,Alain Ducasse at The Dorchester,2019,51.50712,-0.15252,Mayfair,United Kingdom,W1K 1QA,French,,3,5150,-15,5150_-15,Alain Ducasse at The Dorchester
693,The Araki,2019,51.511826,-0.140389,Mayfair,United Kingdom,W1S 3BF,Japanese,,3,5151,-14,5151_-14,The Araki
694,Gordon Ramsay,2019,51.48546,-0.16202,Chelsea,United Kingdom,SW3 4HP,French,,3,5148,-16,5148_-16,Gordon Ramsay


In [13]:
#export the data
michelin_df.to_csv('Resources/michelin_data.csv')