In [1]:
# install psycopg2
!pip install psycopg2-binary



In [2]:
# import the dependencies
import pandas as pd
from sqlalchemy import create_engine

## Step 1: Extract

#### (1.a) Extract the First Data Source: 2021 Michelin Star Restaurant

In [3]:
# import a csv file (extract)
csv_file = "Resources/restaurants/michelin_my_maps.csv"
main_df = pd.read_csv(csv_file, dtype={"PhoneNumber": "str"})
main_df.head()

Unnamed: 0,Name,Address,Location,MinPrice,MaxPrice,Currency,Cuisine,Longitude,Latitude,PhoneNumber,Url,WebsiteUrl,Award
0,Aqua,"Parkstraße 1, Wolfsburg, 38440, Germany",Wolfsburg,225,225,EUR,"Creative, Modern Cuisine",10.789999,52.433172,495361606056,https://guide.michelin.com/en/niedersachsen/wo...,http://www.restaurant-aqua.com,3 MICHELIN Stars
1,The Table Kevin Fehling,"Shanghaiallee 15, Hamburg, 20457, Germany",Hamburg,230,230,EUR,Creative,10.00298,53.542623,494022867422,https://guide.michelin.com/en/hamburg-region/h...,http://www.the-table-hamburg.de/,3 MICHELIN Stars
2,Restaurant Überfahrt Christian Jürgens,"Überfahrtstraße 10, Rottach-Egern, 83700, Germany",Rottach-Egern,259,319,EUR,Creative,11.758229,47.696685,4980226690,https://guide.michelin.com/en/bayern/rottach-e...,http://www.althoffcollection.com,3 MICHELIN Stars
3,Victor's Fine Dining by christian bau,"Schlossstraße 27, Perl, 66706, Germany",Perl,205,295,EUR,Creative,6.387211,49.535173,49686679118,https://guide.michelin.com/en/saarland/perl/re...,https://www.victors-fine-dining.de/,3 MICHELIN Stars
4,Rutz,"Chausseestraße 8, Berlin, 10115, Germany",Berlin,198,245,EUR,"Modern Cuisine, Creative",13.386087,52.528351,493024628760,https://guide.michelin.com/en/berlin-region/be...,https://www.rutz-restaurant.de/,3 MICHELIN Stars


In [4]:
# clean up with relevant columns and return only the USA restaurants (transform)
all_resto = main_df[['Name', 'Address', 'Location','PhoneNumber','WebsiteUrl','Award','Currency']].copy()
usa_resto = all_resto.loc[(all_resto['Currency'] == 'USD')]
usa_resto.head()

Unnamed: 0,Name,Address,Location,PhoneNumber,WebsiteUrl,Award,Currency
77,The French Laundry,"6640 Washington St., Yountville, 94599, United...",Yountville,17079442380,https://www.thomaskeller.com/tfl,3 MICHELIN Stars,USD
78,SingleThread,"131 North St., Healdsburg, 95448, United States",Healdsburg,17077234646,https://www.singlethreadfarms.com/,3 MICHELIN Stars,USD
79,Manresa,"320 Village Ln., Los Gatos, 95030, United States",Los Gatos,14083544330,https://www.manresarestaurant.com/,3 MICHELIN Stars,USD
80,Atelier Crenn,"3127 Fillmore St., San Francisco, 94123, Unite...",San Francisco,14154400460,https://www.ateliercrenn.com/,3 MICHELIN Stars,USD
81,Benu,"22 Hawthorne St., San Francisco, 94105, United...",San Francisco,14156854860,https://www.benusf.com/,3 MICHELIN Stars,USD


#### (1.b) Extract the Second Data Source: Price Range

In [5]:
# import a csv file (extract) - one-star-michelin
one_csv_file = "Resources/price_range/one-star-michelin-restaurants.csv"
one_star = pd.read_csv(one_csv_file, dtype={"price": "str"})
one_star.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...
1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...


In [6]:
# check the datatype of one-star-michelin
one_star.dtypes

name          object
year           int64
latitude     float64
longitude    float64
city          object
region        object
zipCode       object
cuisine       object
price         object
url           object
dtype: object

In [7]:
# import a csv file (extract) - two-star-michelin
two_csv_file = "Resources/price_range/two-stars-michelin-restaurants.csv"
two_star = pd.read_csv(two_csv_file, dtype={"price": "str"})
two_star

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,SENNS.Restaurant,2019,47.83636,13.063890,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
1,Ikarus,2019,47.79536,13.006950,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Mraz & Sohn,2019,48.23129,16.376370,Wien,Austria,1200,Creative,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
3,Konstantin Filippou,2019,48.21056,16.379960,Wien,Austria,1010,Modern cuisine,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
4,Silvio Nickol Gourmet Restaurant,2019,48.20558,16.376930,Wien,Austria,1010,Modern cuisine,$$$$$,https://guide.michelin.com/at/en/vienna/wien/r...
...,...,...,...,...,...,...,...,...,...,...
105,Dinner by Heston Blumenthal,2019,51.50208,-0.160110,Hyde Park,United Kingdom,SW1X 7LA,Traditional British,,https://guide.michelin.com/gb/en/greater-londo...
106,Umu,2019,51.51130,-0.144550,Mayfair,United Kingdom,W1J 6LX,Japanese,,https://guide.michelin.com/gb/en/greater-londo...
107,Sketch (The Lecture Room & Library),2019,51.51287,-0.141360,Mayfair,United Kingdom,W1S 2XG,Modern French,,https://guide.michelin.com/gb/en/greater-londo...
108,Greenhouse,2019,51.50769,-0.149260,Mayfair,United Kingdom,W1J 5NY,Creative,,https://guide.michelin.com/gb/en/greater-londo...


In [8]:
# check the datatype of two-star-michelin
two_star.dtypes

name          object
year           int64
latitude     float64
longitude    float64
city          object
region        object
zipCode       object
cuisine       object
price         object
url           object
dtype: object

In [33]:
w = lambda x: (x.replace('$$$$$','TEST') if x=='$$$$$' else 'TEST@')


In [34]:
# import a csv file (extract) - three-star-michelin
three_csv_file = "Resources/price_range/three-stars-michelin-restaurants.csv"
three_star = pd.read_csv(three_csv_file, converters={"price":w})
three_star.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Amador,2019,48.25406,16.35915,Wien,Austria,1190,Creative,TEST,https://guide.michelin.com/at/en/vienna/wien/r...
1,Manresa,2019,37.22761,-121.98071,South San Francisco,California,95030,Contemporary,TEST@,https://guide.michelin.com/us/en/california/so...
2,Benu,2019,37.78521,-122.39876,San Francisco,California,94105,Asian,TEST@,https://guide.michelin.com/us/en/california/sa...
3,Quince,2019,37.79762,-122.40337,San Francisco,California,94133,Contemporary,TEST@,https://guide.michelin.com/us/en/california/sa...
4,Atelier Crenn,2019,37.79835,-122.43586,San Francisco,California,94123,Contemporary,TEST@,https://guide.michelin.com/us/en/california/sa...


In [10]:
# check the datatype of three-star-michelin
three_star.dtypes

name          object
year           int64
latitude     float64
longitude    float64
city          object
region        object
zipCode       object
cuisine       object
price         object
url           object
dtype: object

In [11]:
# consolidate all three files (one-star, two-star and three-star)
appended_file = one_star.append([two_star, three_star])
appended_file.head()

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
0,Kilian Stuba,2019,47.34858,10.17114,Kleinwalsertal,Austria,87568,Creative,$$$$$,https://guide.michelin.com/at/en/vorarlberg/kl...
1,Pfefferschiff,2019,47.83787,13.07917,Hallwang,Austria,5300,Classic cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
2,Esszimmer,2019,47.80685,13.03409,Salzburg,Austria,5020,Creative,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
3,Carpe Diem,2019,47.80001,13.04006,Salzburg,Austria,5020,Market cuisine,$$$$$,https://guide.michelin.com/at/en/salzburg-regi...
4,Edvard,2019,48.216503,16.36852,Wien,Austria,1010,Modern cuisine,$$$$,https://guide.michelin.com/at/en/vienna/wien/r...


In [12]:
# clean up with relevant columns and return only the relevant fields (transform)
michelin_2021 = appended_file[['name', 'region', 'cuisine','price']].copy()
michelin_2021.head()

Unnamed: 0,name,region,cuisine,price
0,Kilian Stuba,Austria,Creative,$$$$$
1,Pfefferschiff,Austria,Classic cuisine,$$$$$
2,Esszimmer,Austria,Creative,$$$$$
3,Carpe Diem,Austria,Market cuisine,$$$$$
4,Edvard,Austria,Modern cuisine,$$$$


In [13]:
# clean up the casing of the key "name" (transform)
michelin_2021_renamed = michelin_2021.rename(columns={"name" :"Name",
                                                     "region":"Region",
                                                     "cuisine":"Cuisine",
                                                     "price":"Price Range"})
michelin_2021_renamed.head()

Unnamed: 0,Name,Region,Cuisine,Price Range
0,Kilian Stuba,Austria,Creative,$$$$$
1,Pfefferschiff,Austria,Classic cuisine,$$$$$
2,Esszimmer,Austria,Creative,$$$$$
3,Carpe Diem,Austria,Market cuisine,$$$$$
4,Edvard,Austria,Modern cuisine,$$$$


In [14]:
merge = pd.merge(usa_resto, michelin_2021_renamed, on = "Name", how = 'left')
merge = pd.merge(usa_resto, michelin_2021_renamed, on = "Name", how = 'left')

merge

Unnamed: 0,Name,Address,Location,PhoneNumber,WebsiteUrl,Award,Currency,Region,Cuisine,Price Range
0,The French Laundry,"6640 Washington St., Yountville, 94599, United...",Yountville,+17079442380,https://www.thomaskeller.com/tfl,3 MICHELIN Stars,USD,California,Contemporary,$$$$
1,SingleThread,"131 North St., Healdsburg, 95448, United States",Healdsburg,+17077234646,https://www.singlethreadfarms.com/,3 MICHELIN Stars,USD,California,Contemporary,$$$$
2,Manresa,"320 Village Ln., Los Gatos, 95030, United States",Los Gatos,+14083544330,https://www.manresarestaurant.com/,3 MICHELIN Stars,USD,California,Contemporary,$$$$
3,Atelier Crenn,"3127 Fillmore St., San Francisco, 94123, Unite...",San Francisco,+14154400460,https://www.ateliercrenn.com/,3 MICHELIN Stars,USD,California,Contemporary,$$$$
4,Benu,"22 Hawthorne St., San Francisco, 94105, United...",San Francisco,+14156854860,https://www.benusf.com/,3 MICHELIN Stars,USD,California,Asian,$$$$
...,...,...,...,...,...,...,...,...,...,...
566,Queen’s English,"3410 11th St. NW, Washington, 20010, United St...",Washington,,https://www.queensenglishdc.com/,Bib Gourmand,USD,,,
567,Residents Cafe & Bar,"1306 18th St. NW, Washington, 20036, United St...",Washington,+12026029563,https://www.residentsdc.com/,Bib Gourmand,USD,,,
568,Makan,"3400 11th St. NW, Washington, 20010, United St...",Washington,+12027302295,https://www.makanrestaurantdc.com/,Bib Gourmand,USD,,,
569,Ellē,"3221 Mt. Pleasant St. NW, Washington, 20010, U...",Washington,+12026520040,https://www.eatatelle.com/,Bib Gourmand,USD,,,


In [15]:
test = usa_resto.loc[(usa_resto['Name'] == "The French Laundry")]
test

Unnamed: 0,Name,Address,Location,PhoneNumber,WebsiteUrl,Award,Currency
77,The French Laundry,"6640 Washington St., Yountville, 94599, United...",Yountville,17079442380,https://www.thomaskeller.com/tfl,3 MICHELIN Stars,USD


In [16]:
test = michelin_2021_renamed.loc[(michelin_2021_renamed['Name'] == "The French Laundry")]
test

Unnamed: 0,Name,Region,Cuisine,Price Range
5,The French Laundry,California,Contemporary,$$$$


In [19]:
test = three_star.loc[(three_star['name'] == "The French Laundry")]
test

Unnamed: 0,name,year,latitude,longitude,city,region,zipCode,cuisine,price,url
5,The French Laundry,2019,38.40443,-122.36474,San Francisco,California,94599,Contemporary,$$$$,https://guide.michelin.com/us/en/california/sa...
