In [43]:
# Import - Basic Library

import pandas as pd
import numpy as np

# Display every column, but cap long frames at 20 rows so outputs stay readable.
# Uses the public pd.set_option entry point rather than the incidental pd.pandas alias.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)

In [44]:
# Import - Web Scraping-Related Library

import os
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# STEP 1
## Scrape Website Address for ALL Restaurants in Jakarta from Zomato Search Page

In [4]:
# Set - path where the ChromeDriver executable is located -> to be used for Selenium.
# If the CHROMEDRIVER_PATH environment variable is set it takes precedence, so the
# notebook can run on another machine without editing this cell; otherwise the
# original Windows location below is used.

chromepath = os.environ.get(
    'CHROMEDRIVER_PATH',
    r'C:\Users\Rizqi Eka Maulana\Downloads\chromedriver_win32\chromedriver.exe',
)

In [30]:
# Set - Webdriver and Output DataFrame storage
driver = webdriver.Chrome(chromepath)
out_df = []

# try/finally + with: the browser session and the text file are always released,
# even when a page load or parse fails part-way through the run.
try:
    # Append mode ("ab"): URLs from separate runs accumulate in the same text file.
    with open("jkt_restaurant_address.txt", "ab") as out_file:

        # Loop Through - Search Pages that we wanted (pages 1..99)
        for x in range(1, 100):
            print('Opening Pages ' + str(x))
            driver.get('https://www.zomato.com/id/jakarta/restoran?page={}'.format(x))
            html_text = driver.page_source
            soup = BeautifulSoup(html_text, 'lxml')
            print('Accessing Webpage OK \n')

            # Find All Url Address and Write it to Output File/DataFrame
            for tag in soup.find_all("a", attrs={'data-result-type': 'ResCard_Name'}):
                out_file.write(tag['href'].encode('utf-8').strip() + b'\n')
                out_df.append(tag['href'])
finally:
    # quit() ends the whole WebDriver session (browser + driver process);
    # close() would only close the current window.
    driver.quit()

# Convert the collected URLs to a one-column DataFrame
out_df = pd.DataFrame(out_df, columns=['Website'])

Opening Pages 1
Accessing Webpage OK 

Opening Pages 2
Accessing Webpage OK 

Opening Pages 3
Accessing Webpage OK 

Opening Pages 4
Accessing Webpage OK 

Opening Pages 5
Accessing Webpage OK 

Opening Pages 6
Accessing Webpage OK 

Opening Pages 7
Accessing Webpage OK 

Opening Pages 8
Accessing Webpage OK 

Opening Pages 9
Accessing Webpage OK 

Opening Pages 10
Accessing Webpage OK 

Opening Pages 11
Accessing Webpage OK 

Opening Pages 12
Accessing Webpage OK 

Opening Pages 13
Accessing Webpage OK 

Opening Pages 14
Accessing Webpage OK 

Opening Pages 15
Accessing Webpage OK 

Opening Pages 16
Accessing Webpage OK 

Opening Pages 17
Accessing Webpage OK 

Opening Pages 18
Accessing Webpage OK 

Opening Pages 19
Accessing Webpage OK 

Opening Pages 20
Accessing Webpage OK 

Opening Pages 21
Accessing Webpage OK 

Opening Pages 22
Accessing Webpage OK 

Opening Pages 23
Accessing Webpage OK 

Opening Pages 24
Accessing Webpage OK 

Opening Pages 25
Accessing Webpage OK 

Opening P

In [39]:
# Show - The first five scraped restaurant URLs
out_df.head()

Unnamed: 0,Website
0,https://www.zomato.com/id/jakarta/waki-japanes...
1,https://www.zomato.com/id/jakarta/acta-brasser...
2,https://www.zomato.com/id/jakarta/furusato-iza...
3,https://www.zomato.com/id/jakarta/animale-rest...
4,https://www.zomato.com/id/jakarta/wolfgangs-st...


In [37]:
# Observe - rows whose Website appears again later in the frame
# (keep='last' marks every occurrence except the final one as a duplicate)
out_df.loc[out_df.duplicated(subset=['Website'], keep='last')]

Unnamed: 0,Website
355,https://www.zomato.com/id/jakarta/kembang-bawa...
846,https://www.zomato.com/id/jakarta/madam-lee-ko...
1289,https://www.zomato.com/id/jakarta/pilar-minang...
1319,https://www.zomato.com/id/jakarta/dapur-cokela...


In [40]:
# Make A New DataFrame - one row per Website, keeping the last occurrence.
# The original index labels are preserved (no reset).
out_df_nd = out_df.drop_duplicates(subset=['Website'], keep='last')

# STEP 2
## Scrape Restaurant Data for ALL Restaurants in Jakarta from Zomato Website TextFile / DataFrame

In [45]:
# Since we're going to loop through thousands of websites, let's make a function to simplify the page-opening process

def OpenPage(url, wait_seconds=8):
    """Load ``url`` in the shared Selenium driver and parse it into the global ``soup``.

    Parameters
    ----------
    url : str
        Address to open. Surrounding whitespace is stripped first, because
        callers pass lines read straight from a text file (trailing newline).
    wait_seconds : float, optional
        Pause between ``driver.get`` and reading the page source. Defaults to
        8, the delay that was previously hard-coded.

    Side effects
    ------------
    Navigates the module-level ``driver``, rebinds the module-level ``soup``
    to a ``BeautifulSoup`` tree (``html.parser``) of the page source, and
    prints a progress message. Returns ``None``.
    """
    global soup

    driver.get(url.strip())
    time.sleep(wait_seconds)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    print('Accessing Webpage OK')

    # Debugging aid: to inspect the fetched HTML, dump it to disk, e.g.
    #     with open("output1.html", "wb") as file:
    #         file.write(soup.prettify('utf-8'))


In [46]:
# Initialize - Empty Columns that we will use to store the scraping data results.
# One list per output column; every processed URL appends exactly one entry to
# each list (real value or placeholder) so the lists can later be zipped row-wise.

rest_name = []
rest_type = []
cuisine_type = []
rest_area = []
rest_rating = []
review_counts = []
price_for_2 = []
rest_address = []
rest_info = []
rest_lat = []
rest_long = []


# Initialize - Webdriver (shared with OpenPage through the module-level name `driver`)
driver = webdriver.Chrome(chromepath)


# Scrape the data by looping through entries in Textfile / DataFrame
with open('jkt_restaurant_address.txt', 'r', encoding="utf-8") as f:
    # Iterating the file yields one URL per line, trailing newline included.
    for line in f:

        # Open Browser and Restaurant Web Page
        # (OpenPage navigates `driver` and rebinds the global `soup` used below)
        OpenPage(line)


        # 1. Scrape Restaurant Name
        name_anchor = soup.find("h1") 

        # 1.1 Conditional - If there is no H1 in the URL, then it's 404 Error.
        # Every column gets the same placeholder so all lists stay the same length.
        if name_anchor is None:
            rest_name.append("404 error")
            rest_type.append("404 error")
            cuisine_type.append("404 error")
            rest_area.append("404 error")
            rest_rating.append("404 error")
            review_counts.append("404 error")
            price_for_2.append("404 error")
            rest_address.append("404 error")
            rest_info.append("404 error")
            rest_lat.append("404 error")
            rest_long.append("404 error")
        else:
            name = name_anchor.text.strip()
            
            # 1.2 Conditional - If we found 502 Bad Gateway, then we reopen the URL
            if name != "502 Bad Gateway":
                rest_name.append(name)

            else:
                # Reload once, wait 120 seconds, then re-parse the page.
                # NOTE(review): the retry is unguarded - if the reloaded page has no
                # <h1>, name_anchor is None and `.text` raises AttributeError; if it
                # is still a 502 page, "502 Bad Gateway" is stored as the name.
                driver.get(line)
                time.sleep(120)
                html_text = driver.page_source
                soup = BeautifulSoup(html_text, 'html.parser')
                name_anchor = soup.find("h1")
                name = name_anchor.text.strip()
                rest_name.append(name)
            

            print(f'Scraping Restaurant Name - {name} - OK')


            # 2. Scrape Restaurant Type
            # Steps 2-10 read from the live browser DOM through absolute XPaths,
            # not from `soup`.
            try:
                rest_type_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[3]/section/section[1]/section[1]/div/a[1]""")

            except NoSuchElementException:
                # Reload and wait 45 seconds before a single retry.
                # NOTE(review): the second lookup is not wrapped in try/except, so a
                # repeated miss raises NoSuchElementException and stops the loop.
                driver.get(line)
                time.sleep(45)
                rest_type_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[3]/section/section[1]/section[1]/div/a[1]""")
            
            rest_type_text = rest_type_anchor.text
            rest_type.append(rest_type_text)
            print(f'Scraping Restaurant Type - {name} - {rest_type_text} - OK')

            # 3. Scrape Cuisine Type
            # Probes sibling links a[2]..a[10] of the same container; missing
            # positions are skipped, so the result is a list of 0-9 strings.
            cuisine_types = []
            for index in list(range(2,11)):
                try:
                    cuisine_types_anchor = driver.find_element_by_xpath(f"""/html/body/div[1]/div[2]/main/div/section[3]/section/section[1]/section[1]/div/a[{index}]""")
                    cuisine_types.append(cuisine_types_anchor.text)
                except NoSuchElementException:
                    pass

            cuisine_type.append(cuisine_types)
            print(f'Scraping Cuisine Type - {name} - {rest_type_text} - OK')


            # 4. Scrape Restaurant Area
            # NOTE(review): no fallback here - a missing element raises and stops the loop.
            rest_area_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[3]/section/section[1]/section[1]/a""")
            rest_area_text = rest_area_anchor.text
            rest_area.append(rest_area_text)
            print(f'Scraping Restaurant Area - {name} - {rest_area_text} - OK')


            # 5. Scrape Restaurant Rating (placeholder text when the element is absent)
            try:
                rest_rating_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[3]/section/section[2]/section/div[1]/p""")
                rest_rating_text = rest_rating_anchor.text
            except NoSuchElementException:
                rest_rating_text = "Not Rated Yet"
                pass

            rest_rating.append(rest_rating_text)


            # 6. Scrape Restaurant Review (placeholder text when the element is absent)
            try:
                review_counts_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[3]/section/section[2]/section/div[2]/p""")
                review_counts_text = review_counts_anchor.text
            except NoSuchElementException:
                review_counts_text = "Not Reviewed Yet"
                pass

            review_counts.append(review_counts_text)
            print(f'Scraping Restaurant Rating & Review Counts - {name} - {rest_rating_text} - {review_counts_text} - OK')


            # 7. Scrape Restaurant's Price for 2 Estimation
            # First attempt: paragraph p[1]. On a miss, reload, wait 60 seconds and
            # try once more before falling back to a placeholder string.
            try:
                price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[1]""")
                price_for_2_text = price_for_2_anchor.text
            
            except NoSuchElementException:
                driver.get(line)
                time.sleep(60)

                try:
                    price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[1]""")
                    price_for_2_text = price_for_2_anchor.text

                except NoSuchElementException:
                    price_for_2_text = "No Price Data Found"   
            

            # Accept the text only if it starts with 'Rp'; otherwise look at the
            # next paragraph p[2] instead.
            # NOTE(review): the "No Price Data Found" placeholder also takes the
            # else branch, and whatever p[2] contains is stored without a 'Rp' check.
            if price_for_2_text[0:2] == 'Rp':
                price_for_2.append(price_for_2_text)
            else:
                try:
                    price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[2]""")
                    price_for_2_text = price_for_2_anchor.text
                    price_for_2.append(price_for_2_text)
                except NoSuchElementException:
                    # Reload for the remaining steps and record a marker value so
                    # the row can be found and re-scraped later.
                    driver.get(line)
                    time.sleep(60)
                    price_for_2.append("Need to rerun browser")

            print(f'Scraping Restaurant Price for Two - {name} - OK')


            # 8. Scrape Restaurant Address
            # NOTE(review): no fallback here - a missing element raises and stops the loop.
            rest_address_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/article/section/p""")
            rest_address.append(rest_address_anchor.text)
            print(f'Scraping Restaurant Address {name} - OK')


            # 9. Scrape Restaurant Additional Information
            # Probes div[1]..div[29]; missing positions are skipped.
            additional_infos = []
            for index in list(range(1,30)):
                try:
                    additional_infos_anchor = driver.find_element_by_xpath(f"""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/div[3]/div[{index}]/p""")
                    additional_infos.append(additional_infos_anchor.text)
                except NoSuchElementException:
                    pass

            rest_info.append(additional_infos)
            print(f'Scraping Additional Info - {name} - OK')


            # 10. Scrape Restaurant Latitude and Longitude
            # Both values are cut from the tail of the link's href with fixed
            # offsets (two 13-character windows, stored as strings).
            # NOTE(review): assumes the href always ends with fixed-width
            # coordinates plus one trailing character - TODO confirm; a differently
            # formatted link would silently yield wrong slices.
            get_url = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/article/section/div[2]/a""").get_attribute("href")
            lati = get_url[-28:-15]
            lont = get_url[-14:-1]
            rest_lat.append(lati)
            rest_long.append(lont)
            print(f'Scraping Restaurant Latitude-Longitude - {name} - OK')


        print('-------------------------------------------------------------------------------------------------------------------------------------------')


    # Runs once after the last URL (still inside the `with`); it is skipped if
    # any step above raises.
    driver.close()

Accessing Webpage OK
Scraping Restaurant Name - WAKI Japanese BBQ Dining - OK
Scraping Restaurant Type - WAKI Japanese BBQ Dining - Restoran Kasual - OK
Scraping Cuisine Type - WAKI Japanese BBQ Dining - Restoran Kasual - OK
Scraping Restaurant Area - WAKI Japanese BBQ Dining - Thamrin - OK
Scraping Restaurant Rating & Review Counts - WAKI Japanese BBQ Dining - 4.9 - 2,971 Ulasan - OK
Scraping Restaurant Price for Two - WAKI Japanese BBQ Dining - OK
Scraping Restaurant Address WAKI Japanese BBQ Dining - OK
Scraping Additional Info - WAKI Japanese BBQ Dining - OK
Scraping Restaurant Latitude-Longitude - WAKI Japanese BBQ Dining - OK
-------------------------------------------------------------------------------------------------------------------------------------------
Accessing Webpage OK
Scraping Restaurant Name - Acta Brasserie - OK
Scraping Restaurant Type - Acta Brasserie - Restoran Kasual - OK
Scraping Cuisine Type - Acta Brasserie - Restoran Kasual - OK
Scraping Restaurant Area 

In [32]:
# Make A New DataFrame (rdf - Restaurant DataFrame): zip the per-column result lists
# into row tuples, then let pandas build the frame from those rows.
# zip stops at the shortest list, so an interrupted scrape yields only complete rows.

data_tuples = list(zip(
    rest_name,
    rest_type,
    cuisine_type,
    rest_area,
    rest_address,
    rest_rating,
    review_counts,
    price_for_2,
    rest_info,
    rest_lat,
    rest_long,
))
rdf = pd.DataFrame(
    data_tuples,
    columns=[
        'Restaurant Name',
        'Restaurant Type',
        'Cuisine Type',
        'Restaurant Area',
        'Restaurant Address',
        'Restaurant Rating',
        'Review Counts',
        'Price for 2',
        'Additional Info',
        'Latitude',
        'Longitude',
    ],
)

In [36]:
# If we ran the data scraping several times (separately), concatenate the resulting
# DataFrames into one longer DataFrame with a fresh 0..n-1 index.
# NOTE(review): `rdf2` is not created anywhere in the visible notebook - presumably
# the result of a second scraping run; confirm where it comes from before re-running.

total_frame = [rdf, rdf2]
rdf_complete = pd.concat(total_frame, ignore_index=True)
rdf_complete

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude
0,Acta Brasserie,Casual Dining,"['Barat', 'Asia', 'Grill House']",Senayan,"The MAJ Senayan, Senayan National Golf Club, J...",3.7,80 Ulasan,Rp350.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Alkohol Tersedia', '...",-6.2167980000,106.797294000
1,Furusato Izakaya,Casual Dining,['Jepang'],Sudirman,"Jl. Jend Sudirman No. 36, Sudirman, Jakarta 10210",3.9,55 Ulasan,Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Pesan Antar', 'Hanya Wine dan Bir', 'Area Me...",-6.2146764771,106.816816322
2,Medja Restaurant,Casual Dining,['Indonesia'],Bogor Timur,"Jl. Pajajaran Indah V No. 6, Bogor Timur, Bogor",4.0,259 Ulasan,Rp200.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",-6.6096150000,106.812364000
3,Animale Restaurant,Casual Dining,"['Amerika', 'Mediterania']",Setiabudi,"MD Place Bulding, Lantai 11, Jl. Setiabudi Sel...",4.3,113 Ulasan,Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Reserva...",-6.2086409900,106.828408353
4,Lawless Burgerbar,Casual Dining,['Burger'],Kemang,"Jl. Kemang Selatan VIII No. 67H-67I, Kemang, J...",4.7,"1,868 Ulasan",Rp150.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Reserva...",-6.2679110938,106.814406551
...,...,...,...,...,...,...,...,...,...,...,...
14511,Essence Coffee,Toko Minuman,"[Kopi, Snacks]",Pondok Bambu,"Jl. Bambu Ori Raya, Pondok Bambu, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp80.000 untuk 2 orang (perkiraan),"[Bawa Pulang Tersedia, Tempat duduk di luar, D...",-6.2376240000,106.907457000
14512,Senja Toast,Quick Bites,[Kue & Roti],Jagakarsa,"Jl. Raya Lenteng Agung, Jagakarsa, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp60.000 untuk 2 orang (perkiraan),"[Sarapan, Bawa Pulang Tersedia, Di dalam ruangan]",-6.3443890000,106.832833000
14513,Express Juice,Toko Minuman,[Jus],Bojongsari,"Jl. Raya Ciputat Parung no 60, Bojongsari, Depok",Not Rated Yet,Not Reviewed Yet,Rp50.000 untuk 2 orang (perkiraan),"[Bawa Pulang Tersedia, Di dalam ruangan]",-6.3611110000,106.747747000
14514,Ayam Selebriti,Quick Bites,[Indonesia],Meruya,"Jl. Meruya Selatan No.21, Meruya, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp50.000 untuk 2 orang (perkiraan),"[Bawa Pulang Tersedia, Area Merokok, Tempat du...",-6.2109790000,106.737538000


In [53]:
# Merge - Restaurant DataFrame with Website DataFrame.
# DataFrame.join aligns on the index (left join by default), so row i of `rdf` gets the
# Website stored under index label i in `out_df_nd`.
# NOTE(review): this joins `rdf`, not the concatenated `rdf_complete` built above -
# confirm which frame is intended.

rdf_full = rdf.join(out_df_nd)
rdf_full

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
0,Acta Brasserie,Casual Dining,"['Barat', 'Asia', 'Grill House']",Senayan,"The MAJ Senayan, Senayan National Golf Club, J...",3.7,80 Ulasan,Rp350.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Alkohol Tersedia', '...",-6.2167980000,106.797294000,https://www.zomato.com/id/jakarta/acta-brasser...
1,Furusato Izakaya,Casual Dining,['Jepang'],Sudirman,"Jl. Jend Sudirman No. 36, Sudirman, Jakarta 10210",3.9,55 Ulasan,Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Pesan Antar', 'Hanya Wine dan Bir', 'Area Me...",-6.2146764771,106.816816322,https://www.zomato.com/id/jakarta/furusato-iza...
2,Medja Restaurant,Casual Dining,['Indonesia'],Bogor Timur,"Jl. Pajajaran Indah V No. 6, Bogor Timur, Bogor",4.0,259 Ulasan,Rp200.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",-6.6096150000,106.812364000,https://www.zomato.com/id/jakarta/medja-restau...
3,Animale Restaurant,Casual Dining,"['Amerika', 'Mediterania']",Setiabudi,"MD Place Bulding, Lantai 11, Jl. Setiabudi Sel...",4.3,113 Ulasan,Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Reserva...",-6.2086409900,106.828408353,https://www.zomato.com/id/jakarta/animale-rest...
4,Lawless Burgerbar,Casual Dining,['Burger'],Kemang,"Jl. Kemang Selatan VIII No. 67H-67I, Kemang, J...",4.7,"1,868 Ulasan",Rp150.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Reserva...",-6.2679110938,106.814406551,https://www.zomato.com/id/jakarta/lawless-burg...
...,...,...,...,...,...,...,...,...,...,...,...,...
14511,Essence Coffee,Toko Minuman,"['Kopi', 'Snacks']",Pondok Bambu,"Jl. Bambu Ori Raya, Pondok Bambu, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp80.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",-6.2376240000,106.907457000,https://www.zomato.com/id/jakarta/essence-coff...
14512,Senja Toast,Quick Bites,['Kue & Roti'],Jagakarsa,"Jl. Raya Lenteng Agung, Jagakarsa, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp60.000 untuk 2 orang (perkiraan),"['Sarapan', 'Bawa Pulang Tersedia', 'Di dalam ...",-6.3443890000,106.832833000,https://www.zomato.com/id/jakarta/senja-toast-...
14513,Express Juice,Toko Minuman,['Jus'],Bojongsari,"Jl. Raya Ciputat Parung no 60, Bojongsari, Depok",Not Rated Yet,Not Reviewed Yet,Rp50.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.3611110000,106.747747000,https://www.zomato.com/id/jakarta/express-juic...
14514,Ayam Selebriti,Quick Bites,['Indonesia'],Meruya,"Jl. Meruya Selatan No.21, Meruya, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp50.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.2109790000,106.737538000,https://www.zomato.com/id/jakarta/ayam-selebri...


In [54]:
# Safety net in case the URL text file / DataFrame was not de-duplicated in the previous step.
# Data Cleaning - two rows are duplicates when [Restaurant Name, Latitude, Longitude] all match;
# the last occurrence is kept.

rdf_fullnd = rdf_full.drop_duplicates(
    subset=['Restaurant Name', 'Latitude', 'Longitude'],
    keep='last',
)
# Display only: the re-indexed copy is shown but not assigned back to rdf_fullnd.
rdf_fullnd.reset_index(drop=True)

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",4.4,"1,437 Ulasan",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",4.3,461 Ulasan,Rp70.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1999000000,106.843739000,https://www.zomato.com/id/jakarta/pempek-megar...
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",4.0,391 Ulasan,Rp300.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...
3,Gado-Gado Bon-Bin,Quick Bites,['Jawa'],Cikini,"Jl. Cikini 4 No. 5, Cikini, Jakarta",4.0,177 Ulasan,Rp90.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1929590000,106.838377000,https://www.zomato.com/id/jakarta/gado-gado-bo...
4,Koffie Fictie,Kafe,['Kopi'],Bekasi Selatan,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Not Rated Yet,TIdak cukup Ulasan,Rp100.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.2720590000,106.970932000,https://www.zomato.com/id/jakarta/koffie-ficti...
...,...,...,...,...,...,...,...,...,...,...,...,...
11263,Essence Coffee,Toko Minuman,"['Kopi', 'Snacks']",Pondok Bambu,"Jl. Bambu Ori Raya, Pondok Bambu, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp80.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",-6.2376240000,106.907457000,https://www.zomato.com/id/jakarta/essence-coff...
11264,Senja Toast,Quick Bites,['Kue & Roti'],Jagakarsa,"Jl. Raya Lenteng Agung, Jagakarsa, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp60.000 untuk 2 orang (perkiraan),"['Sarapan', 'Bawa Pulang Tersedia', 'Di dalam ...",-6.3443890000,106.832833000,https://www.zomato.com/id/jakarta/senja-toast-...
11265,Express Juice,Toko Minuman,['Jus'],Bojongsari,"Jl. Raya Ciputat Parung no 60, Bojongsari, Depok",Not Rated Yet,Not Reviewed Yet,Rp50.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.3611110000,106.747747000,https://www.zomato.com/id/jakarta/express-juic...
11266,Ayam Selebriti,Quick Bites,['Indonesia'],Meruya,"Jl. Meruya Selatan No.21, Meruya, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp50.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.2109790000,106.737538000,https://www.zomato.com/id/jakarta/ayam-selebri...


In [55]:
# Data Cleaning - keep only rows whose name is not the '404 error' placeholder,
# then renumber the index from 0

rdf_fullnd = rdf_fullnd[rdf_fullnd['Restaurant Name'] != '404 error'].reset_index(drop=True)

In [56]:
# Observe - Missing Values: share of null entries per column (a fraction between 0 and 1),
# rounded to three decimals

for var, missing_share in rdf_fullnd.isnull().mean().round(3).items():
    print(var, '- Percentage of Missing Values : ', missing_share)

Restaurant Name - Percentage of Missing Values :  0.0
Restaurant Type - Percentage of Missing Values :  0.0
Cuisine Type - Percentage of Missing Values :  0.0
Restaurant Area - Percentage of Missing Values :  0.0
Restaurant Address - Percentage of Missing Values :  0.007
Restaurant Rating - Percentage of Missing Values :  0.0
Review Counts - Percentage of Missing Values :  0.0
Price for 2 - Percentage of Missing Values :  0.007
Additional Info - Percentage of Missing Values :  0.0
Latitude - Percentage of Missing Values :  0.0
Longitude - Percentage of Missing Values :  0.0
Website - Percentage of Missing Values :  0.0


In [57]:
# Observe - Number of rows whose Restaurant Address is missing (NaN)

rdf_fullnd['Restaurant Address'].isna().sum()

83

In [58]:
# Observe - Number of rows whose Price for 2 is missing (NaN)

rdf_fullnd['Price for 2'].isna().sum()

83

In [5]:
# Store - a boolean mask (True where Restaurant Address is missing) in a new variable

condition_1 = rdf_fullnd['Restaurant Address'].isna()

In [6]:
# Observe - did 'Price for 2' capture the wrong text? Check by taking the first
# two characters of every entry (a valid price starts with 'Rp')

price_for_2_check = rdf_fullnd['Price for 2'].str[:2]
price_for_2_check

0        Rp
1        Rp
2        Rp
3        Rp
4        Rp
         ..
11262    Rp
11263    Rp
11264    Rp
11265    Rp
11266    Rp
Name: Price for 2, Length: 11267, dtype: object

In [7]:
# Observe - how many 'Price for 2' entries start with neither 'Rp' (a price)
# nor 'No' (the "No Price Data Found" placeholder); missing values are counted too

sum(~price_for_2_check.isin(['Rp', 'No']))

201

In [8]:
# Store - a boolean mask of the entries counted above (prefix is neither 'Rp' nor 'No')

condition_2 = ~price_for_2_check.isin(['Rp', 'No'])

In [66]:
# Observe - Number of rows with empty Additional Info.
# NOTE(review): comparing against the text "[]" only matches when the column holds
# strings (e.g. after reloading the CSV); with real Python lists the slice is a list
# and never equals "[]" - confirm how rdf_fullnd was loaded.

sum(rdf_fullnd['Additional Info'].str[:2].eq("[]"))

204

In [9]:
# Store - a boolean mask of rows whose Additional Info text starts with "[]" (empty list)

condition_3 = rdf_fullnd['Additional Info'].str[:2].eq("[]")

In [17]:
# Observe - rows with a missing address (condition_1) or a 'Price for 2' value
# that starts with neither 'Rp' nor 'No' (condition_2)

rdf_fullnd[condition_1 | condition_2]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
968,Tenterem Cafe,Quick Bites,['Indonesia'],Taman Mini,"Taman Mini Indonesia Indah, Jl. Taman Prasasti...",Not Rated Yet,TIdak cukup Ulasan,"Cafe, photography studio and salon nearby Tama...","['Bawa Pulang Tersedia', 'Di dalam ruangan', '...",-6.3001420000,106.890422000,https://www.zomato.com/id/jakarta/tenterem-caf...
1201,Indah Seafood 94,Food Court,['Seafood'],Kelapa Gading,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1701543248,106.924605108,https://www.zomato.com/id/jakarta/indah-seafoo...
1219,Mama-Do,Food Court,['Manado'],Pulo Gadung,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1830380000,106.891640000,https://www.zomato.com/id/jakarta/mama-do-1-pu...
1240,DÉJÀ VU Resto - Delua Hotel,Casual Dining,"['Indonesia', 'Barat']",Mangga Besar,,Not Rated Yet,TIdak cukup Ulasan,,"['', '', '']",-6.1488740000,106.825881000,https://www.zomato.com/id/jakarta/déjà-vu-rest...
1556,Tahu Gejrot & Rujak Buah,Food Court,"['Makanan Jalanan', 'Indonesia']",Kelapa Gading,,Not Rated Yet,TIdak cukup Ulasan,,"['', '', '']",-6.1464234204,106.891523748,https://www.zomato.com/id/jakarta/tahu-gejrot-...
...,...,...,...,...,...,...,...,...,...,...,...,...
11116,Nasi Goreng Jaya,Quick Bites,['Chinese'],Pulo Gadung,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1894180071,106.877879016,https://www.zomato.com/id/jakarta/nasi-goreng-...
11124,BarBarRibs,Casual Dining,['Steak'],Kelapa Gading,,3.7,41 Ulasan,,"['', '', '', '', '']",-6.1494060000,106.903117000,https://www.zomato.com/id/jakarta/barbarribs-k...
11128,K-Kitchen,Food Court,['Korea'],Senayan,,3.8,265 Ulasan,,"['', '']",-6.2269760327,106.796928606,https://www.zomato.com/id/jakarta/k-kitchen-se...
11183,Nock Nock Cafe & Resto,Quick Bites,['Indonesia'],Kelapa Gading,,2.3,13 Ulasan,,"['', '', '']",-6.1517729750,106.891749724,https://www.zomato.com/id/jakarta/nock-nock-ca...


In [314]:
# Observe - rows with a suspicious 'Price for 2' value (condition_2) or an
# empty Additional Info (condition_3)

rdf_fullnd[condition_2 | condition_3]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
22,Siomay Batagor Satya Tenggiri,Kaki Lima,['Indonesia'],Sunter,"Jl. Agung Utara 24 No. 23, Sunter, Jakarta",Not Rated Yet,TIdak cukup Ulasan,Rp50.000 untuk 2 orang (perkiraan),[],-6.1387440000,106.861207000,https://www.zomato.com/id/jakarta/siomay-batag...
305,Sop & Bakmi Paton,Food Court,['Bakmi'],Bogor Barat,"Food Court Pulen Kitchens, Jl. Raya Semplak No...",Not Rated Yet,TIdak cukup Ulasan,Rp50.000 untuk 2 orang (perkiraan),[],-6.5477290000,106.760553000,https://www.zomato.com/id/jakarta/sop-bakmi-pa...
452,Mee_Oowfè,Quick Bites,"['Indonesia', 'Kopi']",Bogor Utara,"Ruko 2C, Jl. Achmad Sobana No. 28, Bogor Utara...",Not Rated Yet,TIdak cukup Ulasan,No Price Data Found,[],-6.5820490000,106.814839000,https://www.zomato.com/id/jakarta/mee-oowfè-2-...
453,Turn On Coffee & Eatery,Quick Bites,"['Indonesia', 'Barat', 'Kopi']",Kebon Jeruk,"Harvia Suites Building, Jl. Raya Perjuangan No...",Not Rated Yet,TIdak cukup Ulasan,No Price Data Found,[],-6.1972550000,106.764359000,https://www.zomato.com/id/jakarta/turn-on-coff...
454,One More Shot Coffee House,Toko Minuman,['Kopi'],Pantai Indah Kapuk,"Fresh Market, Jl. Pantai Indah Kapuk Boulevard...",Not Rated Yet,Not Reviewed Yet,No Price Data Found,[],-6.1028360000,106.741529000,https://www.zomato.com/id/jakarta/one-more-sho...
...,...,...,...,...,...,...,...,...,...,...,...,...
11200,Tiga Wonton,Casual Dining,['Chinese'],Sudirman,"fX Sudirman, Lantai F2, Jl. Jenderal Sudirman,...",Not Rated Yet,Not Reviewed Yet,Rp120.000 untuk 2 orang (perkiraan),[],-6.2246310000,106.803995000,https://www.zomato.com/id/jakarta/tiga-wonton-...
11201,Tiga Wonton,Quick Bites,['Chinese'],Gatot Subroto,"Wisma Mulia City Plaza, Lantai Lower Ground, J...",Not Rated Yet,Not Reviewed Yet,Rp120.000 untuk 2 orang (perkiraan),[],-6.2360723229,106.823362782,https://www.zomato.com/id/jakarta/tiga-wonton-...
11202,Tiga Wonton,Quick Bites,['Chinese'],Serpong Utara,"Living World, Lantai Ground, Jl. Alam Sutera B...",Not Rated Yet,Not Reviewed Yet,Rp120.000 untuk 2 orang (perkiraan),[],-6.2418638889,106.657019444,https://www.zomato.com/id/jakarta/tiga-wonton-...
11204,Pagi Sore,Casual Dining,['Padang'],Kemang,"Jl. Kemang Raya, Kemang, Jakarta",Not Rated Yet,Not Reviewed Yet,Rp150.000 untuk 2 orang (perkiraan),[],-6.2611180000,106.815419000,https://www.zomato.com/id/jakarta/pagi-sore-ke...


In [323]:
# Store - Current DataFrame to CSV, once with and once without the Website column

rdf_fullnd.to_csv('Zomato Restaurants DataFrame - No Duplicate.csv', index=False)
rdf_fullnd.drop(columns=['Website']).to_csv('Zomato Restaurants DataFrame - No Website & Duplicate.csv', index=False)

In [78]:
# Store - the rows matching Condition 1 or Condition 2 (to be re-scraped later) to CSV,
# once without and once with the Website column

rdf_fullnd[condition_1 | condition_2].drop(columns=['Website']).to_csv("Zomato Restaurants DataFrame - Remainder Condition 1 & 2.csv", index=False)
rdf_fullnd[condition_1 | condition_2].to_csv("Zomato Restaurants DataFrame - Remainder Condition 1 & 2 w Website.csv", index=False)

# STEP 3
## Scrape Data for Restaurants that have Delivery Service

In [49]:
# Initialize Web Driver and List Object (as storage)
driver = webdriver.Chrome(chromepath)
delivery_web = []

# try/finally + with: the browser session and the text file are always released,
# even when a page load fails part-way through the run.
try:
    # Append mode ("ab"): URLs from separate runs accumulate in the same text file.
    with open("jkt_restaurant_with_delivery_address.txt", "ab") as out_file:

        # Iterate through pages 1-224
        for i in range(1, 225):
            # Accessing Search Pages through looping
            driver.get('https://www.zomato.com/id/jakarta/restoran-pesan-antar?page={}'.format(i))
            # Giving time to let browser load successfully
            time.sleep(1)
            # Report progress only after the page has actually been opened
            print('Accesing Search Result Page ', str(i), ' is succesful' '\n')
            # Find all elements by class name and store in anchor variable
            delivery_web_anchor = driver.find_elements_by_class_name("result-title")

            # Extract each link once, then add it to the List Object & Text File
            for web in delivery_web_anchor:
                href = web.get_attribute('href')
                # get_attribute returns None when the element has no href;
                # skip those instead of crashing on .encode()
                if href is None:
                    continue
                delivery_web.append(href)
                out_file.write(href.encode('utf-8') + b'\n')
finally:
    # quit() ends the whole WebDriver session (browser + driver process);
    # close() would only close the current window.
    driver.quit()

Accesing Search Result Page  1  is succesful

Accesing Search Result Page  2  is succesful

Accesing Search Result Page  3  is succesful

Accesing Search Result Page  4  is succesful

Accesing Search Result Page  5  is succesful

Accesing Search Result Page  6  is succesful

Accesing Search Result Page  7  is succesful

Accesing Search Result Page  8  is succesful

Accesing Search Result Page  9  is succesful

Accesing Search Result Page  10  is succesful

Accesing Search Result Page  11  is succesful

Accesing Search Result Page  12  is succesful

Accesing Search Result Page  13  is succesful

Accesing Search Result Page  14  is succesful

Accesing Search Result Page  15  is succesful

Accesing Search Result Page  16  is succesful

Accesing Search Result Page  17  is succesful

Accesing Search Result Page  18  is succesful

Accesing Search Result Page  19  is succesful

Accesing Search Result Page  20  is succesful

Accesing Search Result Page  21  is succesful

Accesing Search Result

In [54]:
# Wrap the scraped delivery URLs in a single-column DataFrame

rdf_wdlv = pd.DataFrame({'Website': delivery_web})
rdf_wdlv

Unnamed: 0,Website
0,https://www.zomato.com/id/jakarta/gioi-senopati
1,https://www.zomato.com/id/jakarta/holywings-be...
2,https://www.zomato.com/id/jakarta/the-garden-p...
3,https://www.zomato.com/id/jakarta/glass-house-...
4,https://www.zomato.com/id/jakarta/rucis-joint-...
...,...
3343,https://www.zomato.com/id/jakarta/malabar-moun...
3344,https://www.zomato.com/id/jakarta/martabak-pec...
3345,https://www.zomato.com/id/jakarta/bruno-cafe-i...
3346,https://www.zomato.com/id/jakarta/hotaru-deli-...


In [57]:
# Add a constant 'Delivery Service' column: every URL in this frame was taken
# from the delivery search pages, so each row is marked 'Yes'.
# The column is added in place on the existing frame.

rdf_wdlv['Delivery Service'] = 'Yes'
rdf_wdlv

Unnamed: 0,Website,Delivery Service
0,https://www.zomato.com/id/jakarta/gioi-senopati,Yes
1,https://www.zomato.com/id/jakarta/holywings-be...,Yes
2,https://www.zomato.com/id/jakarta/the-garden-p...,Yes
3,https://www.zomato.com/id/jakarta/glass-house-...,Yes
4,https://www.zomato.com/id/jakarta/rucis-joint-...,Yes
...,...,...
3343,https://www.zomato.com/id/jakarta/malabar-moun...,Yes
3344,https://www.zomato.com/id/jakarta/martabak-pec...,Yes
3345,https://www.zomato.com/id/jakarta/bruno-cafe-i...,Yes
3346,https://www.zomato.com/id/jakarta/hotaru-deli-...,Yes


In [71]:
# Data cleaning: keep only the first occurrence of each fully identical row,
# then persist the result without the index column.

rdf_wdlv = rdf_wdlv.drop_duplicates()
rdf_wdlv.to_csv("Restaurants - Delivery Service.csv", index=False)

# STEP 4
## Re-Scrape Data for the Remaining Restaurants That Have Incomplete Data
## First, Restaurants that fulfill Condition 1 & 2

In [362]:
# URLs to re-scrape: rows flagged by either condition, re-indexed from 0.
# NOTE(review): condition_1 / condition_2 are defined further down in this
# notebook, so this cell relies on those cells having been run first.
rescrape = rdf_fullnd.loc[condition_1 | condition_2, 'Website'].reset_index(drop=True)
rescrape

0    https://www.zomato.com/id/jakarta/fresh-tau-fu...
Name: Website, dtype: object

In [365]:
#Scrape Needed Data
#
# For every URL in `rescrape`, open the restaurant page and collect its name,
# address and "additional info" entries. Results are stored in parallel lists
# (one entry per restaurant) so they can be zipped into a DataFrame afterwards.


def _load_restaurant_name(driver, url, wait_seconds):
    """Open `url`, wait `wait_seconds` seconds, and return the stripped text of the page's first <h1>."""
    driver.get(url)
    time.sleep(wait_seconds)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    print('Accessing Webpage OK')
    return soup.find("h1").text.strip()


def _collect_additional_infos(driver, max_items=29):
    """Return the texts of the consecutive info entries (div[1], div[2], ...) on the current page.

    The walk stops at the first index that does not exist. Running past the
    last entry is the normal way for the walk to end, so it is not treated as
    a page-load failure. An empty list means no entry was found at all.
    """
    infos = []
    for index in range(1, max_items + 1):
        try:
            anchor = driver.find_element_by_xpath(f"""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/div[3]/div[{index}]/p""")
        except NoSuchElementException:
            break
        infos.append(anchor.text)
    return infos


rest_name = []
price_for_2 = []
rest_address = []
rest_url = []
rest_info = []

driver = webdriver.Chrome(chromepath)

try:
    for url in rescrape:

        #OPEN BROWSER & RESTAURANT WEB PAGE
        name = _load_restaurant_name(driver, url, 12)

        # A gateway error page has this literal heading: reload once with a
        # longer wait and use the name from the second attempt only.
        if name == '502 Bad Gateway':
            name = _load_restaurant_name(driver, url, 20)

        print(f'Scraping Restaurant Name - {name} - OK')

        # Price scraping is disabled in this version of the cell. It is kept
        # for reference because other cells in this notebook still use a
        # 'Price for 2' column.
        # # Scrape Restaurant's Price for Two
        # try:
        #     price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[1]""")
        #     price_for_2_text = price_for_2_anchor.text
        #
        # except NoSuchElementException:
        #     driver.get(url)
        #     time.sleep(40)
        #
        #     try:
        #         price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[1]""")
        #         price_for_2_text = price_for_2_anchor.text
        #
        #     except NoSuchElementException:
        #         price_for_2_text = "No Price Data Found"
        #
        #
        # if (price_for_2_text[0:2] == 'Rp') or (price_for_2_text[0:2] == 'No'):
        #     price_for_2.append(price_for_2_text)
        # else:
        #     try:
        #         price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[2]""")
        #         price_for_2_text = price_for_2_anchor.text
        #         price_for_2.append(price_for_2_text)
        #     except NoSuchElementException:
        #         driver.get(url)
        #         time.sleep(40)
        #         price_for_2_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/section/article[1]/section[2]/p[2]""")
        #         price_for_2_text = price_for_2_anchor.text
        #         price_for_2.append(price_for_2_text)
        #
        # print(f'Scraping Restaurant Price for Two - {name} - {price_for_2_text} - OK')

        #Scrape Restaurant Address
        rest_address_anchor = driver.find_element_by_xpath("""/html/body/div[1]/div[2]/main/div/section[4]/section/article/section/p""")
        address = rest_address_anchor.text
        print(f'Scraping Restaurant Address - {name} - {address} - OK')

        #Scrape Restaurant's Additional Information
        additional_infos = _collect_additional_infos(driver)
        if not additional_infos:
            # Nothing found at all: the section may not have rendered yet,
            # so reload once before giving up.
            driver.get(url)
            time.sleep(15)
            additional_infos = _collect_additional_infos(driver)
        if not additional_infos:
            additional_infos = ["No Additional Info"]

        print(f'Scraping Additional Info - {name} - OK')
        print(additional_infos)

        # Store all fields together, only after every lookup succeeded, so the
        # parallel lists always have the same length.
        rest_name.append(name)
        rest_address.append(address)
        rest_info.append(additional_infos)
        rest_url.append(url)
        print(f'Writing Restaurant Website - {url} - OK')

        print('-------------------------------------------------------------------------------------------------------------------------------------------')
finally:
    # quit() ends the WebDriver session and its browser windows, even after an error
    driver.quit()

Accessing Webpage OK
Scraping Restaurant Name - Fresh Tau Fu - OK
Scraping Restaurant Address - Fresh Tau Fu - fX Sudirman, Lantai F5, Eat & Eat, Jl. Jenderal Sudirman, Sudirman, Jakarta - OK
Scraping Additional Info - Fresh Tau Fu - OK
['Bawa Pulang Tersedia', 'Di dalam ruangan']
Writing Restaurant Website - https://www.zomato.com/id/jakarta/fresh-tau-fu-sudirman
 - OK
-------------------------------------------------------------------------------------------------------------------------------------------


## In general, all the code below re-scrapes the data that is missing from the previous DataFrame

In [366]:
# Alternative used when 'Price for 2' is scraped instead of 'Additional Info':
# data_tuples = list(zip(rest_name[0:], rest_address[0:], price_for_2[0:], rest_url[0:]))
# rdfscrape = pd.DataFrame(data_tuples, columns=['Restaurant Name', 'Restaurant Address', 'Price for 2', 'Website'])
# rdfscrape

# One tuple per scraped restaurant; zip stops at the shortest list.
data_tuples = [record for record in zip(rest_name, rest_address, rest_info, rest_url)]
rdfscrape = pd.DataFrame(
    data_tuples,
    columns=['Restaurant Name', 'Restaurant Address', 'Additional Info', 'Website'],
)
rdfscrape

Unnamed: 0,Restaurant Name,Restaurant Address,Additional Info,Website
0,Fresh Tau Fu,"fX Sudirman, Lantai F5, Eat & Eat, Jl. Jendera...","[Bawa Pulang Tersedia, Di dalam ruangan]",https://www.zomato.com/id/jakarta/fresh-tau-fu...


In [265]:
# Sanity check: re-scraped rows whose address is an empty string
rdfscrape.loc[rdfscrape['Restaurant Address'].eq('')]

Unnamed: 0,Restaurant Name,Restaurant Address,Price for 2,Website


In [266]:
# Re-scraped rows whose price is not an empty string.
# NOTE(review): needs a 'Price for 2' column, i.e. the price-scraping variant of rdfscrape.
rdfscrape.loc[rdfscrape['Price for 2'].ne('')]

Unnamed: 0,Restaurant Name,Restaurant Address,Price for 2,Website
0,DÉJÀ VU Resto - Delua Hotel,"Delua Hotel, Jl. Mangga Besar Raya No. 82, Man...",Rp200.000 untuk 2 orang (perkiraan),https://www.zomato.com/id/jakarta/déjà-vu-rest...


In [267]:
# Left-join the newly scraped rows onto rdfs_full2 by name + URL. Columns that
# exist on both sides get the suffix _a (existing) or _b (newly scraped).
# NOTE(review): this overwrites rdfs_full2 with the merged result, so the cell is not safe to re-run.
rdfs_full2 = rdfs_full2.merge(
    rdfscrape,
    how='left',
    on=['Restaurant Name', 'Website'],
    suffixes=('_a', '_b'),
)
rdfs_full2

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address_a,Price for 2_a,Restaurant Address_b,Price for 2_b
0,Tenterem Cafe,[No Additional Info],https://www.zomato.com/id/jakarta/tenterem-caf...,"Taman Mini Indonesia Indah, Jl. Taman Prasasti...",Rp70.000 untuk 2 orang (perkiraan),,
1,Indah Seafood 94,[No Additional Info],https://www.zomato.com/id/jakarta/indah-seafoo...,"Gading Festival, Jl. Pegangsaan, Blok S No. 17...",Rp150.000 untuk 2 orang (perkiraan),,
2,Mama-Do,"[Bawa Pulang Tersedia, Di dalam ruangan, Area ...",https://www.zomato.com/id/jakarta/mama-do-1-pu...,"Taman Kuliner Kayuputih, Jl. Kayu Putih Tengah...",Rp70.000 untuk 2 orang (perkiraan),,
3,DÉJÀ VU Resto - Delua Hotel,"[Bawa Pulang Tersedia, Hanya Bir, Tempat duduk...",https://www.zomato.com/id/jakarta/déjà-vu-rest...,0,0,"Delua Hotel, Jl. Mangga Besar Raya No. 82, Man...",Rp200.000 untuk 2 orang (perkiraan)
4,Tahu Gejrot & Rujak Buah,"[Bawa Pulang Tersedia, Tempat duduk di luar, A...",https://www.zomato.com/id/jakarta/tahu-gejrot-...,"Mall Artha Gading, Lantai 2, West Food Court,J...",Rp50.000 untuk 2 orang (perkiraan),,
...,...,...,...,...,...,...,...
196,Nasi Goreng Jaya,"[Hanya Bir, Wifi Tersedia, Reservasi meja dire...",https://www.zomato.com/id/jakarta/nasi-goreng-...,"Jl. H. Ten Raya No. 27, Pulogadung, Jakarta",Rp80.000 untuk 2 orang (perkiraan),,
197,BarBarRibs,"[Pesan Antar, Bawa Pulang Tersedia, Alkohol Te...",https://www.zomato.com/id/jakarta/barbarribs-k...,"Mahaka Square, Lantai Ground, Jl. Kelapa Nias ...",Rp250.000 untuk 2 orang (perkiraan)Tanpa alkohol,,
198,K-Kitchen,"[, , , , , , , Sarapan, Pesan Antar, Bawa Pula...",https://www.zomato.com/id/jakarta/k-kitchen-se...,"Senayan City, Lantai 5, Delicaé, Jl. Asia Afri...",Rp150.000 untuk 2 orang (perkiraan),,
199,Nock Nock Cafe & Resto,"[Bawa Pulang Tersedia, Alkohol Tersedia, Di da...",https://www.zomato.com/id/jakarta/nock-nock-ca...,"Mall of Indonesia, Rukan Resort City Home, Blo...",Rp120.000 untuk 2 orang (perkiraan),,


In [268]:
# Replace NaN and empty strings with 0, in place. The res_address / res_price
# helpers in this notebook test for `== 0` to decide whether a value is missing.
rdfs_full2.replace((np.nan,''), 0, inplace=True)

In [231]:
def res_address(row):
    """Return the row's address, falling back to the re-scraped one.

    `row` is any mapping with 'Restaurant Address_a' and 'Restaurant Address_b'.
    The `_b` value is returned only when the `_a` value equals 0.
    """
    existing = row['Restaurant Address_a']
    return row['Restaurant Address_b'] if existing == 0 else existing

def res_price(row):
    """Return the row's price, falling back to the re-scraped one.

    `row` is any mapping with 'Price for 2_a' and 'Price for 2_b'.
    The `_b` value is returned only when the `_a` value equals 0.
    """
    existing = row['Price for 2_a']
    return row['Price for 2_b'] if existing == 0 else existing


In [269]:
# Collapse each _a/_b pair into a single column, row by row
rdfs_full2['Restaurant Address'] = rdfs_full2.apply(res_address, axis=1)
rdfs_full2['Price for 2'] = rdfs_full2.apply(res_price, axis=1)

In [270]:
# The suffixed helper columns are no longer needed once the pairs are collapsed
rdfs_full2.drop(
    columns=['Restaurant Address_a', 'Restaurant Address_b', 'Price for 2_a', 'Price for 2_b'],
    inplace=True,
)

In [271]:
# Display rdfs_full2 (long frames are truncated by the display.max_rows option set at the top of the notebook)
rdfs_full2

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address,Price for 2
0,Tenterem Cafe,[No Additional Info],https://www.zomato.com/id/jakarta/tenterem-caf...,"Taman Mini Indonesia Indah, Jl. Taman Prasasti...",Rp70.000 untuk 2 orang (perkiraan)
1,Indah Seafood 94,[No Additional Info],https://www.zomato.com/id/jakarta/indah-seafoo...,"Gading Festival, Jl. Pegangsaan, Blok S No. 17...",Rp150.000 untuk 2 orang (perkiraan)
2,Mama-Do,"[Bawa Pulang Tersedia, Di dalam ruangan, Area ...",https://www.zomato.com/id/jakarta/mama-do-1-pu...,"Taman Kuliner Kayuputih, Jl. Kayu Putih Tengah...",Rp70.000 untuk 2 orang (perkiraan)
3,DÉJÀ VU Resto - Delua Hotel,"[Bawa Pulang Tersedia, Hanya Bir, Tempat duduk...",https://www.zomato.com/id/jakarta/déjà-vu-rest...,"Delua Hotel, Jl. Mangga Besar Raya No. 82, Man...",Rp200.000 untuk 2 orang (perkiraan)
4,Tahu Gejrot & Rujak Buah,"[Bawa Pulang Tersedia, Tempat duduk di luar, A...",https://www.zomato.com/id/jakarta/tahu-gejrot-...,"Mall Artha Gading, Lantai 2, West Food Court,J...",Rp50.000 untuk 2 orang (perkiraan)
...,...,...,...,...,...
196,Nasi Goreng Jaya,"[Hanya Bir, Wifi Tersedia, Reservasi meja dire...",https://www.zomato.com/id/jakarta/nasi-goreng-...,"Jl. H. Ten Raya No. 27, Pulogadung, Jakarta",Rp80.000 untuk 2 orang (perkiraan)
197,BarBarRibs,"[Pesan Antar, Bawa Pulang Tersedia, Alkohol Te...",https://www.zomato.com/id/jakarta/barbarribs-k...,"Mahaka Square, Lantai Ground, Jl. Kelapa Nias ...",Rp250.000 untuk 2 orang (perkiraan)Tanpa alkohol
198,K-Kitchen,"[, , , , , , , Sarapan, Pesan Antar, Bawa Pula...",https://www.zomato.com/id/jakarta/k-kitchen-se...,"Senayan City, Lantai 5, Delicaé, Jl. Asia Afri...",Rp150.000 untuk 2 orang (perkiraan)
199,Nock Nock Cafe & Resto,"[Bawa Pulang Tersedia, Alkohol Tersedia, Di da...",https://www.zomato.com/id/jakarta/nock-nock-ca...,"Mall of Indonesia, Rukan Resort City Home, Blo...",Rp120.000 untuk 2 orang (perkiraan)


In [306]:
# Rows whose address is still the 0 placeholder, i.e. still missing
rdfs_full2.loc[rdfs_full2['Restaurant Address'].eq(0)]

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address,Price for 2


In [312]:
# Save the frame twice: once complete, once without the 'Website' column
rdfs_full2.to_csv('Restaurant DataFrame Incomplete Scrape.csv', index=False)
rdfs_full2.drop(columns=['Website']).to_csv(
    'Restaurant DataFrame Incomplete Scrape without Web.csv', index=False
)

# STEP 4 - Ch. 2

In [100]:
# Save rdfscrape3 twice: once complete, once without the 'Website' column.
# NOTE(review): rdfscrape3 is assigned in cells that appear further down in this section.
rdfscrape3.to_csv('Additional Info Missing.csv', index=False)
rdfscrape3.drop(columns=['Website']).to_csv(
    'Additional Info Missing without Web.csv', index=False
)

In [117]:
# Display rdfscrape3
rdfscrape3

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address
0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...,
1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...,
2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...,
3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...,
4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...,
...,...,...,...,...
339,Hanna Bakes,[No Additional Info],https://www.zomato.com/id/jakarta/hanna-bakes-...,"Jl. Pegangsaan Indah Bar. No.7, RT.2/RW.16, Pe..."
340,Kedai Kopi GG,"[Bawa Pulang Tersedia, Di dalam ruangan, Wifi ...",https://www.zomato.com/id/jakarta/kedai-kopi-g...,"Foresta Business Loft 5, Jl. BSD Boulevard Uta..."
341,Paladin Coffee + Kitchen,"[, , , , , , , , Sarapan, Bawa Pulang Tersedia...",https://www.zomato.com/id/jakarta/paladin-coff...,
342,Hokkaido Izakaya,"[Pesan Antar, Bawa Pulang Tersedia, Hanya Wine...",https://www.zomato.com/id/jakarta/hokkaido-iza...,"Pavilion Retail Arcade, Lantai Dasar, Jl. K.H...."


In [5]:
# Load the restaurants saved in 'Additional Info Missing.csv' and display them
rdfscrape2 = pd.read_csv('Additional Info Missing.csv')
rdfscrape2

Unnamed: 0,Restaurant Name,Additional Info,Website
0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...
1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...
2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...
3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...
4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...
...,...,...,...
339,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...
340,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...
341,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...
342,Pagi Sore,['No Additional Info'],https://www.zomato.com/id/jakarta/pagi-sore-ke...


In [13]:
# Preview the rows at these hard-coded positions (iloc is positional, not label-based).
# NOTE(review): the same position list is repeated in the cell that assigns rdfscrape3; keep them in sync.
rdfscrape2.iloc[[54,95,98,111,123,124,126,131,132,133,134,139,146,147,149,158,163,184,190,191,223,225,226,230,231,242,243,272,298,329,342,343]]

Unnamed: 0,Restaurant Name,Additional Info,Website
54,LilSpago Cafe & Game House,['No Additional Info'],https://www.zomato.com/id/jakarta/lilspago-caf...
95,Spumante,['No Additional Info'],https://www.zomato.com/id/jakarta/spumante-men...
98,Blue Terrace - AYANA Midplaza,['No Additional Info'],https://www.zomato.com/id/jakarta/blue-terrace...
111,Hideaway - JW Marriott Hotel,['No Additional Info'],https://www.zomato.com/id/jakarta/hideaway-jw-...
123,Kenjiro,['No Additional Info'],https://www.zomato.com/id/jakarta/kenjiro-seno...
...,...,...,...
272,Koun Eatery,['No Additional Info'],https://www.zomato.com/id/jakarta/koun-eatery-...
298,Bakul Nusantara,['No Additional Info'],https://www.zomato.com/id/jakarta/bakul-nusant...
329,Bebek Mercon,['No Additional Info'],https://www.zomato.com/id/jakarta/bebek-mercon...
342,Pagi Sore,['No Additional Info'],https://www.zomato.com/id/jakarta/pagi-sore-ke...


In [15]:
# Keep the rows at these hard-coded positions as rdfscrape3 (iloc is positional;
# the original row labels are preserved).
# NOTE(review): the position list duplicates the one in the preview cell above.
rdfscrape3 = rdfscrape2.iloc[[54,95,98,111,123,124,126,131,132,133,134,139,146,147,149,158,163,184,190,191,223,225,226,230,231,242,243,272,298,329,342,343]]

In [27]:
# URLs to feed into the re-scrape loop: the 'Website' column of rdfscrape3.
# This rebinds the `rescrape` name used by the scraping cell.
rescrape = rdfscrape3['Website']
rescrape

54     https://www.zomato.com/id/jakarta/lilspago-caf...
95     https://www.zomato.com/id/jakarta/spumante-men...
98     https://www.zomato.com/id/jakarta/blue-terrace...
111    https://www.zomato.com/id/jakarta/hideaway-jw-...
123    https://www.zomato.com/id/jakarta/kenjiro-seno...
                             ...                        
272    https://www.zomato.com/id/jakarta/koun-eatery-...
298    https://www.zomato.com/id/jakarta/bakul-nusant...
329    https://www.zomato.com/id/jakarta/bebek-mercon...
342    https://www.zomato.com/id/jakarta/pagi-sore-ke...
343    https://www.zomato.com/id/jakarta/hanna-bakes-...
Name: Website, Length: 32, dtype: object

In [43]:
# Display rdfscrape2
rdfscrape2

Unnamed: 0,Restaurant Name,Additional Info,Website
0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...
1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...
2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...
3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...
4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...
...,...,...,...
337,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...
338,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...
339,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...
340,Tiga Wonton,['No Additional Info'],https://www.zomato.com/id/jakarta/tiga-wonton-...


In [96]:
# Stack rdfscrape2 on top of the freshly scraped rdfscrape rows.
# reset_index() without drop=True keeps the old labels as an 'index' column;
# a later cell removes that column again.
rdfscrape3 = pd.concat((rdfscrape2, rdfscrape)).reset_index()

In [71]:
# Look up every row for one restaurant name (used here to spot duplicate entries)
rdfscrape2.loc[rdfscrape2['Restaurant Name'].eq('Kedai Kopi GG')]

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address
329,Kedai Kopi GG,[No Additional Info],https://www.zomato.com/id/jakarta/kedai-kopi-g...,"Foresta Business Loft 5, Jl. BSD Boulevard Uta..."
344,Kedai Kopi GG,"[Bawa Pulang Tersedia, Di dalam ruangan, Wifi ...",https://www.zomato.com/id/jakarta/kedai-kopi-g...,"Foresta Business Loft 5, Jl. BSD Boulevard Uta..."


In [81]:
# URLs of the rows labelled 93, 119 and 205, to be scraped again
rescrape = rdfscrape2.loc[[93, 119, 205], 'Website']

In [93]:
# Remove three rows from rdfscrape2 in place. `rdfscrape2.index[[...]]` picks
# the labels found at POSITIONS 93, 119 and 205, whereas the cell that builds
# `rescrape` selects by LABEL with .loc.
# NOTE(review): the two agree only while the index is the default 0..n-1 range (confirm).
# Re-running this cell removes three more rows.
rdfscrape2.drop(rdfscrape2.index[[93,119,205]], axis=0,inplace=True)

In [95]:
# Preview only: reset_index() returns a new frame and the result is not
# assigned, so rdfscrape2 itself keeps its current labels.
rdfscrape2.reset_index()

Unnamed: 0,index,Restaurant Name,Additional Info,Website,Restaurant Address
0,0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...,
1,1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...,
2,2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...,
3,3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...,
4,4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...,
...,...,...,...,...,...
336,340,Bakul Nusantara,"[Pesan Antar, Bawa Pulang Tersedia, Di dalam r...",https://www.zomato.com/id/jakarta/bakul-nusant...,"Ruko Citra Garden 6, Jl. Citra Garden 6 Blok H..."
337,341,Bebek Mercon,[No Additional Info],https://www.zomato.com/id/jakarta/bebek-mercon...,"Jl. Raya KSU, Sukmajaya, Depok"
338,342,Pagi Sore,[No Additional Info],https://www.zomato.com/id/jakarta/pagi-sore-ke...,"Jl. Kemang Raya, Kemang, Jakarta"
339,343,Hanna Bakes,[No Additional Info],https://www.zomato.com/id/jakarta/hanna-bakes-...,"Jl. Pegangsaan Indah Bar. No.7, RT.2/RW.16, Pe..."


In [99]:
# Discard the leftover 'index' column from rdfscrape3, in place
rdfscrape3.drop(columns=['index'], inplace=True)

In [115]:
# Display rdfscrape2
rdfscrape2

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address
0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...,
1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...,
2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...,
3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...,
4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...,
...,...,...,...,...
340,Bakul Nusantara,"[Pesan Antar, Bawa Pulang Tersedia, Di dalam r...",https://www.zomato.com/id/jakarta/bakul-nusant...,"Ruko Citra Garden 6, Jl. Citra Garden 6 Blok H..."
341,Bebek Mercon,[No Additional Info],https://www.zomato.com/id/jakarta/bebek-mercon...,"Jl. Raya KSU, Sukmajaya, Depok"
342,Pagi Sore,[No Additional Info],https://www.zomato.com/id/jakarta/pagi-sore-ke...,"Jl. Kemang Raya, Kemang, Jakarta"
343,Hanna Bakes,[No Additional Info],https://www.zomato.com/id/jakarta/hanna-bakes-...,"Jl. Pegangsaan Indah Bar. No.7, RT.2/RW.16, Pe..."


# STEP 4 - Ch. 3
## Combine the whole data

In [None]:
# Reload three frames from CSV:
#   rdf_fullnd  - read from 'Zomato Restaurants DataFrame - No Duplicate.csv'
#   rdfscrape3  - read from 'Additional Info Missing.csv'
#   rdfs_full2  - read from 'Restaurant DataFrame Incomplete Scrape.csv'
rdf_fullnd = pd.read_csv('Zomato Restaurants DataFrame - No Duplicate.csv')
rdfscrape3 = pd.read_csv('Additional Info Missing.csv')
rdfs_full2 = pd.read_csv('Restaurant DataFrame Incomplete Scrape.csv')

In [222]:
# First five rows of rdf_fullnd
rdf_fullnd.head()

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",4.4,"1,437 Ulasan",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",4.3,461 Ulasan,Rp70.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1999,106.843739,https://www.zomato.com/id/jakarta/pempek-megar...
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",4.0,391 Ulasan,Rp300.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...
3,Gado-Gado Bon-Bin,Quick Bites,['Jawa'],Cikini,"Jl. Cikini 4 No. 5, Cikini, Jakarta",4.0,177 Ulasan,Rp90.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.192959,106.838377,https://www.zomato.com/id/jakarta/gado-gado-bo...
4,Koffie Fictie,Kafe,['Kopi'],Bekasi Selatan,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Not Rated Yet,TIdak cukup Ulasan,Rp100.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.272059,106.970932,https://www.zomato.com/id/jakarta/koffie-ficti...


In [121]:
# Display rdfscrape3 as loaded from CSV
rdfscrape3

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address
0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...,
1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...,
2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...,
3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...,
4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...,
...,...,...,...,...
339,Hanna Bakes,[No Additional Info],https://www.zomato.com/id/jakarta/hanna-bakes-...,"Jl. Pegangsaan Indah Bar. No.7, RT.2/RW.16, Pe..."
340,Kedai Kopi GG,"[Bawa Pulang Tersedia, Di dalam ruangan, Wifi ...",https://www.zomato.com/id/jakarta/kedai-kopi-g...,"Foresta Business Loft 5, Jl. BSD Boulevard Uta..."
341,Paladin Coffee + Kitchen,"[, , , , , , , , Sarapan, Bawa Pulang Tersedia...",https://www.zomato.com/id/jakarta/paladin-coff...,
342,Hokkaido Izakaya,"[Pesan Antar, Bawa Pulang Tersedia, Hanya Wine...",https://www.zomato.com/id/jakarta/hokkaido-iza...,"Pavilion Retail Arcade, Lantai Dasar, Jl. K.H...."


In [122]:
# Display rdfs_full2 as loaded from CSV
rdfs_full2

Unnamed: 0,Restaurant Name,Additional Info,Website,Restaurant Address,Price for 2
0,Tenterem Cafe,['No Additional Info'],https://www.zomato.com/id/jakarta/tenterem-caf...,"Taman Mini Indonesia Indah, Jl. Taman Prasasti...",Rp70.000 untuk 2 orang (perkiraan)
1,Indah Seafood 94,['No Additional Info'],https://www.zomato.com/id/jakarta/indah-seafoo...,"Gading Festival, Jl. Pegangsaan, Blok S No. 17...",Rp150.000 untuk 2 orang (perkiraan)
2,Mama-Do,"['Bawa Pulang Tersedia', 'Di dalam ruangan', '...",https://www.zomato.com/id/jakarta/mama-do-1-pu...,"Taman Kuliner Kayuputih, Jl. Kayu Putih Tengah...",Rp70.000 untuk 2 orang (perkiraan)
3,DÉJÀ VU Resto - Delua Hotel,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/déjà-vu-rest...,"Delua Hotel, Jl. Mangga Besar Raya No. 82, Man...",Rp200.000 untuk 2 orang (perkiraan)
4,Tahu Gejrot & Rujak Buah,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/tahu-gejrot-...,"Mall Artha Gading, Lantai 2, West Food Court,J...",Rp50.000 untuk 2 orang (perkiraan)
...,...,...,...,...,...
196,Nasi Goreng Jaya,"['Hanya Bir', 'Wifi Tersedia', 'Reservasi meja...",https://www.zomato.com/id/jakarta/nasi-goreng-...,"Jl. H. Ten Raya No. 27, Pulogadung, Jakarta",Rp80.000 untuk 2 orang (perkiraan)
197,BarBarRibs,"['Pesan Antar', 'Bawa Pulang Tersedia', 'Alkoh...",https://www.zomato.com/id/jakarta/barbarribs-k...,"Mahaka Square, Lantai Ground, Jl. Kelapa Nias ...",Rp250.000 untuk 2 orang (perkiraan)Tanpa alkohol
198,K-Kitchen,"['', '', '', '', '', '', '', 'Sarapan', 'Pesan...",https://www.zomato.com/id/jakarta/k-kitchen-se...,"Senayan City, Lantai 5, Delicaé, Jl. Asia Afri...",Rp150.000 untuk 2 orang (perkiraan)
199,Nock Nock Cafe & Resto,"['Bawa Pulang Tersedia', 'Alkohol Tersedia', '...",https://www.zomato.com/id/jakarta/nock-nock-ca...,"Mall of Indonesia, Rukan Resort City Home, Blo...",Rp120.000 untuk 2 orang (perkiraan)


In [123]:
# Condition 1: boolean mask of rows in rdf_fullnd whose 'Restaurant Address' is missing (NaN)
condition_1 = rdf_fullnd['Restaurant Address'].isna()

In [124]:
# Rows of rdf_fullnd selected by condition_1 (missing address)
rdf_fullnd[condition_1]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
1201,Indah Seafood 94,Food Court,['Seafood'],Kelapa Gading,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1701543248,106.924605108,https://www.zomato.com/id/jakarta/indah-seafoo...
1219,Mama-Do,Food Court,['Manado'],Pulo Gadung,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1830380000,106.891640000,https://www.zomato.com/id/jakarta/mama-do-1-pu...
1240,DÉJÀ VU Resto - Delua Hotel,Casual Dining,"['Indonesia', 'Barat']",Mangga Besar,,Not Rated Yet,TIdak cukup Ulasan,,"['', '', '']",-6.1488740000,106.825881000,https://www.zomato.com/id/jakarta/déjà-vu-rest...
1556,Tahu Gejrot & Rujak Buah,Food Court,"['Makanan Jalanan', 'Indonesia']",Kelapa Gading,,Not Rated Yet,TIdak cukup Ulasan,,"['', '', '']",-6.1464234204,106.891523748,https://www.zomato.com/id/jakarta/tahu-gejrot-...
1599,Republik Nasi Lemak Khas Medan,Food Court,['Indonesia'],SCBD,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.2267587224,106.811620071,https://www.zomato.com/id/jakarta/republik-nas...
...,...,...,...,...,...,...,...,...,...,...,...,...
11109,Tasty Ala Untar,Quick Bites,['Chinese'],Serpong Utara,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.2381040000,106.629841000,https://www.zomato.com/id/jakarta/tasty-ala-un...
11116,Nasi Goreng Jaya,Quick Bites,['Chinese'],Pulo Gadung,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1894180071,106.877879016,https://www.zomato.com/id/jakarta/nasi-goreng-...
11124,BarBarRibs,Casual Dining,['Steak'],Kelapa Gading,,3.7,41 Ulasan,,"['', '', '', '', '']",-6.1494060000,106.903117000,https://www.zomato.com/id/jakarta/barbarribs-k...
11128,K-Kitchen,Food Court,['Korea'],Senayan,,3.8,265 Ulasan,,"['', '']",-6.2269760327,106.796928606,https://www.zomato.com/id/jakarta/k-kitchen-se...


In [125]:
# First two characters of every 'Price for 2' value (NaN stays NaN)
price_for_2_check = rdf_fullnd['Price for 2'].str[:2]
price_for_2_check

0        Rp
1        Rp
2        Rp
3        Rp
4        Rp
         ..
11262    Rp
11263    Rp
11264    Rp
11265    Rp
11266    Rp
Name: Price for 2, Length: 11267, dtype: object

In [126]:
# Number of rows whose price prefix is neither 'Rp' nor 'No' (missing prices count too)
(~price_for_2_check.isin(['Rp', 'No'])).sum()

201

In [127]:
# Condition 2: price text starts with neither 'Rp' nor 'No' (True for missing prices as well)
condition_2 = ~price_for_2_check.isin(['Rp', 'No'])

In [140]:
# Rows of rdf_fullnd selected by condition_2 (price prefix is neither 'Rp' nor 'No')
rdf_fullnd[condition_2]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address,Restaurant Rating,Review Counts,Price for 2,Additional Info,Latitude,Longitude,Website
968,Tenterem Cafe,Quick Bites,['Indonesia'],Taman Mini,"Taman Mini Indonesia Indah, Jl. Taman Prasasti...",Not Rated Yet,TIdak cukup Ulasan,"Cafe, photography studio and salon nearby Tama...","['Bawa Pulang Tersedia', 'Di dalam ruangan', '...",-6.3001420000,106.890422000,https://www.zomato.com/id/jakarta/tenterem-caf...
1201,Indah Seafood 94,Food Court,['Seafood'],Kelapa Gading,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1701543248,106.924605108,https://www.zomato.com/id/jakarta/indah-seafoo...
1219,Mama-Do,Food Court,['Manado'],Pulo Gadung,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1830380000,106.891640000,https://www.zomato.com/id/jakarta/mama-do-1-pu...
1240,DÉJÀ VU Resto - Delua Hotel,Casual Dining,"['Indonesia', 'Barat']",Mangga Besar,,Not Rated Yet,TIdak cukup Ulasan,,"['', '', '']",-6.1488740000,106.825881000,https://www.zomato.com/id/jakarta/déjà-vu-rest...
1556,Tahu Gejrot & Rujak Buah,Food Court,"['Makanan Jalanan', 'Indonesia']",Kelapa Gading,,Not Rated Yet,TIdak cukup Ulasan,,"['', '', '']",-6.1464234204,106.891523748,https://www.zomato.com/id/jakarta/tahu-gejrot-...
...,...,...,...,...,...,...,...,...,...,...,...,...
11116,Nasi Goreng Jaya,Quick Bites,['Chinese'],Pulo Gadung,,Not Rated Yet,TIdak cukup Ulasan,,"['', '']",-6.1894180071,106.877879016,https://www.zomato.com/id/jakarta/nasi-goreng-...
11124,BarBarRibs,Casual Dining,['Steak'],Kelapa Gading,,3.7,41 Ulasan,,"['', '', '', '', '']",-6.1494060000,106.903117000,https://www.zomato.com/id/jakarta/barbarribs-k...
11128,K-Kitchen,Food Court,['Korea'],Senayan,,3.8,265 Ulasan,,"['', '']",-6.2269760327,106.796928606,https://www.zomato.com/id/jakarta/k-kitchen-se...
11183,Nock Nock Cafe & Resto,Quick Bites,['Indonesia'],Kelapa Gading,,2.3,13 Ulasan,,"['', '', '']",-6.1517729750,106.891749724,https://www.zomato.com/id/jakarta/nock-nock-ca...


In [142]:
# Drop 'Additional Info' from rdfs_full2, in place, before the merge below.
# NOTE(review): re-running this cell raises KeyError because the column is already gone.
rdfs_full2.drop(columns=['Additional Info'], inplace=True)

In [143]:
# Display rdfs_full2
rdfs_full2

Unnamed: 0,Restaurant Name,Website,Restaurant Address,Price for 2
0,Tenterem Cafe,https://www.zomato.com/id/jakarta/tenterem-caf...,"Taman Mini Indonesia Indah, Jl. Taman Prasasti...",Rp70.000 untuk 2 orang (perkiraan)
1,Indah Seafood 94,https://www.zomato.com/id/jakarta/indah-seafoo...,"Gading Festival, Jl. Pegangsaan, Blok S No. 17...",Rp150.000 untuk 2 orang (perkiraan)
2,Mama-Do,https://www.zomato.com/id/jakarta/mama-do-1-pu...,"Taman Kuliner Kayuputih, Jl. Kayu Putih Tengah...",Rp70.000 untuk 2 orang (perkiraan)
3,DÉJÀ VU Resto - Delua Hotel,https://www.zomato.com/id/jakarta/déjà-vu-rest...,"Delua Hotel, Jl. Mangga Besar Raya No. 82, Man...",Rp200.000 untuk 2 orang (perkiraan)
4,Tahu Gejrot & Rujak Buah,https://www.zomato.com/id/jakarta/tahu-gejrot-...,"Mall Artha Gading, Lantai 2, West Food Court,J...",Rp50.000 untuk 2 orang (perkiraan)
...,...,...,...,...
196,Nasi Goreng Jaya,https://www.zomato.com/id/jakarta/nasi-goreng-...,"Jl. H. Ten Raya No. 27, Pulogadung, Jakarta",Rp80.000 untuk 2 orang (perkiraan)
197,BarBarRibs,https://www.zomato.com/id/jakarta/barbarribs-k...,"Mahaka Square, Lantai Ground, Jl. Kelapa Nias ...",Rp250.000 untuk 2 orang (perkiraan)Tanpa alkohol
198,K-Kitchen,https://www.zomato.com/id/jakarta/k-kitchen-se...,"Senayan City, Lantai 5, Delicaé, Jl. Asia Afri...",Rp150.000 untuk 2 orang (perkiraan)
199,Nock Nock Cafe & Resto,https://www.zomato.com/id/jakarta/nock-nock-ca...,"Mall of Indonesia, Rukan Resort City Home, Blo...",Rp120.000 untuk 2 orang (perkiraan)


In [146]:
# Left-join the re-scraped rows onto the full restaurant table by name + URL.
# Columns that exist on both sides get the suffix _a (rdf_fullnd) or _b (rdfs_full2).
rdf_comb_nd = rdf_fullnd.merge(
    rdfs_full2,
    how='left',
    on=['Restaurant Name', 'Website'],
    suffixes=('_a', '_b'),
)

In [221]:
# First five rows of the merged frame
rdf_comb_nd.head()

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address_a,Restaurant Rating,Review Counts,Price for 2_a,Additional Info,Latitude,Longitude,Website,Restaurant Address_b,Price for 2_b,Restaurant Address,Price for 2
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",4.4,"1,437 Ulasan",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...,,,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",4.3,461 Ulasan,Rp70.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1999,106.843739,https://www.zomato.com/id/jakarta/pempek-megar...,,,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",Rp70.000 untuk 2 orang (perkiraan)
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",4.0,391 Ulasan,Rp300.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...,,,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",Rp300.000 untuk 2 orang (perkiraan)
3,Gado-Gado Bon-Bin,Quick Bites,['Jawa'],Cikini,"Jl. Cikini 4 No. 5, Cikini, Jakarta",4.0,177 Ulasan,Rp90.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.192959,106.838377,https://www.zomato.com/id/jakarta/gado-gado-bo...,,,"Jl. Cikini 4 No. 5, Cikini, Jakarta",Rp90.000 untuk 2 orang (perkiraan)
4,Koffie Fictie,Kafe,['Kopi'],Bekasi Selatan,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Not Rated Yet,TIdak cukup Ulasan,Rp100.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.272059,106.970932,https://www.zomato.com/id/jakarta/koffie-ficti...,,,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Rp100.000 untuk 2 orang (perkiraan)


In [157]:
# First two characters of the original ('_a') price in the merged frame
price_for_2_check2 = rdf_comb_nd['Price for 2_a'].str[:2]
price_for_2_check2

0        Rp
1        Rp
2        Rp
3        Rp
         ..
11263    Rp
11264    Rp
11265    Rp
11266    Rp
Name: Price for 2_a, Length: 11267, dtype: object

In [158]:
# Recompute both conditions against the merged frame rdf_comb_nd.
# condition_2 must use price_for_2_check2 (derived from rdf_comb_nd), not the
# older price_for_2_check (derived from rdf_fullnd), so that both masks are
# aligned with the rows of the same frame.
condition_2 = ~((price_for_2_check2 == 'Rp') | (price_for_2_check2 == 'No'))
condition_1 = rdf_comb_nd['Restaurant Address_a'].isna()

In [214]:
def res_address2(row):
    """Choose the address to keep for one merged row.

    The first two characters of ``str(row['Price for 2_a'])`` act as the switch:
    when they are ``'Rp'`` or ``'No'`` the ``'Restaurant Address_a'`` value is
    returned, otherwise the ``'Restaurant Address_b'`` value is returned.
    """
    price_prefix = str(row['Price for 2_a'])[:2]
    keep_a = price_prefix in ('Rp', 'No')
    return row['Restaurant Address_a'] if keep_a else row['Restaurant Address_b']

def res_price2(row):
    """Choose the price text to keep for one merged row.

    When ``str(row['Price for 2_a'])`` begins with ``'Rp'`` or ``'No'`` that
    original value is returned; in every other case the ``'Price for 2_b'``
    value is returned instead.
    """
    price_prefix = str(row['Price for 2_a'])[:2]
    keep_a = price_prefix in ('Rp', 'No')
    return row['Price for 2_a'] if keep_a else row['Price for 2_b']

In [215]:
# Resolve the final address and price row by row with the helper functions
rdf_comb_nd['Restaurant Address'] = rdf_comb_nd.apply(res_address2, axis=1)
rdf_comb_nd['Price for 2'] = rdf_comb_nd.apply(res_price2, axis=1)

In [216]:
# Preview the first five rows with the resolved columns
rdf_comb_nd.head(5)

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Address_a,Restaurant Rating,Review Counts,Price for 2_a,Additional Info,Latitude,Longitude,Website,Restaurant Address_b,Price for 2_b,Restaurant Address,Price for 2
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",4.4,"1,437 Ulasan",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...,,,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",4.3,461 Ulasan,Rp70.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1999,106.843739,https://www.zomato.com/id/jakarta/pempek-megar...,,,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",Rp70.000 untuk 2 orang (perkiraan)
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",4.0,391 Ulasan,Rp300.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...,,,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",Rp300.000 untuk 2 orang (perkiraan)
3,Gado-Gado Bon-Bin,Quick Bites,['Jawa'],Cikini,"Jl. Cikini 4 No. 5, Cikini, Jakarta",4.0,177 Ulasan,Rp90.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.192959,106.838377,https://www.zomato.com/id/jakarta/gado-gado-bo...,,,"Jl. Cikini 4 No. 5, Cikini, Jakarta",Rp90.000 untuk 2 orang (perkiraan)
4,Koffie Fictie,Kafe,['Kopi'],Bekasi Selatan,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Not Rated Yet,TIdak cukup Ulasan,Rp100.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.272059,106.970932,https://www.zomato.com/id/jakarta/koffie-ficti...,,,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Rp100.000 untuk 2 orang (perkiraan)


In [223]:
# Remove the suffixed merge columns now that the resolved columns exist
rdf_comb_nd.drop(
    columns=['Restaurant Address_a', 'Restaurant Address_b', 'Price for 2_a', 'Price for 2_b'],
    inplace=True,
)

In [241]:
# Rows that still have no address
condition_1 = rdf_comb_nd['Restaurant Address'].isnull()

# Rows whose price text starts with neither 'Rp' nor 'No' (missing prices included)
price_for_2_check3 = rdf_comb_nd['Price for 2'].str[:2]
condition_2 = (price_for_2_check3 != 'Rp') & (price_for_2_check3 != 'No')


In [226]:
# Rows flagged by condition_1 (missing address)
rdf_comb_nd.loc[condition_1]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Additional Info,Latitude,Longitude,Website,Restaurant Address,Price for 2
6126,Hokkaido Ice Cream Puff,Dessert Parlor,"['Desserts', 'Ice Cream']",Serpong Utara,3.3,21 Ulasan,"['', '', '']",-6.240889,106.628398,https://www.zomato.com/id/jakarta/hokkaido-ice...,,
9207,Pempek Kenari,Quick Bites,['Palembang'],Kec. Tangerang,3.7,13 Ulasan,[],-6.196442,106.637898,https://www.zomato.com/id/jakarta/pempek-kenar...,,
9923,Kupat Tahu Magelang,Quick Bites,['Jawa'],Lebak Bulus,3.4,27 Ulasan,[],-6.297563,106.781898,https://www.zomato.com/id/jakarta/kupat-tahu-m...,,


In [227]:
# Rows flagged by condition_2 (price text without an 'Rp'/'No' prefix)
rdf_comb_nd.loc[condition_2]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Additional Info,Latitude,Longitude,Website,Restaurant Address,Price for 2
6126,Hokkaido Ice Cream Puff,Dessert Parlor,"['Desserts', 'Ice Cream']",Serpong Utara,3.3,21 Ulasan,"['', '', '']",-6.240889,106.628398,https://www.zomato.com/id/jakarta/hokkaido-ice...,,
9207,Pempek Kenari,Quick Bites,['Palembang'],Kec. Tangerang,3.7,13 Ulasan,[],-6.196442,106.637898,https://www.zomato.com/id/jakarta/pempek-kenar...,,
9923,Kupat Tahu Magelang,Quick Bites,['Jawa'],Lebak Bulus,3.4,27 Ulasan,[],-6.297563,106.781898,https://www.zomato.com/id/jakarta/kupat-tahu-m...,,


In [235]:
# Show rdfscrape, the frame that the following cell left-merges into rdf_comb_nd
rdfscrape

Unnamed: 0,Restaurant Name,Restaurant Address,Price for 2,Website
0,Hokkaido Ice Cream Puff,"Summarecon Mall Serpong, Lantai 2, Jl. Bouleva...",Rp80.000 untuk 2 orang (perkiraan),https://www.zomato.com/id/jakarta/hokkaido-ice...
1,Pempek Kenari,"Ruko Modernland Metro, Blok B No. 19, Jl. Hart...",Rp100.000 untuk 2 orang (perkiraan),https://www.zomato.com/id/jakarta/pempek-kenar...
2,Kupat Tahu Magelang,"Jl. Lebak Bulus 1 No. 18, Lebak Bulus, Jakarta",Rp60.000 untuk 2 orang (perkiraan),https://www.zomato.com/id/jakarta/kupat-tahu-m...


In [236]:
# Left-join rdfscrape on name + website; overlapping columns get the _a/_b suffixes
rdf_comb_nd = rdf_comb_nd.merge(
    rdfscrape,
    how='left',
    on=['Restaurant Name', 'Website'],
    suffixes=('_a', '_b'),
)

In [237]:
# Inspect rdf_comb_nd after the merge (address and price now carry _a/_b suffixes)
rdf_comb_nd

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Additional Info,Latitude,Longitude,Website,Restaurant Address_a,Price for 2_a,Restaurant Address_b,Price for 2_b
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,4.4,"1,437 Ulasan","['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,,
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,4.3,461 Ulasan,"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1999000000,106.843739000,https://www.zomato.com/id/jakarta/pempek-megar...,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",Rp70.000 untuk 2 orang (perkiraan),,
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,4.0,391 Ulasan,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",Rp300.000 untuk 2 orang (perkiraan),,
3,Gado-Gado Bon-Bin,Quick Bites,['Jawa'],Cikini,4.0,177 Ulasan,"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1929590000,106.838377000,https://www.zomato.com/id/jakarta/gado-gado-bo...,"Jl. Cikini 4 No. 5, Cikini, Jakarta",Rp90.000 untuk 2 orang (perkiraan),,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11263,Senja Toast,Quick Bites,['Kue & Roti'],Jagakarsa,Not Rated Yet,Not Reviewed Yet,"['Sarapan', 'Bawa Pulang Tersedia', 'Di dalam ...",-6.3443890000,106.832833000,https://www.zomato.com/id/jakarta/senja-toast-...,"Jl. Raya Lenteng Agung, Jagakarsa, Jakarta",Rp60.000 untuk 2 orang (perkiraan),,
11264,Express Juice,Toko Minuman,['Jus'],Bojongsari,Not Rated Yet,Not Reviewed Yet,"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.3611110000,106.747747000,https://www.zomato.com/id/jakarta/express-juic...,"Jl. Raya Ciputat Parung no 60, Bojongsari, Depok",Rp50.000 untuk 2 orang (perkiraan),,
11265,Ayam Selebriti,Quick Bites,['Indonesia'],Meruya,Not Rated Yet,Not Reviewed Yet,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.2109790000,106.737538000,https://www.zomato.com/id/jakarta/ayam-selebri...,"Jl. Meruya Selatan No.21, Meruya, Jakarta",Rp50.000 untuk 2 orang (perkiraan),,
11266,Langit Coffee,Toko Minuman,['Kopi'],Kampung Melayu,Not Rated Yet,Not Reviewed Yet,"['Bawa Pulang Tersedia', 'Di dalam ruangan', '...",-6.2305090000,106.866974000,https://www.zomato.com/id/jakarta/langit-coffe...,"Jl. Otista Raya No. 60A, Kampung Melayu, Jakarta",Rp50.000 untuk 2 orang (perkiraan),,


In [238]:
# Rebuild the resolved address and price columns from the _a/_b pairs
rdf_comb_nd['Restaurant Address'] = rdf_comb_nd.apply(res_address2, axis=1)
rdf_comb_nd['Price for 2'] = rdf_comb_nd.apply(res_price2, axis=1)

In [239]:
# Discard the suffixed helper columns left over from the merge
rdf_comb_nd.drop(
    columns=['Restaurant Address_a', 'Restaurant Address_b', 'Price for 2_a', 'Price for 2_b'],
    inplace=True,
)

In [None]:
# Boolean mask: 'Additional Info' text that starts with "[]"
rdf_comb_nd['Additional Info'].str.slice(0, 2).eq("[]")

In [265]:
# Rows whose 'Additional Info' text starts with "[''" or with "[]"
condition_3 = (
    rdf_comb_nd['Additional Info'].str.slice(0, 3).eq("[''")
    | rdf_comb_nd['Additional Info'].str.slice(0, 2).eq("[]")
)

In [266]:
# Rows flagged by condition_3
rdf_comb_nd.loc[condition_3]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Additional Info,Latitude,Longitude,Website,Restaurant Address,Price for 2
22,Siomay Batagor Satya Tenggiri,Kaki Lima,['Indonesia'],Sunter,Not Rated Yet,TIdak cukup Ulasan,[],-6.1387440000,106.861207000,https://www.zomato.com/id/jakarta/siomay-batag...,"Jl. Agung Utara 24 No. 23, Sunter, Jakarta",Rp50.000 untuk 2 orang (perkiraan)
305,Sop & Bakmi Paton,Food Court,['Bakmi'],Bogor Barat,Not Rated Yet,TIdak cukup Ulasan,[],-6.5477290000,106.760553000,https://www.zomato.com/id/jakarta/sop-bakmi-pa...,"Food Court Pulen Kitchens, Jl. Raya Semplak No...",Rp50.000 untuk 2 orang (perkiraan)
452,Mee_Oowfè,Quick Bites,"['Indonesia', 'Kopi']",Bogor Utara,Not Rated Yet,TIdak cukup Ulasan,[],-6.5820490000,106.814839000,https://www.zomato.com/id/jakarta/mee-oowfè-2-...,"Ruko 2C, Jl. Achmad Sobana No. 28, Bogor Utara...",No Price Data Found
453,Turn On Coffee & Eatery,Quick Bites,"['Indonesia', 'Barat', 'Kopi']",Kebon Jeruk,Not Rated Yet,TIdak cukup Ulasan,[],-6.1972550000,106.764359000,https://www.zomato.com/id/jakarta/turn-on-coff...,"Harvia Suites Building, Jl. Raya Perjuangan No...",No Price Data Found
...,...,...,...,...,...,...,...,...,...,...,...,...
11201,Tiga Wonton,Quick Bites,['Chinese'],Gatot Subroto,Not Rated Yet,Not Reviewed Yet,[],-6.2360723229,106.823362782,https://www.zomato.com/id/jakarta/tiga-wonton-...,"Wisma Mulia City Plaza, Lantai Lower Ground, J...",Rp120.000 untuk 2 orang (perkiraan)
11202,Tiga Wonton,Quick Bites,['Chinese'],Serpong Utara,Not Rated Yet,Not Reviewed Yet,[],-6.2418638889,106.657019444,https://www.zomato.com/id/jakarta/tiga-wonton-...,"Living World, Lantai Ground, Jl. Alam Sutera B...",Rp120.000 untuk 2 orang (perkiraan)
11204,Pagi Sore,Casual Dining,['Padang'],Kemang,Not Rated Yet,Not Reviewed Yet,[],-6.2611180000,106.815419000,https://www.zomato.com/id/jakarta/pagi-sore-ke...,"Jl. Kemang Raya, Kemang, Jakarta",Rp150.000 untuk 2 orang (perkiraan)
11215,Hanna Bakes,Toko Kue & Roti,"['Kue & Roti', 'Desserts']",Kelapa Gading,Not Rated Yet,Not Reviewed Yet,[],-6.1685020000,106.915511000,https://www.zomato.com/id/jakarta/hanna-bakes-...,"Jl. Pegangsaan Indah Bar. No.7, RT.2/RW.16, Pe...",No Price Data Found


In [276]:
# Preview the first five rows of rdfscrape3
rdfscrape3.head(5)

Unnamed: 0,Restaurant Name,Additional Info,Website
0,Siomay Batagor Satya Tenggiri,['No Additional Info'],https://www.zomato.com/id/jakarta/siomay-batag...
1,Sop & Bakmi Paton,['No Additional Info'],https://www.zomato.com/id/jakarta/sop-bakmi-pa...
2,Mee_Oowfè,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...",https://www.zomato.com/id/jakarta/mee-oowfè-2-...
3,Turn On Coffee & Eatery,"['Bawa Pulang Tersedia', 'Hanya Bir', 'Tempat ...",https://www.zomato.com/id/jakarta/turn-on-coff...
4,One More Shot Coffee House,"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",https://www.zomato.com/id/jakarta/one-more-sho...


In [311]:
# Left-join rdfscrape4 on name + website; 'Additional Info' becomes an _a/_b pair.
# NOTE(review): the preview cell before this one shows rdfscrape3, not rdfscrape4 - confirm
# which frame is intended.
rdf_comb_nd2 = rdf_comb_nd.merge(
    rdfscrape4,
    how='left',
    on=['Restaurant Name', 'Website'],
    suffixes=('_a', '_b'),
)

In [312]:
def add_info(row):
    """Choose the 'Additional Info' value to keep for one merged row.

    If ``str(row['Additional Info_a'])`` starts with ``"[''"`` or ``"[]"`` the
    ``'Additional Info_b'`` value is returned; otherwise the ``_a`` value is kept.
    """
    original_text = str(row['Additional Info_a'])
    if original_text.startswith(("[''", "[]")):
        return row['Additional Info_b']
    return row['Additional Info_a']


In [313]:
# Resolve the final 'Additional Info' value per row
rdf_comb_nd2['Additional Info'] = rdf_comb_nd2.apply(add_info, axis=1)

In [314]:
# Show the rows flagged by condition_3 in the merged frame.
# NOTE(review): condition_3 was built from rdf_comb_nd, not rdf_comb_nd2; this
# presumably relies on both frames sharing the same row index - confirm.
rdf_comb_nd2[condition_3]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Additional Info_a,Latitude,Longitude,Website,Restaurant Address,Price for 2,Additional Info_b,Additional Info
22,Siomay Batagor Satya Tenggiri,Kaki Lima,['Indonesia'],Sunter,Not Rated Yet,TIdak cukup Ulasan,[],-6.1387440000,106.861207000,https://www.zomato.com/id/jakarta/siomay-batag...,"Jl. Agung Utara 24 No. 23, Sunter, Jakarta",Rp50.000 untuk 2 orang (perkiraan),['No Additional Info'],['No Additional Info']
305,Sop & Bakmi Paton,Food Court,['Bakmi'],Bogor Barat,Not Rated Yet,TIdak cukup Ulasan,[],-6.5477290000,106.760553000,https://www.zomato.com/id/jakarta/sop-bakmi-pa...,"Food Court Pulen Kitchens, Jl. Raya Semplak No...",Rp50.000 untuk 2 orang (perkiraan),['No Additional Info'],['No Additional Info']
452,Mee_Oowfè,Quick Bites,"['Indonesia', 'Kopi']",Bogor Utara,Not Rated Yet,TIdak cukup Ulasan,[],-6.5820490000,106.814839000,https://www.zomato.com/id/jakarta/mee-oowfè-2-...,"Ruko 2C, Jl. Achmad Sobana No. 28, Bogor Utara...",No Price Data Found,"['', '', '', 'Bawa Pulang Tersedia', 'Di dalam...","['', '', '', 'Bawa Pulang Tersedia', 'Di dalam..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11202,Tiga Wonton,Quick Bites,['Chinese'],Serpong Utara,Not Rated Yet,Not Reviewed Yet,[],-6.2418638889,106.657019444,https://www.zomato.com/id/jakarta/tiga-wonton-...,"Living World, Lantai Ground, Jl. Alam Sutera B...",Rp120.000 untuk 2 orang (perkiraan),['No Additional Info'],['No Additional Info']
11204,Pagi Sore,Casual Dining,['Padang'],Kemang,Not Rated Yet,Not Reviewed Yet,[],-6.2611180000,106.815419000,https://www.zomato.com/id/jakarta/pagi-sore-ke...,"Jl. Kemang Raya, Kemang, Jakarta",Rp150.000 untuk 2 orang (perkiraan),[No Additional Info],[No Additional Info]
11215,Hanna Bakes,Toko Kue & Roti,"['Kue & Roti', 'Desserts']",Kelapa Gading,Not Rated Yet,Not Reviewed Yet,[],-6.1685020000,106.915511000,https://www.zomato.com/id/jakarta/hanna-bakes-...,"Jl. Pegangsaan Indah Bar. No.7, RT.2/RW.16, Pe...",No Price Data Found,[No Additional Info],[No Additional Info]


In [315]:
# Inspect rdf_comb_nd2 with the resolved 'Additional Info' column alongside its _a/_b sources
rdf_comb_nd2

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Additional Info_a,Latitude,Longitude,Website,Restaurant Address,Price for 2,Additional Info_b,Additional Info
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,4.4,"1,437 Ulasan","['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi..."
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,4.3,461 Ulasan,"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.1999000000,106.843739000,https://www.zomato.com/id/jakarta/pempek-megar...,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",Rp70.000 untuk 2 orang (perkiraan),,"['Bawa Pulang Tersedia', 'Di dalam ruangan']"
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,4.0,391 Ulasan,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",Rp300.000 untuk 2 orang (perkiraan),,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11264,Express Juice,Toko Minuman,['Jus'],Bojongsari,Not Rated Yet,Not Reviewed Yet,"['Bawa Pulang Tersedia', 'Di dalam ruangan']",-6.3611110000,106.747747000,https://www.zomato.com/id/jakarta/express-juic...,"Jl. Raya Ciputat Parung no 60, Bojongsari, Depok",Rp50.000 untuk 2 orang (perkiraan),,"['Bawa Pulang Tersedia', 'Di dalam ruangan']"
11265,Ayam Selebriti,Quick Bites,['Indonesia'],Meruya,Not Rated Yet,Not Reviewed Yet,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",-6.2109790000,106.737538000,https://www.zomato.com/id/jakarta/ayam-selebri...,"Jl. Meruya Selatan No.21, Meruya, Jakarta",Rp50.000 untuk 2 orang (perkiraan),,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp..."
11266,Langit Coffee,Toko Minuman,['Kopi'],Kampung Melayu,Not Rated Yet,Not Reviewed Yet,"['Bawa Pulang Tersedia', 'Di dalam ruangan', '...",-6.2305090000,106.866974000,https://www.zomato.com/id/jakarta/langit-coffe...,"Jl. Otista Raya No. 60A, Kampung Melayu, Jakarta",Rp50.000 untuk 2 orang (perkiraan),,"['Bawa Pulang Tersedia', 'Di dalam ruangan', '..."


In [382]:
# Remove the suffixed 'Additional Info' source columns
rdf_comb_nd2.drop(columns=['Additional Info_a', 'Additional Info_b'], inplace=True)

In [317]:
# Write the frame to CSV twice: once complete, once without the 'Website' column
rdf_comb_nd2.to_csv("Zomato Restaurants DataFrame - No Duplicate - Final.csv", index=False)
rdf_comb_nd2.drop(columns=['Website']).to_csv(
    "Zomato Restaurants DataFrame - No Duplicate - Final Without Website.csv",
    index=False,
)

In [320]:
# Openings of a stringified list whose first element begins with a digit 1-9
prefixes = ["['{}".format(digit) for digit in range(1, 10)]


def info_test(row):
    """Tell whether ``str(row['Additional Info'])`` starts with one of ``prefixes``.

    Returns ``True`` when it does and ``False`` otherwise.
    """
    return str(row['Additional Info']).startswith(tuple(prefixes))

In [321]:
# Flag rows whose 'Additional Info' text starts with a digit-led entry
rdf_comb_nd2['Additional Info Check'] = rdf_comb_nd2.apply(info_test, axis=1)

In [326]:
# Rows where the 'Additional Info Check' flag is True
rdf_comb_nd2.loc[rdf_comb_nd2['Additional Info Check'] == True]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Latitude,Longitude,Website,Restaurant Address,Price for 2,Additional Info,Additional Info Check
2339,Street Kings,Quick Bites,"['Kebab', 'Barat']",Karet,4.0,89 Ulasan,-6.224062,106.823137,https://www.zomato.com/id/jakarta/street-kings...,"Ciputra World, Lotte Shopping Avenue, Lantai L...",Rp160.000 untuk 2 orang (perkiraan),"['19th Dec, Thu - 31st Dec, Thu | 09:00 am - 1...",True
2748,Semusim Coffee Garden,Kafe,"['Kopi', 'Tea', 'Indonesia']",Bintaro,4.0,120 Ulasan,-6.272642,106.762113,https://www.zomato.com/id/jakarta/semusim-coff...,"Jl. Kesehatan V No. 39, Bintaro, Jakarta 12330",Rp150.000 untuk 2 orang (perkiraan),"['3rd Dec, Tue - 3rd Dec, Thu | 12:00 am - 12:...",True
2982,The Neighbourhood,Fine Dining,['Indonesia'],Dharmawangsa,4.4,747 Ulasan,-6.239552882,106.814613081,https://www.zomato.com/id/jakarta/the-neighbou...,"Jl. Cipaku I No. 85, Dharmawangsa, Jakarta 12170",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['6th Aug, Thu - 31st Oct, Sat | 07:00 pm - 08...",True
3010,Cutt & Grill,Casual Dining,"['Grill House', 'Burger', 'Steak', 'Kebab']",Senopati,4.1,93 Ulasan,-6.235830686,106.813607253,https://www.zomato.com/id/jakarta/cutt-grill-s...,"Jl. Suryo No. 34, Senopati, Jakarta 12180",Rp350.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['3rd Aug, Mon - 29th Sep, Tue | 11:00 am - 12...",True
3013,Tucano's Churrascaria - Brazilian BBQ & Buffet,Casual Dining,"['Steak', 'Grill House', 'Barbekyu', 'Amerika ...",Sudirman,4.7,758 Ulasan,-6.2079620378,106.81772679,https://www.zomato.com/id/jakarta/tucanos-chur...,"Pavilion Retail Arcade, Lantai Dasar, Jl. K.H ...",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['3rd Aug, Mon - 30th Sep, Wed | 11:00 am - 10...",True
3076,B'Steak Grill & Pancake,Casual Dining,"['Steak', 'Grill House']",Green Ville,4.5,497 Ulasan,-6.169438,106.771378,https://www.zomato.com/id/jakarta/b-steak-gril...,"Jl. Green Ville AS No. 32, Green Ville, Jakart...",Rp300.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['5th Sep, Sat - 30th Sep, Wed | 11:00 am - 08...",True
3162,Sailendra - JW Marriott Hotel,Casual Dining,"['Indonesia', 'Jepang', 'India', 'Barat', 'Asia']",Kuningan,4.1,941 Ulasan,-6.2276599597,106.827097758,https://www.zomato.com/id/jakarta/sailendra-jw...,"JW Marriott Hotel Jakarta, Kawasan Mega Kuning...",Rp600.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['3rd Aug, Mon - 31st Dec, Thu | 07:00 am - 11...",True
3200,Liberta,Casual Dining,['Itali'],Sudirman,3.9,66 Ulasan,-6.2075190686,106.817686893,https://www.zomato.com/id/jakarta/liberta-sudi...,"Apartemen Pavilion, Jl. KH. Mas Mansyur, Sudir...",Rp200.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['1st Sep, Tue - 30th Sep, Wed | 11:30 am - 08...",True
3361,The Chinese National - Swissôtel Jakarta PIK A...,Casual Dining,"['Asia', 'Barat']",Pantai Indah Kapuk,3.1,34 Ulasan,-6.1079215775,106.740075834,https://www.zomato.com/id/jakarta/the-chinese-...,"Swissôtel Jakarta PIK Avenue, Jl. Pantai Indah...",Rp300.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['21st Feb, Fri - 18th Dec, Fri | 06:00 pm - 0...",True
3364,BASQUE Bar de Tapas,Casual Dining,"['Bar', 'Spanyol']",Kuningan,4.5,787 Ulasan,-6.228218,106.825387,https://www.zomato.com/id/jakarta/basque-bar-d...,"Noble House Building, Lantai 8, Jl. Mega Kunin...",Rp750.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['3rd Sep, Thu - 31st Oct, Sat | 06:00 pm - 11...",True


In [329]:
# Websites of the rows whose 'Additional Info Check' flag is True
rescrape = rdf_comb_nd2.loc[rdf_comb_nd2['Additional Info Check'] == True, 'Website']

In [336]:
# Preview the first five rows of rdfscrape
rdfscrape.head(5)

Unnamed: 0,Restaurant Name,Additional Info,Website
0,Street Kings,"[Bawa Pulang Tersedia, Ramah untuk Anak, Di da...",https://www.zomato.com/id/jakarta/street-kings...
1,Semusim Coffee Garden,"[Sarapan, Bawa Pulang Tersedia, Tempat duduk d...",https://www.zomato.com/id/jakarta/semusim-coff...
2,The Neighbourhood,"[Bawa Pulang Tersedia, Alkohol Tersedia, Ruang...",https://www.zomato.com/id/jakarta/the-neighbou...
3,Cutt & Grill,"[Pesan Antar, Bawa Pulang Tersedia, Hanya Bir,...",https://www.zomato.com/id/jakarta/cutt-grill-s...
4,Tucano's Churrascaria - Brazilian BBQ & Buffet,"[Pesan Antar, Bawa Pulang Tersedia, Hanya Wine...",https://www.zomato.com/id/jakarta/tucanos-chur...


In [337]:
# Left-join rdfscrape on name + website; 'Additional Info' becomes an _a/_b pair
rdf_comb_nd2 = rdf_comb_nd2.merge(
    rdfscrape,
    how='left',
    on=['Restaurant Name', 'Website'],
    suffixes=('_a', '_b'),
)

In [340]:
def info_test2(row):
    """Choose the 'Additional Info' value for one row based on its check flag.

    Returns ``row['Additional Info_b']`` when ``row['Additional Info Check']``
    compares equal to ``True``; otherwise returns ``row['Additional Info_a']``.
    """
    flagged = row['Additional Info Check'] == True
    return row['Additional Info_b'] if flagged else row['Additional Info_a']


In [341]:
# Resolve 'Additional Info' per row using the check flag
rdf_comb_nd2['Additional Info'] = rdf_comb_nd2.apply(info_test2, axis=1)


In [344]:
# Rows where the 'Additional Info Check' flag is False
rdf_comb_nd2.loc[rdf_comb_nd2['Additional Info Check'] == False]

Unnamed: 0,Restaurant Name,Restaurant Type,Cuisine Type,Restaurant Area,Restaurant Rating,Review Counts,Latitude,Longitude,Website,Restaurant Address,Price for 2,Additional Info_a,Additional Info Check,Additional Info_b,Additional Info
0,Holy Smokes,Casual Dining,"['Grill House', 'Steak']",Senopati,4.4,"1,437 Ulasan",-6.2391432684,106.807954162,https://www.zomato.com/id/jakarta/holy-smokes-...,"Jl. Wolter Monginsidi No. 27, Senopati, Jakarta",Rp500.000 untuk 2 orang (perkiraan)Tanpa alkohol,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi...",False,,"['Hanya Wine dan Bir', 'Di dalam ruangan', 'Wi..."
1,Pempek Megaria,Quick Bites,['Palembang'],Cikini,4.3,461 Ulasan,-6.1999000000,106.843739000,https://www.zomato.com/id/jakarta/pempek-megar...,"Metropole 21, Jl. Pegangsaan Timur No. 21, Cik...",Rp70.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",False,,"['Bawa Pulang Tersedia', 'Di dalam ruangan']"
2,Tjikinii Lima,Casual Dining,"['Barat', 'Indonesia']",Cikini,4.0,391 Ulasan,-6.1898019937,106.837150007,https://www.zomato.com/id/jakarta/tjikinii-lim...,"Jl. Cikini I No. 5, Cikini, Jakarta 10330",Rp300.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",False,,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp..."
3,Gado-Gado Bon-Bin,Quick Bites,['Jawa'],Cikini,4.0,177 Ulasan,-6.1929590000,106.838377000,https://www.zomato.com/id/jakarta/gado-gado-bo...,"Jl. Cikini 4 No. 5, Cikini, Jakarta",Rp90.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",False,,"['Bawa Pulang Tersedia', 'Di dalam ruangan']"
4,Koffie Fictie,Kafe,['Kopi'],Bekasi Selatan,Not Rated Yet,TIdak cukup Ulasan,-6.2720590000,106.970932000,https://www.zomato.com/id/jakarta/koffie-ficti...,"Grand Galaxy Park, Ruko RSOD No. 026, Jl. Lotu...",Rp100.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",False,,"['Bawa Pulang Tersedia', 'Di dalam ruangan']"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11262,Essence Coffee,Toko Minuman,"['Kopi', 'Snacks']",Pondok Bambu,Not Rated Yet,Not Reviewed Yet,-6.2376240000,106.907457000,https://www.zomato.com/id/jakarta/essence-coff...,"Jl. Bambu Ori Raya, Pondok Bambu, Jakarta",Rp80.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Tempat duduk di luar...",False,,"['Bawa Pulang Tersedia', 'Tempat duduk di luar..."
11263,Senja Toast,Quick Bites,['Kue & Roti'],Jagakarsa,Not Rated Yet,Not Reviewed Yet,-6.3443890000,106.832833000,https://www.zomato.com/id/jakarta/senja-toast-...,"Jl. Raya Lenteng Agung, Jagakarsa, Jakarta",Rp60.000 untuk 2 orang (perkiraan),"['Sarapan', 'Bawa Pulang Tersedia', 'Di dalam ...",False,,"['Sarapan', 'Bawa Pulang Tersedia', 'Di dalam ..."
11264,Express Juice,Toko Minuman,['Jus'],Bojongsari,Not Rated Yet,Not Reviewed Yet,-6.3611110000,106.747747000,https://www.zomato.com/id/jakarta/express-juic...,"Jl. Raya Ciputat Parung no 60, Bojongsari, Depok",Rp50.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Di dalam ruangan']",False,,"['Bawa Pulang Tersedia', 'Di dalam ruangan']"
11265,Ayam Selebriti,Quick Bites,['Indonesia'],Meruya,Not Rated Yet,Not Reviewed Yet,-6.2109790000,106.737538000,https://www.zomato.com/id/jakarta/ayam-selebri...,"Jl. Meruya Selatan No.21, Meruya, Jakarta",Rp50.000 untuk 2 orang (perkiraan),"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp...",False,,"['Bawa Pulang Tersedia', 'Area Merokok', 'Temp..."


In [347]:
# The helper flag column is no longer needed
rdf_comb_nd2.drop(columns=['Additional Info Check'], inplace=True)

In [388]:
# Write the frame to CSV twice: once complete, once without the 'Website' column
rdf_comb_nd2.to_csv("Zomato Restaurants DataFrame - No Duplicate - Final.csv", index=False)
rdf_comb_nd2.drop(columns=['Website']).to_csv(
    "Zomato Restaurants DataFrame - No Duplicate - Final Without Website.csv",
    index=False,
)

In [361]:
# Websites of the rows whose 'Additional Info' is missing
rescrape = rdf_comb_nd2.loc[rdf_comb_nd2['Additional Info'].isna(), 'Website']

In [370]:
# Left-join rdfscrape on name + website; 'Additional Info' becomes an _a/_b pair
rdf_comb_nd2 = rdf_comb_nd2.merge(
    rdfscrape,
    how='left',
    on=['Restaurant Name', 'Website'],
    suffixes=('_a', '_b'),
)

In [375]:
# Mark missing 'Additional Info_a' values with the sentinel 'No'.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a column selection is a chained in-place operation, which pandas warns
# about and which does not update the parent frame under Copy-on-Write.
rdf_comb_nd2['Additional Info_a'] = rdf_comb_nd2['Additional Info_a'].replace(np.nan, 'No')

In [376]:
# Summarise the columns, non-null counts and dtypes of rdf_comb_nd2
rdf_comb_nd2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11267 entries, 0 to 11266
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Restaurant Name     11267 non-null  object
 1   Restaurant Type     11267 non-null  object
 2   Cuisine Type        11267 non-null  object
 3   Restaurant Area     11267 non-null  object
 4   Restaurant Rating   11267 non-null  object
 5   Review Counts       11267 non-null  object
 6   Latitude            11267 non-null  object
 7   Longitude           11267 non-null  object
 8   Website             11267 non-null  object
 9   Restaurant Address  11267 non-null  object
 10  Price for 2         11267 non-null  object
 11  Additional Info_a   11267 non-null  object
 12  Additional Info_b   1 non-null      object
dtypes: object(13)
memory usage: 1.5+ MB


In [378]:
def info_test2(row):
    """Choose the 'Additional Info' value for one row using the 'No' sentinel.

    Returns ``row['Additional Info_b']`` when ``row['Additional Info_a']`` equals
    the string ``'No'``; otherwise returns the ``_a`` value unchanged.

    NOTE: this reuses the name of the flag-based ``info_test2`` defined in an
    earlier cell, so once this cell runs it replaces that definition.
    """
    primary = row['Additional Info_a']
    if primary == 'No':
        return row['Additional Info_b']
    return primary

In [379]:
# Resolve 'Additional Info' per row using the 'No' sentinel
rdf_comb_nd2['Additional Info'] = rdf_comb_nd2.apply(info_test2, axis=1)