In [4]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 
import urllib.parse 
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import geopandas as gpd
from geopandas.tools import geocode
from shapely.geometry import Point

### Put all together

In [63]:
# create dataframe (in the beginning it is empty)
df_restaurant = pd.DataFrame(columns=['Name','Address', 'Phone', 'Website', 'Operating_Hours','Dinner_cost', 'Lunch_cost', 'Rating', 'Reviews'])

# sequence of numbers - 1 to 59 
for page in range(1,60):

    # varibale for pages withing website
    website = "https://tabelog.com/en/tokyo/rstLst/" + str(page) + "/?LstCatD=RC0102&LstCat=RC01&Cat=RC "

    # request
    response = requests.get(website)
    
    # create soup object
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # result container
    result_container = soup.find_all('li',{'class':'list-rst js-list-item'})  
    
    # loop through results
    restaurant_link = []
    for result in result_container:
        restaurant_link.append(result.find('a').get('href'))
    restaurant_link
    
    # loop through all joined links
    for link in restaurant_link:
        response = requests.get(link)
        soup = BeautifulSoup(response.content, 'html.parser')
        try:
            Name = soup.find('a',{'class':'rd-header__rst-name-main'}).get_text()
        except:
            Name = 'n/a'
        try:
            Address = soup.find('p',{'class':'rd-detail-info__rst-address'}).get_text().strip()
        except:
            Address = 'n/a'
        try:
            Phone = soup.find('p',{'class':'rd-detail-info__rst-tel rd-detail-info__rst-tel--ppc'}).get_text().strip().split('\n')[0]
        except:
            Phone = 'n/a'
        try:
            Website = soup.find('a',{'class':'rd-header__rst-name-main'}).get('href')
        except:
            Website = 'n/a'
        try:
            Operating_Hours = soup.find('th',text = "Operating Hours").findNext('p',{'class':'translate'}).get_text().strip()
        except:
            Operating_Hours = 'n/a'
        try:
            Dinner_cost = soup.find('span', {'class':'c-rating__time c-rating__time--dinner'}).findNext('b').get_text()
        except:
            Dinner_cost = 'n/a'
        try:
            Lunch_cost = soup.find('span', {'class':'c-rating__time c-rating__time--lunch'}).findNext('b').get_text()
        except:
            Lunch_cost = 'n/a'
        try:
            Rating = soup.find('b', {'class':'c-rating__val c-rating__val--strong'}).get_text()
        except:
            Rating = 'n/a'
        try:
            Reviews = soup.find('a', {'class': 'rd-header__rst-reviews-target gly-b-review'}).get_text().strip().split('\n')[0]
        except:
            Reviews = 'n/a'
            
        # Pandas Dataframe
        df_restaurant = df_restaurant.append({'Name':Name,'Address':Address, 'Phone':Phone, 'Website':Website, 'Operating_Hours':Operating_Hours,'Dinner_cost':Dinner_cost, 'Lunch_cost':Lunch_cost, 'Rating':Rating, 'Reviews':Reviews}, ignore_index=True)
    


In [78]:
df = pd.read_csv('sushidata.csv')

### Output Pandas Dataframe

In [79]:
df 


Unnamed: 0,Name,Address,Phone,Website,Operating_Hours,Dinner_cost,Lunch_cost,Rating,Reviews
0,Sushizammai,1-chome-1-15 Ōkubo Shinjuku City Tokyo,050-5593-9051,https://tabelog.com/en/tokyo/A1304/A130401/130...,11：00～翌7：00,"￥3,000～￥3,999","￥1,000～￥1,999",3.18,97
1,Sushiichikan,22-8 Sakuragaokacho Shibuya City Tokyo-to,050-5595-2988,https://tabelog.com/en/tokyo/A1303/A130301/132...,ランチタイム(月曜、火曜、木曜、金曜のみ)11:30~14:00ディナータイム17:30~2...,"￥10,000～￥14,999","￥2,000～￥2,999",3.09,14
2,ITAMAE SUSHI,1-chome-19-1 Kabukicho Shinjuku City Tokyo-to,050-5589-2295,https://tabelog.com/en/tokyo/A1304/A130401/131...,【店内営業/テイクアウト】 全日08:00～翌05:00［ランチタイム］11:30～15:00,"￥3,000～￥3,999",～￥999,3.25,111
3,sushishiorianyamashiro,5-chome-12-10 Shiba Minato City Tokyo-to,050-5456-8458,https://tabelog.com/en/tokyo/A1314/A131402/132...,ランチ 12:00~14:00ディナー　17:00~23:00,"￥10,000～￥14,999","￥1,000～￥1,999",3.41,48
4,Ginzasushikouhonten,2-chome-4-1 Marunouchi Chiyoda City Tokyo-to,050-5596-3820,https://tabelog.com/en/tokyo/A1302/A130201/130...,月～土 ランチ11:00～14:30（L.O.14:00）ディナー17:30～23:00（L...,"￥10,000～￥14,999","￥5,000～￥5,999",3.53,48
...,...,...,...,...,...,...,...,...,...
1175,Kaitenzushitaisei,6-12 Asahicho Hachioji Tokyo-to,,https://tabelog.com/en/tokyo/A1329/A132904/131...,11:00～20:45,"￥1,000～￥1,999",～￥999,3.28,20
1176,Nigirinoippo,Senju Adachi City Tokyo,050-5868-5967,https://tabelog.com/en/tokyo/A1324/A132402/131...,17:00～23:00(フードL.O.22:00 ドリンクL.O.22:30),"￥6,000～￥7,999",-,3.53,60
1177,Akasakasushishiorianyamashiro,3-chome-15-13 Akasaka Minato City Tokyo,,https://tabelog.com/en/tokyo/A1308/A130801/132...,通常営業時間16:00 ～ 24:00（23:00）,"￥8,000～￥9,999","￥6,000～￥7,999",3.09,15
1178,sushinomidorisouhonten,1 Chome-20-7 Umegaoka Setagaya-ku Tokyo-to,,https://tabelog.com/en/tokyo/A1318/A131812/130...,[月]11:00～21:00（L.O.20:45）[火～金]11:00～14:00 16:3...,"￥3,000～￥3,999","￥2,000～￥2,999",3.53,345


### Store in Excel

In [80]:
df.describe()

Unnamed: 0,Rating,Reviews
count,1180.0,1180.0
mean,3.302864,44.850847
std,0.23628,58.168657
min,3.0,0.0
25%,3.09,14.0
50%,3.27,25.5
75%,3.44,52.0
max,4.63,559.0


In [136]:
df_restaurant['Reviews'] = df_restaurant['Reviews'].replace({'-':None})

In [143]:
df_restaurant.dtypes
df_restaurant['Reviews'] = (pd.to_numeric(df_restaurant['Reviews'])
                            .where(df_restaurant['Reviews'].notnull()))
df_restaurant['Reviews'].describe()

count    1180.000000
mean       44.850847
std        58.168657
min         0.000000
25%        14.000000
50%        25.500000
75%        52.000000
max       559.000000
Name: Reviews, dtype: float64

In [None]:
df_restaurant.dtypes
df_restaurant['Rating'] = (pd.to_numeric(df_restaurant['Rating'], downcast='float')
                            .where(df_restaurant['Rating'].notnull()))
df_restaurant['Rating'].describe()

In [139]:
df_restaurant['Rating'].mean()
df_restaurant['Reviews'].mean()

44.85084745762712

In [111]:
top10_rating = df_restaurant[['Name', 'Address', 'Rating', 'Reviews']].nlargest(10,'Rating').sort_values(by = 'Rating',ascending=False)
top10_rating['Average_Rating']  = df_restaurant['Rating'].mean()
top10_rating

Unnamed: 0,Name,Address,Rating,Reviews,Average_Rating
812,Sugita,1-chome-33-6 Nihonbashikakigaracho Chuo City T...,4.63,365,3.302864
931,Sushinamba,1-chome-1-2 Yurakucho Chiyoda City Tokyo-to,4.55,269,3.302864
611,Mitani,1-chome-1-22 Yotsuya Shinjuku City Tokyo-to,4.51,199,3.302864
783,Sawada,5-chome-9-19 Ginza Chuo City Tokyo-to,4.49,257,3.302864
1039,Kimura,3-chome-21-8 Tamagawa Setagaya City Tokyo-to,4.45,252,3.302864
712,Takamitsu,1-chome-28-2 Aobadai Meguro City Tokyo-to,4.43,233,3.302864
1021,Hatsunezushi,Nishikamata Ota City Tokyo,4.38,343,3.302864
1122,Sushi Sho,Yotsuya Shinjuku City Tokyo,4.32,539,3.302864
863,sushishunsuke,3-chome-44-4 Asagayaminami Suginami City Tokyo-to,4.31,236,3.302864
702,Sushifujinaga,2 Chome-9-5 Azabujuban Minato City Tokyo,4.25,100,3.302864


In [102]:
px.line(top10_rating, x = 'Name', y = ['Rating', 'Average_Rating'])

In [134]:
# fig = go.Figure(data = [
#     go.Bar(name = 'Top Rating', x = 'Name', y  = 'Rating'),
#     go.Bar(name = 'Number of Rating', x = 'Name', y = 'Reviews')
# ])
# fig.update_layout(barmode = 'group')
# fig.show()
subfig = make_subplots(specs=[[{"secondary_y": True}]])
Ratingchart = px.bar(top10_rating, x = 'Name', y = 'Rating', color= 'Rating')
No_rating_chart = px.bar(top10_rating, x = 'Name', y = 'Reviews')
No_rating_chart.update_traces(yaxis="y2")
subfig.add_traces(Ratingchart.data + No_rating_chart.data)
subfig.layout.xaxis.title = 'Restaurants'
subfig.layout.yaxis.title = 'Rating'
subfig.layout.yaxis2.title = 'Number of Review'
#subfig.for_each_trace(lambda t: t.update(color = t.marker.color))
subfig

In [146]:
top10_review = df_restaurant[['Name', 'Address', 'Rating', 'Reviews']].nlargest(10,'Reviews').sort_values(by = 'Reviews',ascending=False)
top10_review['Review_in_avg']  = df_restaurant['Reviews'].mean()
top10_review

Unnamed: 0,Name,Address,Rating,Reviews,Review_in_avg
367,Sushizammai,4-chome-11-9 Tsukiji Chuo City Tokyo-to,3.52,559,44.850847
1122,Sushi Sho,Yotsuya Shinjuku City Tokyo,4.32,539,44.850847
1063,Tsukijisushidai,6-chome-21-2 Tsukiji Chuo City Tokyo-to,3.76,472,44.850847
159,akasakakintan,3-chome-11-7 Akasaka Minato City Tokyo-to,3.74,430,44.850847
1036,Sakaezushi,1-chome-18-5 Tateishi Katsushika City Tokyo-to,3.75,414,44.850847
773,Umegaokasushinomidorisouhonten,7 Chome-2 Ginza Chuo City Tokyo,3.55,371,44.850847
812,Sugita,1-chome-33-6 Nihonbashikakigaracho Chuo City T...,4.63,365,44.850847
857,Umegaokasushinomidorisouhonten,1-chome-12-3 Dogenzaka Shibuya City Tokyo-to,3.49,352,44.850847
1178,sushinomidorisouhonten,1 Chome-20-7 Umegaoka Setagaya-ku Tokyo-to,3.53,345,44.850847
1002,Shimizu,2-chome-15-10 Shinbashi Minato City Tokyo-to,4.1,343,44.850847


In [188]:
px.bar(top10_review, x = 'Name', y= ['Reviews', 'Review_in_avg'],barmode='group')

## Cheap place recommendation

In [50]:
df_restaurant = pd.read_csv('sushidata.csv')
df_restaurant

Unnamed: 0,Name,Address,Phone,Website,Operating_Hours,Dinner_cost,Lunch_cost,Rating,Reviews
0,Sushizammai,1-chome-1-15 Ōkubo Shinjuku City Tokyo,050-5593-9051,https://tabelog.com/en/tokyo/A1304/A130401/130...,11：00～翌7：00,"￥3,000～￥3,999","￥1,000～￥1,999",3.18,97
1,Sushiichikan,22-8 Sakuragaokacho Shibuya City Tokyo-to,050-5595-2988,https://tabelog.com/en/tokyo/A1303/A130301/132...,ランチタイム(月曜、火曜、木曜、金曜のみ)11:30~14:00ディナータイム17:30~2...,"￥10,000～￥14,999","￥2,000～￥2,999",3.09,14
2,ITAMAE SUSHI,1-chome-19-1 Kabukicho Shinjuku City Tokyo-to,050-5589-2295,https://tabelog.com/en/tokyo/A1304/A130401/131...,【店内営業/テイクアウト】 全日08:00～翌05:00［ランチタイム］11:30～15:00,"￥3,000～￥3,999",～￥999,3.25,111
3,sushishiorianyamashiro,5-chome-12-10 Shiba Minato City Tokyo-to,050-5456-8458,https://tabelog.com/en/tokyo/A1314/A131402/132...,ランチ 12:00~14:00ディナー　17:00~23:00,"￥10,000～￥14,999","￥1,000～￥1,999",3.41,48
4,Ginzasushikouhonten,2-chome-4-1 Marunouchi Chiyoda City Tokyo-to,050-5596-3820,https://tabelog.com/en/tokyo/A1302/A130201/130...,月～土 ランチ11:00～14:30（L.O.14:00）ディナー17:30～23:00（L...,"￥10,000～￥14,999","￥5,000～￥5,999",3.53,48
...,...,...,...,...,...,...,...,...,...
1175,Kaitenzushitaisei,6-12 Asahicho Hachioji Tokyo-to,,https://tabelog.com/en/tokyo/A1329/A132904/131...,11:00～20:45,"￥1,000～￥1,999",～￥999,3.28,20
1176,Nigirinoippo,Senju Adachi City Tokyo,050-5868-5967,https://tabelog.com/en/tokyo/A1324/A132402/131...,17:00～23:00(フードL.O.22:00 ドリンクL.O.22:30),"￥6,000～￥7,999",-,3.53,60
1177,Akasakasushishiorianyamashiro,3-chome-15-13 Akasaka Minato City Tokyo,,https://tabelog.com/en/tokyo/A1308/A130801/132...,通常営業時間16:00 ～ 24:00（23:00）,"￥8,000～￥9,999","￥6,000～￥7,999",3.09,15
1178,sushinomidorisouhonten,1 Chome-20-7 Umegaoka Setagaya-ku Tokyo-to,,https://tabelog.com/en/tokyo/A1318/A131812/130...,[月]11:00～21:00（L.O.20:45）[火～金]11:00～14:00 16:3...,"￥3,000～￥3,999","￥2,000～￥2,999",3.53,345


In [53]:
df_restaurant['Dinner_cost'] = df_restaurant['Dinner_cost'].str.replace('￥', '')
df_restaurant['Dinner_cost'] = df_restaurant['Dinner_cost'].str.replace(',', '')
df_restaurant['Dinner_cost']
seperate_dinner_cost  = df_restaurant['Dinner_cost'].str.split('～', n = 2, expand = True)
for i in range((len(seperate_dinner_cost.columns))):
    seperate_dinner_cost[i]  = seperate_dinner_cost[i].replace({'-':None})
    seperate_dinner_cost[i] = (pd.to_numeric(seperate_dinner_cost[i], downcast='float')
                            .where(seperate_dinner_cost[i].notnull()))
#seperate_dinner_cost[0].apply(type)


In [55]:
df_restaurant['Lunch_cost'] = df_restaurant['Lunch_cost'].str.replace('￥', '')
df_restaurant['Lunch_cost'] = df_restaurant['Lunch_cost'].str.replace(',', '')
df_restaurant['Lunch_cost']
seperate_lunch_cost  = df_restaurant['Lunch_cost'].str.split('～', n = 2, expand = True)
for i in range((len(seperate_lunch_cost.columns))):
    seperate_lunch_cost[i]  = seperate_lunch_cost[i].replace({'-':None})
    seperate_lunch_cost[i] = (pd.to_numeric(seperate_lunch_cost[i], downcast='float')
                            .where(seperate_lunch_cost[i].notnull()))

In [57]:
 df_restaurant['Min_dinner_cost'] = seperate_dinner_cost[0]
 df_restaurant['Max_dinner_cost'] = seperate_dinner_cost[1]
 df_restaurant['Min_lunch_cost'] = seperate_lunch_cost[0]
 df_restaurant['Max_lunch_cost'] = seperate_lunch_cost[1]
 df_restaurant

Unnamed: 0,Name,Address,Phone,Website,Operating_Hours,Dinner_cost,Lunch_cost,Rating,Reviews,Min_dinner_cost,Max_dinner_cost,Min_lunch_cost,Max_lunch_cost
0,Sushizammai,1-chome-1-15 Ōkubo Shinjuku City Tokyo,050-5593-9051,https://tabelog.com/en/tokyo/A1304/A130401/130...,11：00～翌7：00,3000～3999,1000～1999,3.18,97,3000.0,3999.0,1000.0,1999.0
1,Sushiichikan,22-8 Sakuragaokacho Shibuya City Tokyo-to,050-5595-2988,https://tabelog.com/en/tokyo/A1303/A130301/132...,ランチタイム(月曜、火曜、木曜、金曜のみ)11:30~14:00ディナータイム17:30~2...,10000～14999,2000～2999,3.09,14,10000.0,14999.0,2000.0,2999.0
2,ITAMAE SUSHI,1-chome-19-1 Kabukicho Shinjuku City Tokyo-to,050-5589-2295,https://tabelog.com/en/tokyo/A1304/A130401/131...,【店内営業/テイクアウト】 全日08:00～翌05:00［ランチタイム］11:30～15:00,3000～3999,～999,3.25,111,3000.0,3999.0,,999.0
3,sushishiorianyamashiro,5-chome-12-10 Shiba Minato City Tokyo-to,050-5456-8458,https://tabelog.com/en/tokyo/A1314/A131402/132...,ランチ 12:00~14:00ディナー　17:00~23:00,10000～14999,1000～1999,3.41,48,10000.0,14999.0,1000.0,1999.0
4,Ginzasushikouhonten,2-chome-4-1 Marunouchi Chiyoda City Tokyo-to,050-5596-3820,https://tabelog.com/en/tokyo/A1302/A130201/130...,月～土 ランチ11:00～14:30（L.O.14:00）ディナー17:30～23:00（L...,10000～14999,5000～5999,3.53,48,10000.0,14999.0,5000.0,5999.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1175,Kaitenzushitaisei,6-12 Asahicho Hachioji Tokyo-to,,https://tabelog.com/en/tokyo/A1329/A132904/131...,11:00～20:45,1000～1999,～999,3.28,20,1000.0,1999.0,,999.0
1176,Nigirinoippo,Senju Adachi City Tokyo,050-5868-5967,https://tabelog.com/en/tokyo/A1324/A132402/131...,17:00～23:00(フードL.O.22:00 ドリンクL.O.22:30),6000～7999,-,3.53,60,6000.0,7999.0,,
1177,Akasakasushishiorianyamashiro,3-chome-15-13 Akasaka Minato City Tokyo,,https://tabelog.com/en/tokyo/A1308/A130801/132...,通常営業時間16:00 ～ 24:00（23:00）,8000～9999,6000～7999,3.09,15,8000.0,9999.0,6000.0,7999.0
1178,sushinomidorisouhonten,1 Chome-20-7 Umegaoka Setagaya-ku Tokyo-to,,https://tabelog.com/en/tokyo/A1318/A131812/130...,[月]11:00～21:00（L.O.20:45）[火～金]11:00～14:00 16:3...,3000～3999,2000～2999,3.53,345,3000.0,3999.0,2000.0,2999.0


In [61]:
#create new data_frame for friendly budget recommendation

price_df = df_restaurant[['Name', 'Address', 'Rating', 'Reviews','Min_lunch_cost', 'Max_lunch_cost', 'Min_dinner_cost','Max_dinner_cost']]

#delete if there is any missing value
price_df.dropna()

Unnamed: 0,Name,Address,Rating,Reviews,Min_lunch_cost,Max_lunch_cost,Min_dinner_cost,Max_dinner_cost
0,Sushizammai,1-chome-1-15 Ōkubo Shinjuku City Tokyo,3.18,97,1000.0,1999.0,3000.0,3999.0
1,Sushiichikan,22-8 Sakuragaokacho Shibuya City Tokyo-to,3.09,14,2000.0,2999.0,10000.0,14999.0
3,sushishiorianyamashiro,5-chome-12-10 Shiba Minato City Tokyo-to,3.41,48,1000.0,1999.0,10000.0,14999.0
4,Ginzasushikouhonten,2-chome-4-1 Marunouchi Chiyoda City Tokyo-to,3.53,48,5000.0,5999.0,10000.0,14999.0
5,Yasaka,2-chome-8-13 Ebisu Shibuya City Tokyo-to,3.18,21,2000.0,2999.0,10000.0,14999.0
...,...,...,...,...,...,...,...,...
1170,Kanazawamaimonsushitamahime,3 Chome-17-1 Tamagawa Setagaya City Tokyo-to,3.36,90,2000.0,2999.0,4000.0,4999.0
1171,Toshiya,3-chome-12-19 Miyasaka Setagaya City Tokyo-to,3.53,65,1000.0,1999.0,10000.0,14999.0
1172,Tsukijitamazushi,1-chome-23-1 Kichijoji Honcho Musashino Tokyo-to,3.04,12,1000.0,1999.0,5000.0,5999.0
1177,Akasakasushishiorianyamashiro,3-chome-15-13 Akasaka Minato City Tokyo,3.09,15,6000.0,7999.0,8000.0,9999.0


In [65]:
#caculate the average range cost in lunch and dinner
mean_price  = price_df[['Min_lunch_cost', 'Max_lunch_cost', 'Min_dinner_cost','Max_dinner_cost']].mean()
mean_price['Min_lunch_cost']

3261.0498