In [1]:
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu
from bs4 import BeautifulSoup
import requests
import time
from googletrans import Translator

In [2]:
def connect_url(target_url, timeout=10, wait_seconds=2):
    """
    Fetch ``target_url`` with an HTTP GET and return the response.

    Parameters
    ----------
    target_url : str
        URL to request.
    timeout : float or tuple, optional
        Passed straight to ``requests.get`` as its ``timeout`` argument
        (default: 10 seconds).
    wait_seconds : float, optional
        How long to sleep after the request, to avoid hitting the site too
        often (default: 2 seconds).

    Returns
    -------
    requests.Response
        The response, with ``encoding`` set to its ``apparent_encoding``.
        Only responses with a 2xx status code are returned.

    Raises
    ------
    requests.HTTPError
        If the status code is not in the 2xx range.
    Exception
        Anything raised by ``requests.get`` itself (for example a timeout)
        propagates unchanged.
    """
    data = requests.get(target_url, timeout=timeout)
    data.encoding = data.apparent_encoding
    # Pause after every request so repeated calls do not flood the server.
    time.sleep(wait_seconds)

    # Any 2xx status counts as success (not only 200).
    if 200 <= data.status_code < 300:
        return data

    # raise_for_status() raises HTTPError for 4xx/5xx responses...
    data.raise_for_status()
    # ...but stays silent for other codes (e.g. an unfollowed 3xx), so raise
    # explicitly instead of falling through and returning None.
    raise requests.HTTPError(
        "Unexpected status code {} for url: {}".format(data.status_code, target_url),
        response=data,
    )

In [3]:
# Review page that this notebook scrapes.
url = 'https://www.tripadvisor.com/Restaurant_Review-g187147-d10085290-Reviews-Kodawari_Ramen_Yokocho-Paris_Ile_de_France.html'
# NOTE(review): a second URL (the "-or10-" variant, presumably the next page of
# reviews -- TODO confirm) used to sit here as a stray indented string literal.
# That is an IndentationError, and the value was never assigned or fetched, so it
# is kept only as a comment:
# 'https://www.tripadvisor.com/Restaurant_Review-g187147-d10085290-Reviews-or10-Kodawari_Ramen_Yokocho-Paris_Ile_de_France.html'
response = connect_url(url)


In [4]:
# Parse the fetched page's HTML with the lxml parser so it can be searched by tag/class.
soup = BeautifulSoup(markup=response.text, features='lxml')

In [5]:
# Collect the text of the first <p class="partial_entry"> inside every
# element with class "review-container".
reviews = []
review_container = soup.find_all(class_='review-container')
for container in review_container:
    entry = container.find("p", class_='partial_entry')
    # A container without a review paragraph is skipped rather than
    # aborting the whole cell with an IndexError.
    if entry is None:
        continue
    reviews.append(entry.text)

In [8]:
# Create the googletrans client, configured to use translate.googleapis.com.
translator = Translator(
    service_urls=['translate.googleapis.com'],
)

In [9]:
# Translate all collected reviews to Japanese in a single translate() call and
# keep only the translated text of each result.
# NOTE(review): the output recorded at the end of this notebook shows the "jp"
# column identical to the "en" column, so the service appears to have returned
# the input unchanged -- confirm the translations really are Japanese.
reviews_translated = []
translations = translator.translate(reviews, dest='ja')
reviews_translated.extend(result.text for result in translations)

In [10]:
# Pair each original review with its translation.
# Materialised as a list: a bare zip() is a one-shot iterator, so re-running a
# later cell that consumes it would silently see no rows.
zipped = list(zip(reviews, reviews_translated))

In [11]:
# Build the (en, jp) table straight from the two lists so this cell can be
# re-run on its own and always yields the same frame.
# Duplicates are dropped with drop_duplicates(), which keeps the first
# occurrence and the original review order; going through set() removed
# duplicates too but left the rows in an arbitrary order.
trip_advisor_reviews_df = (
    pd.DataFrame(list(zip(reviews, reviews_translated)), columns=["en", "jp"])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [12]:
# Write recipe outputs: store trip_advisor_reviews_df in the Dataiku dataset
# named "your_trip_advisor" using write_with_schema.
your_trip_advisor = dataiku.Dataset("your_trip_advisor")
your_trip_advisor.write_with_schema(trip_advisor_reviews_df)

10 rows successfully written (6VdZp2MatJ)


In [13]:
# Show the assembled DataFrame as this cell's output.
trip_advisor_reviews_df

Unnamed: 0,en,jp
0,"We arrived for the opening time of 12h00, wait...","We arrived for the opening time of 12h00, wait..."
1,"Really excellent food, and fab service. Also r...","Really excellent food, and fab service. Also r..."
2,We decided to try due to the long queue we saw...,We decided to try due to the long queue we saw...
3,This is a small ramen restaurant. I waited so ...,This is a small ramen restaurant. I waited so ...
4,"Ambiance is great. The food however, needs som...","Ambiance is great. The food however, needs som..."
5,Worth the wait! Had to queue for about 1 hour ...,Worth the wait! Had to queue for about 1 hour ...
6,The atmosphere of this restaurant was great an...,The atmosphere of this restaurant was great an...
7,We didn't get to go to Japan like we usually d...,We didn't get to go to Japan like we usually d...
8,"The best ramen I’ve ever had. Tender meat, fre...","The best ramen I’ve ever had. Tender meat, fre..."
9,"I’ve been there many time, I tried pretty much...","I’ve been there many time, I tried pretty much..."
