In [None]:
from selenium import webdriver
import time 
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import (
    NoSuchElementException, 
    ElementNotInteractableException,
    TimeoutException,
    ElementClickInterceptedException
)
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Tripadvisor search-results URL for the query `Restaurant+in+Marrakech`.
# NOTE(review): not referenced by any of the cells shown here — confirm it is
# still needed.
URL = 'https://www.tripadvisor.com/Search?q=Restaurant+in+Marrakech'

In [None]:
def _click_accept_button(driver):
    """Click the first button whose text contains "Accept", if there is one.

    Presumably this is the cookie/consent banner — TODO confirm. The step is
    best effort: a missing or currently unclickable button is not an error.
    """
    try:
        driver.find_element(
            By.XPATH,
            '//button[contains(text(), "Accept")]'
        ).click()
    except (
        NoSuchElementException,
        ElementNotInteractableException,
        ElementClickInterceptedException,
    ):
        return
    # Pause briefly after the click before touching the page again.
    time.sleep(2)


def _click_when_clickable(driver, xpath, timeout):
    """Wait up to `timeout` seconds for `xpath` to be clickable, then click it.

    Returns True on a successful click, False when the wait times out or the
    click is rejected as not interactable / intercepted.
    """
    try:
        WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.XPATH, xpath))
        ).click()
    except (
        TimeoutException,
        ElementNotInteractableException,
        ElementClickInterceptedException,
    ):
        return False
    return True


def _click_first_match(driver, xpath, click, failure_label):
    """Apply `click` to each element matching `xpath` until one call succeeds.

    Returns True as soon as `click(element)` returns without raising, False if
    no element could be clicked. A failure of the lookup itself is printed,
    prefixed with `failure_label`.
    """
    try:
        candidates = driver.find_elements(By.XPATH, xpath)
    except Exception as exc:
        print(f"{failure_label}: {exc}")
        return False

    for element in candidates:
        try:
            click(element)
        except Exception:
            # Best effort: this candidate could not be clicked, try the next.
            continue
        return True
    return False


def _click_show_more(driver):
    """Try four increasingly forceful ways of clicking the "Show more" button.

    Returns True as soon as one strategy reports a click, False if all fail.
    """
    any_show_more_xpath = '//*[contains(text(), "Show more")]'

    # Strategy 1: wait for the <button> that wraps the "Show more" text.
    if _click_when_clickable(
        driver,
        '//button//*[contains(text(), "Show more")]/ancestor::button',
        15,
    ):
        print("Show more button clicked successfully")
        return True

    # Strategy 2: the text sits directly inside the <button>.
    if _click_when_clickable(driver, '//button[contains(text(), "Show more")]', 10):
        print("Show more button clicked with alternative XPath")
        return True

    # Strategy 3: scroll each candidate into view and click via ActionChains.
    def _scroll_and_click(element):
        driver.execute_script("arguments[0].scrollIntoView(true);", element)
        time.sleep(1)
        ActionChains(driver).move_to_element(element).click().perform()

    if _click_first_match(
        driver, any_show_more_xpath, _scroll_and_click, "ActionChains strategy failed"
    ):
        print("Show more button clicked with ActionChains")
        return True

    # Strategy 4: last resort, trigger the click from JavaScript.
    def _javascript_click(element):
        driver.execute_script("arguments[0].click();", element)

    if _click_first_match(
        driver, any_show_more_xpath, _javascript_click, "JavaScript click failed"
    ):
        print("Show more button clicked with JavaScript")
        return True

    return False


def scrape(url):
    """Load `url` in Chrome, expand the results and return the page HTML.

    Steps: open the page, wait up to 20 s for the "all-results-section"
    element, click an "Accept" button if present, then try to click
    "Show more" so additional results are loaded.

    Args:
        url: Address of the search-results page to load.

    Returns:
        str: ``driver.page_source`` after the interactions above.

    Raises:
        TimeoutException: if the results section does not appear within 20 s.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((
                By.XPATH,
                '//*[contains(@data-test-attribute, "all-results-section")]'
            ))
        )

        _click_accept_button(driver)

        # This must run whether or not an "Accept" button was found; it used
        # to be nested inside the `except` branch of that lookup and was
        # skipped whenever the button was present.
        if _click_show_more(driver):
            # Wait for additional content to load after clicking.
            time.sleep(5)
        else:
            print("Warning: Could not click 'Show more' button")

        return driver.page_source
    finally:
        # Always close the browser, including when a wait above times out.
        driver.quit()

In [None]:
def _parse_rating(node):
    """Return the leading number of `node`'s text as a float, or None.

    None is returned when the element is missing or its first
    space-separated token is not a valid float.
    """
    if node is None:
        return None
    try:
        return float(node.text.split(' ')[0])
    except ValueError:
        return None


def _parse_review_count(node):
    """Return the review count in `node`'s text as an int, or None.

    Commas and parentheses are removed before the first space-separated
    token is converted. None is returned when the element is missing or the
    token is not a valid integer.
    """
    if node is None:
        return None
    cleaned = node.text.replace(',', '').replace('(', '').replace(')', '')
    try:
        return int(cleaned.split(' ')[0])
    except ValueError:
        return None


def parse(html):
    """Extract one record per result card from a search-results page.

    Args:
        html: Page markup as a string.

    Returns:
        list[dict]: one dict per element matching
        ``[data-test-attribute="location-results-card"]`` with the keys
        ``title``, ``rating``, ``reviews`` and ``link``. A field is ``None``
        when its element is missing from the card or its text cannot be
        converted, so one incomplete card no longer aborts the whole parse.
    """
    soup = BeautifulSoup(html, 'html.parser')
    listings = []

    for card in soup.select('[data-test-attribute="location-results-card"]'):
        # select_one returns None when nothing matches, so every lookup below
        # is checked before its text or attributes are read.
        title = card.select_one('.FGwzt')
        rating = card.select_one('title')
        reviews = card.select_one('[data-automation="bubbleReviewCount"]')
        anchor = card.select_one('a')
        href = anchor.get('href') if anchor is not None else None

        listings.append({
            'title': title.text if title is not None else None,
            'rating': _parse_rating(rating),
            'reviews': _parse_review_count(reviews),
            'link': ('https://www.tripadvisor.com' + href) if href is not None else None,
        })

    return listings


In [None]:
def save_to_csv(data, filename):
    """Write `data` to `filename` as CSV, without the DataFrame index column.

    Args:
        data: Anything ``pd.DataFrame`` accepts, e.g. a list of dicts.
        filename: Destination path handed to ``DataFrame.to_csv``.
    """
    pd.DataFrame(data).to_csv(filename, index=False)


In [None]:
# Fixed search URLs for the restaurant and tourist-attraction queries; in the
# cells shown here they are only used by the commented-out driver cell.
url1 = 'https://www.tripadvisor.com/Search?q=Restaurant+in+Marrakech'
url2 = 'https://www.tripadvisor.com/Search?q=Tourist+Attraction+in+Marrakech'

In [None]:
# if __name__ == '__main__':
#     html1 = scrape(url1)
#     results1 = parse(html1)
#     save_to_csv(results1, 'restaurant.csv')
#     html2=scrape(url2)
#     results2= parse(html2)
#     save_to_csv(results2, 'touristique.csv')


In [None]:
# Search dimensions: the driver loop builds one Tripadvisor search for every
# (city, category) pair.
cities = [
    "Marrakech",
]
categories = [
    "Hotel",
    "Restaurant",
    "Tourist Attraction",
]

In [None]:
if __name__ == '__main__':
    # Scrape, parse and save one CSV per (city, category) combination.
    # NOTE(review): the output file name is built from the category only, so
    # with more than one entry in `cities` a later city overwrites the files
    # written for an earlier one.
    for city in cities:
        for cat in categories:
            # Spaces become '+' for the query string; the same '+'-joined
            # form is reused in the output file name.
            cat = '+'.join(cat.split(' '))
            query = f'{cat}+in+{city}'
            url = f'https://www.tripadvisor.com/Search?q={query}'

            html = scrape(url)
            results = parse(html)
            save_to_csv(results, f"{cat}_test.csv")

In [None]:
# Preview the restaurant results. The scraping loop names its files
# f"{cat}_test.csv", so the Restaurant category lands in
# 'Restaurant_test.csv'; no active cell shown here writes 'restaurant.csv',
# which made this cell depend on a file left over from an earlier run.
df = pd.read_csv('./Restaurant_test.csv')
df.head()