In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import random
import pandas as pd

In [4]:


# Relative paths to the chromedriver executable and the Chrome executable
chromedriver_path = r"chromedriver-win64/chromedriver-win64/chromedriver.exe"
chrome_binary_path = r"chrome-win64/chrome-win64/chrome.exe"

# Driver service and browser options used when a Chrome session is created
service = Service(chromedriver_path)
options = Options()
options.binary_location = chrome_binary_path

def _load_all_reviews(driver, timeout=3, pause=2):
    """Click the 'View More' reviews link until it can no longer be clicked.

    Args:
        driver: Selenium WebDriver that has already loaded the trail page.
        timeout: Seconds to wait, on each pass, for the link to become clickable.
        pause: Seconds to sleep after each click before looking for the link again.
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of this notebook.
    from selenium.common.exceptions import WebDriverException

    wait = WebDriverWait(driver, timeout)
    while True:
        try:
            review_button = wait.until(
                EC.element_to_be_clickable((By.ID, "rar_more_reviews_link"))
            )
            driver.execute_script(
                "arguments[0].scrollIntoView({block: 'center'});", review_button
            )
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", review_button)
        except WebDriverException:
            # TimeoutException (link never became clickable) is the normal exit.
            # Other Selenium errors also stop the loop (best effort), but
            # KeyboardInterrupt / SystemExit are no longer swallowed as they
            # were with a bare `except:`.
            break
        time.sleep(pause)


def _parse_review_comments(page_html, trail_url):
    """Extract one dict per review comment from a trail page's HTML.

    Args:
        page_html: HTML source of the trail page.
        trail_url: URL the HTML came from; stored under the "source" key.

    Returns:
        list[dict]: one dict per comment with keys "text", "author", "date",
        "stars", "trail" and "source" (in that order). Missing author, date,
        rating or trail name are recorded as the string "Unknown".
    """
    full_star_src = "https://images.singletracks.com/graphics/icons/star_full_large.png"
    half_star_src = "https://images.singletracks.com/graphics/icons/star_half_large.png"

    soup = BeautifulSoup(page_html, "html.parser")
    reviews = []
    for comment in soup.find_all("div", class_="comment-content media-body"):
        # Review body: all <p> elements of the comment joined with spaces.
        text = " ".join(p.get_text() for p in comment.find_all("p"))

        # Look each element up once, then fall back to "Unknown" when absent.
        author_tag = comment.find("a", class_="st_comment_author")
        author = author_tag.get_text() if author_tag is not None else "Unknown"

        time_tag = comment.find("time")
        date = time_tag.get_text() if time_tag is not None else "Unknown"

        # First link in the comment whose href contains 'bike-trails'.
        trail_link = comment.find("a", href=lambda x: x and "bike-trails" in x)
        trail_name = trail_link.text if trail_link is not None else "Unknown"

        # Rating = number of full-star images + 0.5 per half-star image.
        stars = comment.find("span", class_="st_rating_stars")
        if stars is not None:
            full_stars = len(stars.find_all("img", src=full_star_src))
            half_stars = len(stars.find_all("img", src=half_star_src))
            star_rating = full_stars + 0.5 * half_stars
        else:
            star_rating = "Unknown"

        reviews.append(
            {
                "text": text,
                "author": author,
                "date": date,
                "stars": star_rating,
                "trail": trail_name,
                "source": trail_url,
            }
        )
    return reviews


def scrape_trail_reviews(trail_url):
    """Scrapes reviews from a given trail page.

    Opens the page in a new Chrome session, expands the review list by
    repeatedly clicking the 'View More' link, and parses the resulting HTML.

    Args:
        trail_url: URL of the trail page to scrape.

    Returns:
        list[dict]: one dict per review with keys "text", "author", "date",
        "stars", "trail" and "source".
    """
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(trail_url)
        _load_all_reviews(driver)
        page_html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails, so a
        # failed scrape does not leave a Chrome process behind.
        driver.quit()
    return _parse_review_comments(page_html, trail_url)

# List of trail URLs to scrape.
# Each URL appears once: listing a page twice would scrape it twice and
# duplicate its reviews in the combined results.
trail_urls = [
    "https://www.singletracks.com/bike-trails/balm-boyette-scrub-preserve/",
    "https://www.singletracks.com/bike-trails/santos/",
    "https://www.singletracks.com/bike-trails/alafia-river-state-park/",
    "https://www.singletracks.com/bike-trails/markham-park-2/",
    "https://www.singletracks.com/bike-trails/caloosahatchee-regional-park/",
    "https://www.singletracks.com/bike-trails/grassy-island-trail/",
    "https://www.singletracks.com/bike-trails/fort-pierce-mountain-bike-trail/",
    "https://www.singletracks.com/bike-trails/withlacoochee-state-forest-croom-sec/",
    "https://www.singletracks.com/bike-trails/sanfalasco/",
    "https://www.singletracks.com/bike-trails/oleta-state-park/",
    "https://www.singletracks.com/bike-trails/grapefruit-trail/",
    "https://www.singletracks.com/bike-trails/timberlake/",
    "https://www.singletracks.com/bike-trails/carlton-preserve/",
    "https://www.singletracks.com/bike-trails/kathryn-abby-hanna-park/",
    "https://www.singletracks.com/bike-trails/orlando-wetlands-park/",
    "https://www.singletracks.com/bike-trails/fort-clinch-13749/",
    "https://www.singletracks.com/bike-trails/tom-brown-park/",
    "https://www.singletracks.com/bike-trails/uwf-mountain-bike-trails/",
    "https://www.singletracks.com/bike-trails/lake-overstreet/",
    "https://www.singletracks.com/bike-trails/little-big-econ-state-forest/",
    "https://www.singletracks.com/bike-trails/jimmie-cottons-trail/",
    "https://www.singletracks.com/bike-trails/loyce-e-harpe-park-carter-road/",
    "https://www.singletracks.com/bike-trails/chuck-lennon-park/",
    "https://www.singletracks.com/bike-trails/malabar-scrub-sanctuary/",
    "https://www.singletracks.com/bike-trails/halpatiokee/",
    "https://www.singletracks.com/bike-trails/boldlygo/",
    "https://www.singletracks.com/bike-trails/camp-murphy-mtb-trails/",
    "https://www.singletracks.com/bike-trails/graham-swamp/",
    "https://www.singletracks.com/bike-trails/jonathon-dickenson-state-park-2/",
]

# Scrape every trail page in turn, pooling the reviews into one list
all_reviews = []
for url in trail_urls:
    reviews = scrape_trail_reviews(url)
    all_reviews += reviews
    # Random 1-3 second pause after each page
    time.sleep(random.uniform(1,3))

# One row per scraped review
df = pd.DataFrame(all_reviews)


In [5]:
# Save the reviews as CSV without the DataFrame index column.
# NOTE(review): this is an absolute path under one user's profile directory.
output_csv_path = r"C:\Users\dtafm\OneDrive\Desktop\data.science\danielpy\fellowshipPy\trailComments.csv"
df.to_csv(output_csv_path, index=False)