In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Step 0 / Step 1: start Chrome and let the user authenticate by hand.
_LOGIN_URL = "https://www.wattpad.com/login"
_LOGIN_PROMPT = "⏳ Please log in manually in the browser, then press ENTER here to continue..."

# The "detach" experimental option is passed through to Chrome/ChromeDriver.
# NOTE(review): presumably set so the browser window stays open once this
# script finishes -- confirm against the ChromeDriver capabilities docs.
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=options)

# Open the login page, then block on stdin until the user confirms that the
# login has been completed in the browser window. The value typed at the
# prompt is ignored; only the ENTER keypress matters.
driver.get(_LOGIN_URL)
input(_LOGIN_PROMPT)

# Step 2: Historical Fiction story URLs
# Every story lives under the same base path, so only the "<id>-<slug>" part
# is listed per story. The first slug is percent-encoded exactly as it
# appears in the address bar and must not be decoded or re-encoded here.
_STORY_URL_PREFIX = "https://www.wattpad.com/story/"
_STORY_SLUGS = (
    "350539177-%F0%9D%90%8D%F0%9D%90%9A%F0%9D%90%B2%F0%9D%90%9A%F0%9D%90%A7%F0%9D%90%AD%F0%9D%90%9A%F0%9D%90%AB%F0%9D%90%9A-%7E-%F0%9D%90%93%F0%9D%90%A1%F0%9D%90%9E-%F0%9D%90%84%F0%9D%90%A9%F0%9D%90%A2%F0%9D%90%AD%F0%9D%90%A8%F0%9D%90%A6%F0%9D%90%9E-%F0%9D%90%8E%F0%9D%90%9F-%F0%9D%90%81%F0%9D%90%9E%F0%9D%90%9A%F0%9D%90%AE%F0%9D%90%AD%F0%9D%90%B2-%F0%9D%9F%8F%F0%9D%9F%96%2B",
    "270117777-khalifa",
    "384852019-baby-tyrant",
)
story_urls = [_STORY_URL_PREFIX + slug for slug in _STORY_SLUGS]

# Step 3: Data collection
def _text_at(elements, index, default="N/A"):
    """Return the ``.text`` of ``elements[index]``, or *default* if the list is too short."""
    return elements[index].text if index < len(elements) else default


# One dict per successfully scraped story, in the order of ``story_urls``.
# URLs that fail are reported on stdout and left out of this list.
story_data = []

for url in story_urls:
    print(f"🔍 Scraping: {url}")

    try:
        # Navigation is inside the try block on purpose: if a single page
        # fails to load, the error is reported and the loop moves on instead
        # of aborting the run and losing the rows collected so far.
        driver.get(url)

        # Fixed pause kept from the original flow. The stats and date
        # elements below are read without an explicit wait of their own.
        # NOTE(review): presumably this gives the page time to render them.
        time.sleep(3)

        # Wait up to 10 s for the title container, and reuse the element the
        # wait returns rather than querying the DOM a second time.
        # NOTE(review): "gF-N5" / "bSGSB" look like build-generated class
        # names and may change when the site is redeployed -- verify.
        title_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.gF-N5"))
        )
        title = title_element.text

        # Reads & Votes: assumes the first two stats values on the page are
        # reads and votes, in that order -- TODO confirm against the page.
        stats = driver.find_elements(By.CSS_SELECTOR, "span[data-testid='stats-value']")
        reads = _text_at(stats, 0)
        votes = _text_at(stats, 1)

        # Published dates: assumes first match is the first-published date
        # and the second is the last-published date -- TODO confirm.
        dates = driver.find_elements(By.CSS_SELECTOR, "div.bSGSB")
        first_publish = _text_at(dates, 0)
        last_publish = _text_at(dates, 1)

        story_data.append({
            "Title": title,
            "Genre": "Historical Fiction",
            "Number of Reads": reads,
            "Number of Votes": votes,
            "First Published Date": first_publish,
            "Last Published Date": last_publish,
            "Story URL": url,
        })

    except Exception as e:
        # Deliberate best-effort boundary: any per-story failure (timeout,
        # navigation error, stale element, ...) is logged and skipped.
        print(f"❌ Error scraping {url}: {e}")

# Step 4: Save to Excel
# Single source of truth for the output path, used for both the write and
# the status messages.
OUTPUT_FILE = "Wattpad_HistoricalFiction_Stories.xlsx"

# ``df`` is always created (even when empty) so later code can rely on it.
df = pd.DataFrame(story_data)

if df.empty:
    # Every URL failed, or there were none. Writing now would produce a
    # workbook with no rows or columns (and overwrite any earlier export),
    # so skip the write and say so instead of reporting success.
    print(f"⚠️ No stories were scraped; '{OUTPUT_FILE}' was not written")
else:
    # index=False leaves the DataFrame's integer row index out of the sheet.
    df.to_excel(OUTPUT_FILE, index=False)
    print(f"✅ Done! Data saved to '{OUTPUT_FILE}'")


⏳ Please log in manually in the browser, then press ENTER here to continue... 


🔍 Scraping: https://www.wattpad.com/story/350539177-%F0%9D%90%8D%F0%9D%90%9A%F0%9D%90%B2%F0%9D%90%9A%F0%9D%90%A7%F0%9D%90%AD%F0%9D%90%9A%F0%9D%90%AB%F0%9D%90%9A-%7E-%F0%9D%90%93%F0%9D%90%A1%F0%9D%90%9E-%F0%9D%90%84%F0%9D%90%A9%F0%9D%90%A2%F0%9D%90%AD%F0%9D%90%A8%F0%9D%90%A6%F0%9D%90%9E-%F0%9D%90%8E%F0%9D%90%9F-%F0%9D%90%81%F0%9D%90%9E%F0%9D%90%9A%F0%9D%90%AE%F0%9D%90%AD%F0%9D%90%B2-%F0%9D%9F%8F%F0%9D%9F%96%2B
🔍 Scraping: https://www.wattpad.com/story/270117777-khalifa
🔍 Scraping: https://www.wattpad.com/story/384852019-baby-tyrant
✅ Done! Data saved to 'Wattpad_HistoricalFiction_Stories.xlsx'
