In [9]:
# Python Website Scraper, June 2025
# The scraper uses Selenium and targets "http://quotes.toscrape.com/" by default.
# It currently collects each quote's author and text (the "author" and "text" elements).
# The final data is written to a .csv file that can be opened in Excel or Google Sheets.


#Importing Tools
import csv  # tool for csv files
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException  # Handles end of pages
from selenium.common.exceptions import TimeoutException  # Raised when an explicit wait expires
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Browser setup: get a chromedriver path, start Chrome, and open the target site.
chromedriver_path = ChromeDriverManager().install()
driver_service = Service(chromedriver_path)
driver = webdriver.Chrome(service=driver_service)
wait = WebDriverWait(driver, timeout=10)  # shared explicit wait with a 10-second limit
driver.get("http://quotes.toscrape.com/")

# Scrape state used by the loop below.
all_quotes = []   # one {"author": ..., "quote": ...} dict per scraped quote
max_pages = 10    # upper bound on the number of pages to scrape
page_number = 1   # counter for the page currently being scraped

# Scrape loop: read every quote on the current page, then follow the "next"
# link until there is no such link, a wait times out, or max_pages is reached.
#
# NOTE(review): on quotes.toscrape.com the "next" class appears to sit on an
# <li> that wraps the actual link -- confirm against the page markup. A
# scripted click on such a wrapper does not follow the link, so the click
# below targets the <a> element instead. The selector also accepts an <a>
# that carries the class itself.
next_link_selector = ".next a, a.next"

try:
    while True:
        if page_number > max_pages:
            print(f"Maximum number of pages, {max_pages}, reached!")
            break
        print(f"Scraping Page {page_number}")

        # Wait until at least one quote is present before reading the page.
        try:
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, "quote")))
        except TimeoutException:
            print("Timed out waiting for page to load. Ending scrape.")
            break

        # Collect the author and text of every quote on this page.
        for quote_element in driver.find_elements(By.CLASS_NAME, "quote"):
            text = quote_element.find_element(By.CLASS_NAME, "text").text
            author = quote_element.find_element(By.CLASS_NAME, "author").text
            all_quotes.append({"author": author, "quote": text})

        # find_elements returns an empty list when nothing matches, so the
        # last page is detected immediately instead of waiting for a timeout.
        next_links = driver.find_elements(By.CSS_SELECTOR, next_link_selector)
        if not next_links:
            print("No more pages to scrape. Finished.")
            break

        next_link = next_links[0]
        driver.execute_script("arguments[0].click();", next_link)

        # The old link goes stale once the browser has left the current page.
        # Waiting for that prevents the next iteration from re-reading it.
        try:
            wait.until(EC.staleness_of(next_link))
        except TimeoutException:
            print("Timed out waiting for page to load. Ending scrape.")
            break

        page_number += 1  # Increment our page counter
except BaseException:
    # Do not leave a Chrome session running if the scrape fails or is
    # interrupted; the original error is re-raised unchanged.
    driver.quit()
    raise

# Scraping is finished, so the browser is no longer needed. Close it before
# touching the filesystem so that a failed write (for example, the CSV being
# locked by another program) cannot leave a Chrome session running.
driver.quit()

# Saving the data to a CSV file
print(f"Scraping complete, found {len(all_quotes)} total quotes")

# Name of the output file
csv_file_name = "scraped_quotes.csv"

# newline='' is what the csv module expects for files it writes; utf-8 is used
# so any non-ASCII characters in the scraped text are written correctly.
with open(csv_file_name, 'w', newline='', encoding='utf-8') as file:

    # Column headers; these match the keys of the dicts stored in all_quotes
    fieldnames = ["author", "quote"]

    # DictWriter maps each dict's keys onto the columns named in fieldnames
    writer = csv.DictWriter(file, fieldnames=fieldnames)

    # First row: the header
    writer.writeheader()

    # Remaining rows: one per scraped quote
    writer.writerows(all_quotes)

print(f"Data has been saved to {csv_file_name}.")





Scraping Page 1
Scraping Page 2
Scraping Page 3
Scraping Page 4
Scraping Page 5
Scraping Page 6
Scraping Page 7
Scraping Page 8
Scraping Page 9
Scraping Page 10
Maximum number of pages, 10, reached!
Scraping complete, found 100 total quotes
Data has been saved to scraped_quotes.csv.
