In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# 🔁 Replace with your actual chromedriver.exe path
CHROMEDRIVER_PATH = r"C:\Users\Gunjan\Downloads\chromedriver-win64\chromedriver.exe"

# Imported here rather than in the import block so this cell body is self-contained.
from selenium.common.exceptions import WebDriverException

# CSS selector matching one search-result card on the results page.
RESULT_CARD_SELECTOR = "div.result-item"
# Seconds to wait for the first result card before giving up.
WAIT_TIMEOUT_S = 20


def _card_text(card, selector, default="N/A"):
    """Return the stripped text of the first `selector` match inside `card`.

    Falls back to `default` when the lookup raises a Selenium error (for
    example, when no element matches), so a single missing field does not
    abort the whole scrape.
    """
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text.strip()
    except WebDriverException:
        return default


# Chrome options
options = Options()
# Headless mode is left disabled so the browser window is visible while debugging.
# options.add_argument("--headless")
options.add_argument("--start-maximized")

# Start driver with manual path
service = Service(CHROMEDRIVER_PATH)
driver = webdriver.Chrome(service=service, options=options)

try:
    # Go to Earth911
    url = "https://search.earth911.com/?what=Electronics&where=10001&max_distance=100"
    driver.get(url)

    # Wait for at least one result to appear. A TimeoutException carries an empty
    # message by default, so pass one explicitly to make the printed error useful.
    WebDriverWait(driver, WAIT_TIMEOUT_S).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, RESULT_CARD_SELECTOR)),
        message=(
            f"Timed out after {WAIT_TIMEOUT_S}s waiting for '{RESULT_CARD_SELECTOR}' "
            f"on {url}; the page layout or selector may have changed."
        ),
    )

    # Scroll to bottom to trigger full load
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(5)

    # Get result cards
    results = driver.find_elements(By.CSS_SELECTOR, RESULT_CARD_SELECTOR)

    print(f"✅ Found {len(results)} results")

    if not results:
        raise Exception("No results loaded. Try increasing wait time.")

    data = []

    # Only the first three cards are scraped.
    for card in results[:3]:
        name = _card_text(card, "h2.title a")
        updated = _card_text(card, "span.last-verified").replace("Updated ", "")
        address = _card_text(card, "p.addr")

        try:
            material_labels = (
                m.text.strip()
                for m in card.find_elements(By.CSS_SELECTOR, "span.material")
            )
            # find_elements returns an empty list instead of raising, so the
            # "N/A" fallback has to be applied to the empty join result as well.
            materials = ", ".join(label for label in material_labels if label) or "N/A"
        except WebDriverException:
            materials = "N/A"

        data.append({
            "Business_name": name,
            "last_update_date": updated,
            "street_address": address,
            "materials_accepted": materials
        })

    # Save to CSV
    df = pd.DataFrame(data)
    df.to_csv("earth911_recycling_data.csv", index=False)
    print("✅ Data saved to earth911_recycling_data.csv")

except Exception as e:
    print(f"❌ Error: {e}")

finally:
    # Always close the browser, even when scraping failed.
    driver.quit()


❌ Error: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff6c2dce925+77845]
	GetHandleVerifier [0x0x7ff6c2dce980+77936]
	(No symbol) [0x0x7ff6c2b89cda]
	(No symbol) [0x0x7ff6c2be06aa]
	(No symbol) [0x0x7ff6c2be095c]
	(No symbol) [0x0x7ff6c2c33d07]
	(No symbol) [0x0x7ff6c2c0890f]
	(No symbol) [0x0x7ff6c2c30b07]
	(No symbol) [0x0x7ff6c2c086a3]
	(No symbol) [0x0x7ff6c2bd1791]
	(No symbol) [0x0x7ff6c2bd2523]
	GetHandleVerifier [0x0x7ff6c30a683d+3059501]
	GetHandleVerifier [0x0x7ff6c30a0bfd+3035885]
	GetHandleVerifier [0x0x7ff6c30c03f0+3164896]
	GetHandleVerifier [0x0x7ff6c2de8c2e+185118]
	GetHandleVerifier [0x0x7ff6c2df053f+216111]
	GetHandleVerifier [0x0x7ff6c2dd72d4+113092]
	GetHandleVerifier [0x0x7ff6c2dd7489+113529]
	GetHandleVerifier [0x0x7ff6c2dbe288+10616]
	BaseThreadInitThunk [0x0x7ffd8cc3e8d7+23]
	RtlUserThreadStart [0x0x7ffd8cfdc34c+44]



In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import csv
import time

# Setup Chrome options
options = Options()
options.add_argument("--headless")  # Run in background
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Imported here rather than in the import block so this cell body is self-contained.
from selenium.common.exceptions import WebDriverException

# Output column -> CSS selector used to read it from a facility detail page.
DETAIL_SELECTORS = {
    "Title": "h1, h2",
    "Address": "p.addr",
    "Last Updated": "span.last-verified",
    "Material Accepted": "span.material.no-link",
}

# URL to visit
search_url = "https://search.earth911.com/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all"
csv_file = "output.csv"

# Rows collected for the CSV, one dict per facility page.
output = []

# Start driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    # Kept for explicit waits; the fixed sleeps below are what this cell relies on.
    wait = WebDriverWait(driver, 10)

    driver.get(search_url)
    time.sleep(5)  # Let JS load

    # Get all facility links on the search result page, skipping anchors without an href
    link_hrefs = (
        el.get_attribute("href")
        for el in driver.find_elements(By.CSS_SELECTOR, "h2.title a")
    )
    facility_links = [href for href in link_hrefs if href]

    print(f"Found {len(facility_links)} facility links")

    for url in facility_links:
        print(f"Visiting: {url}")
        try:
            driver.get(url)
            time.sleep(3)

            row = {}
            for column, selector in DETAIL_SELECTORS.items():
                try:
                    row[column] = driver.find_element(By.CSS_SELECTOR, selector).text.strip()
                except WebDriverException:
                    # A field that cannot be read is left blank rather than dropping the row.
                    row[column] = ""
            row["URL"] = url
            output.append(row)

        except Exception as e:
            print(f"⚠️ Error fetching details: {e}")
finally:
    # Close the headless browser even if navigation or scraping raised,
    # otherwise the Chrome process is left running in the background.
    driver.quit()

# Write to CSV
with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=[*DETAIL_SELECTORS, "URL"])
    writer.writeheader()
    writer.writerows(output)

print(f"✅ Data saved to {csv_file}")


Found 10 facility links
Visiting: https://search.earth911.com/program/Q1RQNVJYWFhKXA/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all
Visiting: https://search.earth911.com/location/Q1RQNVJeWFZAUw/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all
Visiting: https://search.earth911.com/location/Q1RQNVJbW1tAUA/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all
Visiting: https://search.earth911.com/location/Q1RQNVJeWldEVA/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitud

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import csv
import time

# Utility to clean encoding issues
def clean_text(text):
    """Repair UTF-8 text that was mis-decoded as Latin-1 and strip whitespace.

    The text is re-encoded as Latin-1 and decoded as UTF-8. If either step
    fails, the text was not that kind of mojibake, and the original text is
    returned (stripped) unchanged. Decoding is strict on purpose: ignoring
    invalid bytes would silently delete legitimate characters such as the
    "é" in "café".

    Args:
        text: The string to clean. ``None`` is treated as an empty string.

    Returns:
        The repaired (or original) string with surrounding whitespace removed.
    """
    if text is None:
        return ""
    try:
        repaired = text.encode("latin1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not Latin-1-encodable, or not valid UTF-8 once encoded: keep as is.
        return text.strip()
    return repaired.strip()

# Setup Chrome options
options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

# Imported here rather than in the import block so this cell body is self-contained.
from selenium.common.exceptions import WebDriverException

# Output column -> CSS selector used to read it from a facility detail page.
DETAIL_SELECTORS = {
    "Title": "h1, h2",
    "Address": "p.addr",
    "Last Updated": "span.last-verified",
    "Material Accepted": "span.material.no-link",
}

# Main search page
search_url = "https://search.earth911.com/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all"
csv_file = "output.csv"

# Rows collected for the CSV, one dict per facility page.
output = []

# Start driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    # Kept for explicit waits; the fixed sleeps below are what this cell relies on.
    wait = WebDriverWait(driver, 10)

    # Visit main search page
    driver.get(search_url)
    time.sleep(5)  # Allow JS to load

    # Extract facility links, skipping anchors without an href
    link_hrefs = (
        el.get_attribute("href")
        for el in driver.find_elements(By.CSS_SELECTOR, "h2.title a")
    )
    facility_links = [href for href in link_hrefs if href]

    print(f"Found {len(facility_links)} facility links")

    # Scrape details from each facility
    for url in facility_links:
        print(f"Visiting: {url}")
        try:
            driver.get(url)
            time.sleep(3)

            row = {}
            for column, selector in DETAIL_SELECTORS.items():
                try:
                    row[column] = clean_text(
                        driver.find_element(By.CSS_SELECTOR, selector).text
                    )
                except WebDriverException:
                    # A field that cannot be read is left blank rather than dropping the row.
                    row[column] = ""
            output.append(row)

        except Exception as e:
            print(f" Error fetching details: {e}")
finally:
    # Close the headless browser even if navigation or scraping raised,
    # otherwise the Chrome process is left running in the background.
    driver.quit()

# Save to CSV ("utf-8-sig" writes a BOM at the start of the file)
with open(csv_file, mode="w", newline="", encoding="utf-8-sig") as f:
    writer = csv.DictWriter(f, fieldnames=list(DETAIL_SELECTORS))
    writer.writeheader()
    writer.writerows(output)

print(f"Data saved to {csv_file}")


Found 10 facility links
Visiting: https://search.earth911.com/program/Q1RQNVJYWFhKXA/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all
Visiting: https://search.earth911.com/location/Q1RQNVJeWFZAUw/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all
Visiting: https://search.earth911.com/location/Q1RQNVJbW1tAUA/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitude=-73.99234262099&sponsor=&list_filter=all
Visiting: https://search.earth911.com/location/Q1RQNVJeWldEVA/?what=Electronics&where=10001&max_distance=100&country=US&province=NY&city=New+York&region=New+York&postal_code=10001&latitude=40.74807084035&longitud