In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
import pandas as pd

In [14]:
# Load only the ZIP-code column. Reading it as str keeps the codes as text,
# so values are not reinterpreted as numbers (which would drop leading zeros).
df = pd.read_csv(
    'data/RDC_Inventory_Core_Metrics_Zip_History.csv',
    dtype={'postal_code': 'str'},
    usecols=['postal_code']
)
# pandas represents missing CSV fields as NaN (never None), so an
# `is not None` filter lets them through and "nan" would later be typed into
# the search form. dropna() removes them before de-duplicating.
zip_codes = list(df.postal_code.dropna().unique())

# Initialize the WebDriver
driver = webdriver.Chrome()

# try/finally guarantees the browser is shut down even when the loop is
# interrupted (e.g. kernel interrupt) or an unexpected error escapes an
# iteration; otherwise the browser session would be left running.
try:
    # Loop through each ZIP code
    for zip_code in zip_codes:
        # Navigate to the initial URL; a fresh page load per ZIP code means the
        # search form starts empty every time. A navigation failure is treated
        # as fatal and propagates (the finally block still closes the browser).
        driver.get("https://crimegrade.org/crime-by-zip-code/")

        # Everything that depends on page content lives inside the try block so
        # that one bad ZIP code (missing form, slow page, missing table) is
        # reported and skipped instead of aborting the whole run.
        try:
            # Find and enter the ZIP code
            zip_field = driver.find_element(By.ID, "zip-code")
            zip_field.send_keys(zip_code)

            # Click the submit button
            submit_button = driver.find_element(By.ID, "submit")
            submit_button.click()

            # Wait (up to 10 s) until the table with the specific class appears
            # on the redirected page
            table_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "gradeComponents"))
            )

            # Get the HTML content of the table
            table_html = table_element.get_attribute("outerHTML")

            # Save the extracted table HTML to a file, naming it by ZIP code
            file_name = f"extracted_table_{zip_code}.html"
            with open(file_name, "w", encoding="utf-8") as file:
                file.write(table_html)

            print(f"Saved table for ZIP code {zip_code} to {file_name}")

        except Exception as e:
            # Deliberately broad: this is a best-effort scrape, so any
            # per-ZIP failure is logged with its cause and the loop continues.
            print(f"Error: Table did not load in time or was not found for ZIP code {zip_code}.", e)

        # Sleep between requests
        sleep(1)  # Adjust the sleep time as needed
finally:
    # Close the WebDriver
    driver.quit()

Saved table for ZIP code 11226 to extracted_table_11226.html
Saved table for ZIP code 10001 to extracted_table_10001.html
Saved table for ZIP code 90210 to extracted_table_90210.html
