In [1]:
# BLOCK 1: Setup

import os
import time
import datetime
import requests
import glob
import shutil
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import TimeoutException

# Paths and Directories
cwd = os.getcwd()
dependencies_dir = os.path.join(cwd, "dependencies")
download_folder_name = "google_trends_downloads"
download_dir = os.path.join(cwd, download_folder_name)

os.makedirs(dependencies_dir, exist_ok=True)
os.makedirs(download_dir, exist_ok=True)

driver_path = os.path.join(dependencies_dir, "geckodriver.exe")
service_log_path = os.path.join(dependencies_dir, "geckodriver.log")
firefox_path = r"C:\Program Files\Mozilla Firefox\firefox.exe"

# Firefox options
firefox_options = Options()
firefox_options.binary_location = firefox_path
firefox_options.set_preference("permissions.default.image", 2)
firefox_options.set_preference("media.autoplay.default", 5)
firefox_options.set_preference("browser.download.folderList", 2)
firefox_options.set_preference("browser.download.dir", download_dir)
firefox_options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")
firefox_options.set_preference("browser.download.manager.showWhenStarting", False)
firefox_options.set_preference("pdfjs.disabled", True)

# WebDriver
firefox_service = FirefoxService(executable_path=driver_path, log_output=service_log_path)
driver = webdriver.Firefox(service=firefox_service, options=firefox_options)

def rename_latest_files(keyword):
    files = glob.glob(os.path.join(download_dir, "*.csv"))
    if not files:
        print("No new files detected, skipping rename.")
        return

    file_mapping = {
        "multiTimeline": "time",
        "geoMap": "geo",
        "relatedEntities": "ents",
        "relatedQueries": "quer"
    }

    for file in files:
        original_name = os.path.basename(file)
        for key, tag in file_mapping.items():
            if key in original_name:
                timestamp = datetime.datetime.now().strftime("%Y-%m-%d")
                new_filename = f"{keyword.replace(' ', '_')}_{tag}_{timestamp}.csv"
                new_filepath = os.path.join(download_dir, new_filename)
                os.rename(file, new_filepath)
                print(f" Renamed: {original_name} → {new_filename}")

def select_metro_resolution(driver):
    try:
        dropdown_xpath = "//md-select[contains(@class, 'resolution-select')]"
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, dropdown_xpath))).click()
        time.sleep(2)

        metro_option_xpath = "//md-option[@value='metro' and not(@aria-disabled='true')]"
        metro_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, metro_option_xpath)))
        metro_option.click()
        print("✅ Metro resolution selected.")
        time.sleep(7)
    except TimeoutException:
        print("⚠ Could not select Metro resolution: timeout.")
    except Exception as e:
        print(f"⚠ Error selecting Metro resolution: {e}")


In [2]:
# BLOCK 2: Core download logic — with Metro resolution selection

keywords = ["united airlines", "protest", "riot", "march", "demonstration", "rally", "strike"]

time_ranges = {
    "past_day": "now 1-d",
    "past_hour": "now 1-H",
    "past_4_hours": "now 4-H",
    "past_7_days": "now 7-d",
    "past_30_days": "today 1-m",
    "past_90_days": "today 3-m",
    "past_12_months": "today 12-m",
    "past_5_years": "today 5-y",
    "all_time": "all"
}

print("\nAvailable Time Ranges:")
for key in time_ranges:
    print(f" - {key}")

custom_time = input("\nEnter a time range from the list OR type 'custom' to enter a date range (YYYY-MM-DD YYYY-MM-DD): ").strip()
if custom_time == "custom":
    start_date = input("Enter start date (YYYY-MM-DD): ").strip()
    end_date = input("Enter end date (YYYY-MM-DD): ").strip()
    date_range = f"{start_date} {end_date}"
else:
    date_range = time_ranges.get(custom_time, "now 1-d")

print(f"\n📅 Using time range: {date_range}\n")

base_url = f"https://trends.google.com/trends/explore?date={date_range}&geo=US&q={{}}&hl=en"

for keyword in keywords:
    url = base_url.format(keyword.replace(" ", "%20"))
    print(f"🌐 Opening URL: {url}")
    driver.get(url)
    time.sleep(5)
    driver.refresh()
    time.sleep(15)

    attempts = 0
    max_attempts = 2

    while attempts < max_attempts:
        try:
            # Select Metro resolution for geoMap downloads
            select_metro_resolution(driver)

            # Wait for export buttons to show
            export_buttons = WebDriverWait(driver, 15).until(
                EC.presence_of_all_elements_located((By.XPATH, "//button[contains(@class, 'widget-actions-item export')]"))
            )

            if len(export_buttons) < 4:
                print(f"⚠ Only found {len(export_buttons)} export buttons. Retrying ({attempts+1}/{max_attempts})...")
                attempts += 1
                time.sleep(5)
                driver.refresh()
                time.sleep(37)
                continue

            print(f"🟢 Found {len(export_buttons)} export buttons for '{keyword}', clicking...")

            for button in export_buttons:
                try:
                    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button))
                    button.click()
                    time.sleep(3)
                except Exception as e:
                    print(f"⚠ Error clicking export button: {e}")

            time.sleep(5)
            rename_latest_files(keyword)
            break

        except TimeoutException:
            print(f"❌ Timeout: Couldn't find export buttons for '{keyword}' (Attempt {attempts+1}/{max_attempts})")
            attempts += 1
            time.sleep(5)

    print(f"⏭ Finished attempting for '{keyword}'. Moving on...\n")



Available Time Ranges:
 - past_day
 - past_hour
 - past_4_hours
 - past_7_days
 - past_30_days
 - past_90_days
 - past_12_months
 - past_5_years
 - all_time

📅 Using time range: now 1-d

🌐 Opening URL: https://trends.google.com/trends/explore?date=now 1-d&geo=US&q=united%20airlines&hl=en
⚠ Error selecting Metro resolution: Message: Element <md-select id="select_27" class="resolution-selector ng-pristine ng-untouched ng-valid ng-not-empty"> is not clickable at point (988,77) because another element <div class="sticky-legend-terms-container"> obscures it
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:199:5
ElementClickInterceptedError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:351:5
webdriverClickElement@chrome://remote/content/marionette/interaction.sys.mjs:175:11
interaction.clickElement@chrome://remote/content/marionette/interaction.sys.mjs:134:11
clickElement@chrome://

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\danny\\OneDrive - The University of Chicago\\DSJ - UCh OneDrive\\UnitedHackathon\\scrapers\\google_trends_downloads\\geoMap.csv' -> 'c:\\Users\\danny\\OneDrive - The University of Chicago\\DSJ - UCh OneDrive\\UnitedHackathon\\scrapers\\google_trends_downloads\\united_airlines_geo_2025-06-19.csv'

In [None]:
# BLOCK 3: File organization and cleanup

rename_latest_files(keywords[-1])
driver.quit()

today_str = datetime.datetime.now().strftime("%Y-%m-%d")
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
destination_folder = os.path.join(download_dir, timestamp)

os.makedirs(destination_folder, exist_ok=True)

files_moved = 0
for file in glob.glob(os.path.join(download_dir, f"*{today_str}.csv")):
    shutil.move(file, os.path.join(destination_folder, os.path.basename(file)))
    files_moved += 1

if files_moved:
    print(f"📦 Moved {files_moved} files to: {destination_folder}")
else:
    print("📂 No files matched today’s date pattern.")

# Cleanup any empty leftover folders
for folder in os.listdir(download_dir):
    folder_path = os.path.join(download_dir, folder)
    if os.path.isdir(folder_path) and not os.listdir(folder_path):
        os.rmdir(folder_path)
