In [1]:
import datetime
import glob
import os
import shutil
import time
from urllib.parse import quote

import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [2]:
# PATH SETUPS
# All folders live under the directory the notebook is run from.
cwd = os.getcwd()
dependencies_dir = os.path.join(cwd, "dependencies")  # holds geckodriver and its log

download_folder_name = "google_trends_downloads"  # Change folder name as needed
download_dir = os.path.join(cwd, download_folder_name)  # Full path inside CWD

# Create both folders up front; exist_ok makes re-running this cell safe.
os.makedirs(dependencies_dir, exist_ok=True)
os.makedirs(download_dir, exist_ok=True)

# GECKODRIVER AND LOG PATHS
# Only the Windows build of the driver carries the ".exe" suffix.
driver_filename = "geckodriver.exe" if os.name == "nt" else "geckodriver"
driver_path = os.path.join(dependencies_dir, driver_filename)
service_log_path = os.path.join(dependencies_dir, "geckodriver.log")

# Firefox executable path. Set the FIREFOX_BINARY environment variable to
# point at a different install without editing this cell; otherwise the
# default Windows install location is used.
firefox_path = os.environ.get(
    "FIREFOX_BINARY", r"C:\Program Files\Mozilla Firefox\firefox.exe"
)

In [3]:
### FIREFOX OPTIONS
firefox_options = Options()
firefox_options.binary_location = firefox_path

# Preferences for image loading, the Flash plugin and media autoplay.
# NOTE(review): the numeric values follow Firefox's about:config conventions;
# confirm their meaning against current Firefox documentation.
firefox_options.set_preference("permissions.default.image", 2)
# On/off preference: pass a real boolean. The string "false" would be handed
# to Firefox as a string-typed value rather than a boolean one.
firefox_options.set_preference("dom.ipc.plugins.enabled.libflashplayer.so", False)
firefox_options.set_preference("media.autoplay.default", 5)
firefox_options.set_preference("media.autoplay.blocking_policy", 2)
firefox_options.set_preference("media.autoplay.allow-muted", False)
firefox_options.set_preference("media.autoplay.enabled.user-gestures-needed", False)
firefox_options.log.level = "trace"  # verbose driver logging; the log goes to service_log_path

# --- 📌 Set Firefox to Auto-Download to Custom Folder (Inside CWD) ---
firefox_options.set_preference("browser.download.folderList", 2)  # 2 = Use custom download path
firefox_options.set_preference("browser.download.dir", download_dir)  # Set dynamic download folder
firefox_options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")  # Auto-save CSV files
firefox_options.set_preference("browser.download.manager.showWhenStarting", False)  # Hide download popup
firefox_options.set_preference("pdfjs.disabled", True)  # Disable built-in PDF viewer

# FF WEBDRIVER
# Starts geckodriver from driver_path and opens a Firefox session as soon as
# this cell runs.
firefox_service = FirefoxService(executable_path=driver_path, log_output=service_log_path)
driver = webdriver.Firefox(service=firefox_service, options=firefox_options)


In [4]:
def rename_latest_files(keyword, directory=None):
    """Rename downloaded Google Trends CSV exports after the keyword they belong to.

    Every ``*.csv`` file in the folder whose name contains one of the known
    export stems (``multiTimeline``, ``geoMap``, ``relatedEntities``,
    ``relatedQueries``) is renamed to ``<keyword>_<tag>_<YYYY-MM-DD>.csv``,
    with spaces in ``keyword`` replaced by underscores and the date taken
    from the current local time. Files matching no stem are left untouched.

    Args:
        keyword: Search term the exports belong to.
        directory: Folder to scan. Defaults to the module-level
            ``download_dir`` when omitted.

    Returns:
        None. Progress is reported with ``print``.
    """
    target_dir = download_dir if directory is None else directory
    files = glob.glob(os.path.join(target_dir, "*.csv"))  # Get all CSV files
    if not files:
        print("No new files detected, skipping rename.")
        return

    file_mapping = {
        "multiTimeline": "time",
        "geoMap": "geo",
        "relatedEntities": "ents",
        "relatedQueries": "quer"
    }

    # Oldest first: if several files map to the same target name (for example
    # a repeated download of the same export), the most recent one wins.
    files.sort(key=os.path.getmtime)

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d")
    keyword_slug = keyword.replace(' ', '_')

    # find and rename files
    for file in files:
        original_name = os.path.basename(file)
        for key, tag in file_mapping.items():
            if key in original_name:
                new_filename = f"{keyword_slug}_{tag}_{timestamp}.csv"
                new_filepath = os.path.join(target_dir, new_filename)

                # os.replace overwrites an existing target on every platform;
                # os.rename raises FileExistsError on Windows in that case
                # (e.g. when the notebook is re-run on the same day).
                os.replace(file, new_filepath)
                print(f"✅ Renamed: {original_name} → {new_filename}")
                break  # the file has moved; do not test the remaining stems


In [5]:

# --- Search terms to look up on Google Trends ---
keywords = [
    "united airlines",
    "protest",
    "riot",
    "march",
    "demonstration",
    "rally",
    "strike",
]

# --- Explore-page URL template; "{}" is the slot for the search term ---
base_url = "https://trends.google.com/trends/explore?date=now%201-d&geo=US&q={}&hl=en"


In [None]:
# Named time ranges offered to the user, mapped to the value placed in the
# "date" query parameter of the Google Trends explore URL.
time_ranges = {
    "past_day": "now 1-d",
    "past_hour": "now 1-H",
    "past_4_hours": "now 4-H",
    "past_7_days": "now 7-d",
    "past_30_days": "today 1-m",
    "past_90_days": "today 3-m",
    "past_12_months": "today 12-m",
    "past_5_years": "today 5-y",
    "all_time": "all"
}
default_range = "now 1-d"  # used when the entered name is not in time_ranges

print("\nAvailable Time Ranges:")
for key in time_ranges:
    print(f" - {key}")

custom_time = input("\nEnter a time range from the list OR type 'custom' to enter a date range (YYYY-MM-DD YYYY-MM-DD): ").strip()

if custom_time == "custom":
    start_date = input("Enter start date (YYYY-MM-DD): ").strip()
    end_date = input("Enter end date (YYYY-MM-DD): ").strip()
    date_range = f"{start_date} {end_date}"
elif custom_time in time_ranges:
    date_range = time_ranges[custom_time]
else:
    # Say so explicitly instead of silently scraping a different range than
    # the one the user thought they asked for.
    print(f"⚠ Unrecognised time range {custom_time!r}; falling back to {default_range!r}.")
    date_range = default_range

print(f"\n📅 Using time range: {date_range}\n")

# Percent-encode the range: every value contains a space (e.g. "now 1-d"),
# which must not appear raw in a URL. Encoding also keeps stray "{" / "}"
# in user input from breaking the later base_url.format(...) call.
encoded_date_range = quote(date_range, safe="")
base_url = f"https://trends.google.com/trends/explore?date={encoded_date_range}&geo=US&q={{}}&hl=en"

## find buttons w/ retry logic
# For each keyword: open its explore page, wait for the export buttons, click
# each one to trigger a CSV download, then rename the downloaded files.
export_button_xpath = "//button[contains(@class, 'widget-actions-item export')]"
expected_export_buttons = 4  # presumably one per export type rename_latest_files recognises
max_attempts = 2

try:
    for keyword in keywords:
        # Percent-encode the whole term (not only spaces) so characters such
        # as "&", "#" or "+" cannot corrupt the query string.
        url = base_url.format(quote(keyword, safe=""))
        print(f"Opening URL: {url}")

        driver.get(url)
        time.sleep(5)

        # Reload once before looking for the widgets.
        driver.refresh()
        time.sleep(15)

        attempts = 0

        while attempts < max_attempts:
            try:
                export_buttons = WebDriverWait(driver, 15).until(
                    EC.presence_of_all_elements_located((By.XPATH, export_button_xpath))
                )

                if len(export_buttons) < expected_export_buttons:
                    print(f"⚠ Only found {len(export_buttons)} export buttons. Retrying ({attempts+1}/{max_attempts})...")
                    attempts += 1
                    # Only reload when another attempt will actually follow;
                    # otherwise the long wait is spent for nothing.
                    if attempts < max_attempts:
                        time.sleep(5)  # Small delay
                        driver.refresh()  # Refresh the page
                        time.sleep(37)
                    continue  # Try again

                print(f" found {len(export_buttons)} export buttons for {keyword}, clicking..")

                for button in export_buttons:
                    try:
                        WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button))
                        button.click()
                        time.sleep(3)
                    except Exception as e:
                        # Best effort: one failed click should not stop the
                        # remaining exports for this keyword.
                        print(f"Error clicking export button: {e}")

                print(f"files for {keyword} should be downloaded.")

                # rename files after dl
                time.sleep(5)
                rename_latest_files(keyword)
                break  # exit when it works

            except TimeoutException:
                print(f" Timeout: Couldn't find export buttons for {keyword} (Attempt {attempts+1}/{max_attempts})")
                attempts += 1
                time.sleep(5)

        print(f" Moving on with available data for {keyword}.")

    # Final sweep for exports that were not renamed inside the loop; they are
    # attributed to the last keyword.
    if keywords:
        rename_latest_files(keywords[-1])
finally:
    # Close browser, even when the loop above raised or was interrupted, so
    # no Firefox/geckodriver process is left running.
    driver.quit()



Available Time Ranges:
 - past_day
 - past_hour
 - past_4_hours
 - past_7_days
 - past_30_days
 - past_90_days
 - past_12_months
 - past_5_years
 - all_time



📅 Using time range: now 1-d

Opening URL: https://trends.google.com/trends/explore?date=now 1-d&geo=US&q=united%20airlines&hl=en
⚠ Only found 3 export buttons. Retrying (1/2)...
 found 4 export buttons for united airlines, clicking..
files for united airlines should be downloaded.
✅ Renamed: geoMap.csv → united_airlines_geo_2025-06-17.csv
✅ Renamed: multiTimeline.csv → united_airlines_time_2025-06-17.csv
✅ Renamed: relatedEntities.csv → united_airlines_ents_2025-06-17.csv
✅ Renamed: relatedQueries.csv → united_airlines_quer_2025-06-17.csv
 Moving on with available data for united airlines.
Opening URL: https://trends.google.com/trends/explore?date=now 1-d&geo=US&q=protest&hl=en
⚠ Only found 3 export buttons. Retrying (1/2)...
 found 4 export buttons for protest, clicking..
files for protest should be downloaded.
✅ Renamed: geoMap.csv → protest_geo_2025-06-17.csv
✅ Renamed: multiTimeline.csv → protest_time_2025-06-17.csv
✅ Renamed: relatedEntities.csv → protest_ents_2025-06-17.csv
✅ Re

In [7]:
## quit driver
# The scraping cell above already calls driver.quit() itself, so this
# standalone call is presumably a manual fallback for closing the browser when
# that cell did not run to completion. TODO confirm it is still needed.
driver.quit()


In [8]:
# Move every CSV in download_dir whose name ends with today's date into a new
# sub-folder of download_dir named after the current date and time.
today_str = datetime.datetime.now().strftime("%Y-%m-%d")
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")  # second resolution keeps folder names distinct
destination_folder = os.path.join(download_dir, timestamp)

# The folder is created even when nothing ends up being moved into it.
os.makedirs(destination_folder, exist_ok=True)

todays_exports = glob.glob(os.path.join(download_dir, f"*{today_str}.csv"))
for export_path in todays_exports:
    archived_path = os.path.join(destination_folder, os.path.basename(export_path))
    shutil.move(export_path, archived_path)
files_moved = len(todays_exports)

summary = f" Moved {files_moved} files to: {destination_folder}" if files_moved else " Nothing moved."
print(summary)

 Moved 24 files to: c:\Users\danny\OneDrive - The University of Chicago\DSJ - UCh OneDrive\UnitedHackathon\google_trends_downloads\2025-06-17_10-15-07


In [9]:
### Remove every empty sub-folder of download_dir, such as a timestamp folder
### created by the cell above when there was nothing to move into it.

candidate_paths = (os.path.join(download_dir, entry) for entry in os.listdir(download_dir))
for candidate in candidate_paths:
    is_empty_dir = os.path.isdir(candidate) and not os.listdir(candidate)
    if is_empty_dir:
        os.rmdir(candidate)