In [1]:
import logging
import os
import time

from selenium import webdriver # type: ignore
from selenium.common.exceptions import TimeoutException # type: ignore
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By # type: ignore
from selenium.webdriver.support import expected_conditions as EC # type: ignore
from selenium.webdriver.support.ui import WebDriverWait # type: ignore

In [2]:
# Path to the Chrome WebDriver binary.
# The default is machine-specific; set the CHROME_DRIVER_PATH environment
# variable to point at a different chromedriver without editing the notebook.
CHROME_DRIVER_PATH = os.environ.get(
    "CHROME_DRIVER_PATH",
    "/home/qubit/Downloads/chrome/chromedriver-linux64/chromedriver",
)

# URL of the repository index page (the page listing the source IDs)
BASE_URL = "https://fermi.gsfc.nasa.gov/ssc/data/access/lat/LightCurveRepository/"

In [7]:
# Setup Selenium WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run in background

# Create a Service object with the ChromeDriver path
service = Service(CHROME_DRIVER_PATH)

# Initialize Chrome WebDriver. The options object has to be passed explicitly;
# without it the --headless flag configured above is silently ignored.
driver = webdriver.Chrome(service=service, options=options)

# Everything that uses the browser lives inside try/finally so the Chrome
# process is always shut down, even when a step below raises.
try:
    wait = WebDriverWait(driver, 10)

    # Step 1: Open the repository index page that holds the source table
    driver.get(BASE_URL)

    # Step 2: Find all Source ID links
    source_links = driver.find_elements(By.XPATH, "//table//a")  # Adjust XPath based on table structure
    # Resolve the hrefs up front (the loop below navigates away from this page)
    # and drop anchors that carry no href, which would make driver.get() fail.
    source_urls = [
        href
        for href in (link.get_attribute("href") for link in source_links)
        if href
    ]

    # Step 3: Loop through each Source ID page
    for url in source_urls:
        try:
            # Navigation sits inside the try so one unreachable page does not
            # abort the remaining sources.
            driver.get(url)
            time.sleep(5)  # Wait for page to load

            # Step 4: Click the "three lines" menu (top right)
            # NOTE(review): 'menu-class' looks like a placeholder class name --
            # confirm it against the real page markup.
            menu_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'menu-class')]")))
            menu_button.click()
            time.sleep(2)

            # Step 5: Click "Download CSV"
            download_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'Download CSV')]")))
            download_button.click()

            print(f"CSV downloaded from: {url}")

        except Exception as e:
            # Best effort: report the failure and continue with the next source.
            print(f"Failed for {url}: {e}")
finally:
    # Close browser
    driver.quit()


In [10]:

# ===== Setup Logging =====
logging.basicConfig(filename="download_log.txt", level=logging.INFO, format="%(asctime)s - %(message)s")

# ===== Set Download Folder =====
DOWNLOAD_FOLDER = os.path.join(os.getcwd(), "csv_downloads")
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

# ===== Locators and Timeouts =====
SOURCE_LINK_XPATH = "//td[@class='Source_Name']/a"  # Adjust XPath based on the actual table structure
# NOTE(review): 'menu-class' looks like a placeholder class name -- confirm
# it against the real page markup.
MENU_BUTTON_XPATH = "//div[contains(@class, 'menu-class')]"
DOWNLOAD_CSV_XPATH = "//a[contains(text(), 'Download CSV')]"
ELEMENT_TIMEOUT_S = 10   # max seconds to wait for an element to show up
DOWNLOAD_TIMEOUT_S = 60  # max seconds to wait for one CSV to land on disk


def _wait_for_new_download(folder, files_before, timeout=DOWNLOAD_TIMEOUT_S, poll=0.5):
    """Wait until a finished file that was not in `files_before` appears in `folder`.

    Args:
        folder: Directory the browser downloads into.
        files_before: Set of file names present in `folder` before the click.
        timeout: Maximum number of seconds to wait.
        poll: Seconds to sleep between directory scans.

    Returns:
        The name of a newly arrived file, or None if none showed up in time.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        finished = sorted(
            name
            for name in set(os.listdir(folder)) - files_before
            # Skip partial downloads: Chrome uses a ".crdownload" suffix while
            # writing; hidden/".tmp" names are treated as temporary as well.
            if not name.startswith(".") and not name.endswith((".crdownload", ".tmp"))
        )
        if finished:
            return finished[0]
        time.sleep(poll)
    return None


# ===== Selenium WebDriver Setup =====
chrome_options = Options()

# Enable automatic downloads to the specified folder
chrome_prefs = {
    "download.default_directory": DOWNLOAD_FOLDER,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True
}
chrome_options.add_experimental_option("prefs", chrome_prefs)
# NOTE(review): some headless Chrome builds refuse file downloads; if nothing
# arrives in DOWNLOAD_FOLDER, try "--headless=new" -- confirm for your version.
chrome_options.add_argument("--headless")  # Run in background

# Initialize WebDriver
service = Service(CHROME_DRIVER_PATH)
driver = webdriver.Chrome(service=service, options=chrome_options)
wait = WebDriverWait(driver, ELEMENT_TIMEOUT_S)

failed_urls = []

# try/finally guarantees the Chrome process is closed even if a step raises.
try:
    # ===== Step 1: Open Main Light Curve Repository Page =====
    # BASE_URL is defined in the configuration cell near the top of the notebook.
    driver.get(BASE_URL)

    # ===== Step 2: Get Source ID Links =====
    # Wait for the links instead of reading the DOM immediately: an immediate
    # find_elements() returns [] if the table has not been rendered yet.
    try:
        source_links = wait.until(
            EC.presence_of_all_elements_located((By.XPATH, SOURCE_LINK_XPATH))
        )
    except TimeoutException:
        source_links = []
        logging.warning(
            "No source links matched %s on %s within %ss; check the XPath.",
            SOURCE_LINK_XPATH, BASE_URL, ELEMENT_TIMEOUT_S,
        )

    # Resolve hrefs now (the loop navigates away) and skip anchors without one.
    source_urls = [
        href
        for href in (link.get_attribute("href") for link in source_links)
        if href
    ]

    logging.info("Found %d source IDs.", len(source_urls))
    # Show a short, readable preview rather than raw WebElement reprs.
    print(f"Found {len(source_urls)} source IDs; first few: {source_urls[:5]}")

    # ===== Step 3: Loop Through Each Source ID Page =====
    for url in source_urls:
        try:
            # Navigation is inside the try so one bad page does not stop the run.
            driver.get(url)
            files_before = set(os.listdir(DOWNLOAD_FOLDER))

            # Step 4: Click the "three lines" menu (explicit wait, no fixed sleep)
            menu_button = wait.until(EC.element_to_be_clickable((By.XPATH, MENU_BUTTON_XPATH)))
            menu_button.click()

            # Step 5: Click "Download CSV"
            download_button = wait.until(EC.element_to_be_clickable((By.XPATH, DOWNLOAD_CSV_XPATH)))
            download_button.click()

            # Only report success once a new file has really landed on disk;
            # this also keeps driver.quit() from cutting off the last download.
            saved_as = _wait_for_new_download(DOWNLOAD_FOLDER, files_before)
            if saved_as is None:
                raise TimeoutError(
                    f"no new file appeared in {DOWNLOAD_FOLDER} within {DOWNLOAD_TIMEOUT_S}s"
                )

            logging.info(f"CSV downloaded successfully from: {url}")

        except Exception as e:
            # Best effort: record the failure and move on to the next source.
            failed_urls.append(url)
            logging.error(f"Failed to download CSV from {url}: {e}")
finally:
    # ===== Cleanup =====
    driver.quit()

if failed_urls:
    logging.warning("%d source page(s) failed; see errors above.", len(failed_urls))
logging.info("Download process completed.")

[]
