## Setup

In [6]:
import os
import re

import src.util as util
import pandas as pd
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from webdriver_manager.firefox import GeckoDriverManager
import time

In [7]:
load_dotenv()
save_path = os.getenv("XRAY_V2_PATH")

## Functions

In [8]:
def download_xray_nc_data_selenium(years_range_: range, save_path_) -> None:
    save_path_abs = os.path.abspath(save_path_)
    os.makedirs(save_path_abs, exist_ok=True)
    print(f"Arquivos serão salvos em: '{save_path_abs}'")

    options = Options()
    options.set_preference("browser.download.folderList", 2)
    options.set_preference("browser.download.dir", save_path_abs)
    options.set_preference("browser.download.manager.showWhenStarting", False)
    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv,application/octet-stream")
    driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()), options=options)

    base_urls = ("https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes17/l2/data/xrsf-l2-avg1m_science/",
                 "https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes18/l2/data/xrsf-l2-avg1m_science/")

    try:
        for y_ in years_range_:
            if y_ <= 2022: base_url = base_urls[0]
            else: base_url = base_urls[1]

            print(f"Acessando a página principal: {base_url}")
            driver.get(base_url)
            time.sleep(2)

            links = driver.find_elements(By.XPATH, "//table/tbody/tr/td/a")
            # print(f"links:\n\n {links}")
            year_link = None
            for link in links:
                print(f"text: {link.text.strip()} |||||| y: {y_}" )
                if link.text.strip() == f"{y_}/":
                    year_link = link
                    break
            year_link = year_link.get_attribute("href")

            driver.get(year_link)
            time.sleep(1)

            month_elements = driver.find_elements(By.XPATH, "//table/tbody/tr/td/a")
            month_links = []
            for link in month_elements:
                if 'Parent Directory' not in link.text:
                    month_links.append(link.get_attribute("href"))

            for month_link in month_links:
                print(f"  [MÊS] Acessando mês: {month_link}")
                driver.get(month_link)
                time.sleep(1)

                day_elements = driver.find_elements(By.XPATH, "//table/tbody/tr/td/a")
                for file in day_elements:
                    if 'Parent Directory' not in file.text:
                        file_name = file.text.strip()
                        file_path = os.path.join(save_path_abs, file_name)
                        print(f"        -> Baixando {file_name}...")
                        file.click()
                        util.wait_download(file_path_=file_path, file_name_=file_name)

    except AttributeError as e:
        print(e)
    except Exception as e:
        print(e)
    finally:
        print("\nProcesso concluído. Fechando o navegador.")
        driver.quit()

    return None

## Main

In [None]:
nc_range = range(2022,2025+1)
# download_xray_nc_data_selenium(nc_range,save_path)
download_xray_nc_data_selenium(range(2022,2022+1),save_path)

Arquivos serão salvos em: 'G:\My Drive\Solar_Flares\Data\xray_V2\raw'
Acessando a página principal: https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes17/l2/data/xrsf-l2-avg1m_science/
text: Parent Directory |||||| y: 2022
text: 2018/ |||||| y: 2022
text: 2019/ |||||| y: 2022
text: 2020/ |||||| y: 2022
text: 2021/ |||||| y: 2022
text: 2022/ |||||| y: 2022
  [MÊS] Acessando mês: https://data.ngdc.noaa.gov/platforms/solar-space-observing-satellites/goes/goes17/l2/data/xrsf-l2-avg1m_science/2022/01/
        -> Baixando sci_xrsf-l2-avg1m_g17_d20220101_v2-2-0.nc...
        -> Aguardando conclusão do download...
        -> Download de 'sci_xrsf-l2-avg1m_g17_d20220101_v2-2-0.nc' concluído com sucesso!
        -> Baixando sci_xrsf-l2-avg1m_g17_d20220102_v2-2-0.nc...
        -> Aguardando conclusão do download...
        -> Download de 'sci_xrsf-l2-avg1m_g17_d20220102_v2-2-0.nc' concluído com sucesso!
        -> Baixando sci_xrsf-l2-avg1m_g17_d20220103_v2-2-0.nc...
 

In [None]:
util.create_dirs(save_path,nc_range)

In [3]:
origin_dir = save_path
for date in pd.date_range(start="2020-01-01", end="2025-12-31"):
    y = date.year
    m = f"{date.month:02d}"
    d = f"{date.day:02d}"
    destiny_dir = os.path.join(save_path, str(y))
    file_name_pattern = re.compile(f"^sci_xrsf-l2-avg1m_g\\d{{2}}_d{y}{m}{d}_v2-2-0\.nc$")

    util.move_file(origin_dir, destiny_dir, file_name_pattern)

FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220101_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220102_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220103_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220104_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220105_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220106_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220107_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220108_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220109_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220110_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220111_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-l2-avg1m_g\\d{2}_d20220112_v2-2-0\\.nc$')) NOT FOUND
FILE (re.compile('^sci_xrsf-