In [None]:
!pip install selenium webdriver_manager beautifulsoup4

In [None]:
import re
import io
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

**Código que apenas imprime o texto da página**

In [None]:

# Configure Chrome to run headless (no visible browser window).
chrome_options = Options()
chrome_options.add_argument("--headless")  # run without opening a window
chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration (useful in some environments)
chrome_options.add_argument("--window-size=1920,1080")  # fixed window size to avoid rendering problems
chrome_options.add_argument("--no-sandbox")  # useful in some Linux environments

# Start the driver using the ChromeDriver binary managed by webdriver_manager.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

try:
    # 1. Open the Minor Planet Center page.
    driver.get("https://minorplanetcenter.net/iau/NEO/toconfirm_tabular.html")

    # 2. Locate the radio button <input type="radio" name="W" value="a">,
    #    scroll it into view and click it via JavaScript.
    radio_button = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//input[@type='radio' and @name='W' and @value='a']"))
    )
    driver.execute_script("arguments[0].scrollIntoView(true);", radio_button)
    time.sleep(1)
    driver.execute_script("arguments[0].click();", radio_button)

    # 3. Locate the observatory-code field and type "Y28".
    obs_input = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//input[@name='obscode']"))
    )
    obs_input.clear()
    obs_input.send_keys("Y28")

    # 4. Locate the submit button and click it via JavaScript.
    submit_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@type='submit']"))
    )
    driver.execute_script("arguments[0].click();", submit_button)

    # 5. Wait for the result page explicitly instead of sleeping a fixed 5 s:
    #    a slow response would otherwise leave us reading the old form page.
    #    Assumes the submission replaces the current document in the same
    #    window, which makes the old submit element go stale - TODO confirm.
    WebDriverWait(driver, 60).until(EC.staleness_of(submit_button))
    WebDriverWait(driver, 60).until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )

    # 6. Extract and print all the text of the page.
    body_element = driver.find_element(By.TAG_NAME, "body")
    page_text = body_element.text
    print(page_text)

finally:
    # Always release the browser process, even when a step above fails.
    driver.quit()

**Código que obtém os dados e cria as tabelas pandas**

In [None]:
def parse_extended_line(line):
    """Split one whitespace-separated text line into 14 string fields.

    The line is tokenised with ``str.split()``. Lines with fewer than 20
    tokens are rejected; tokens beyond the 20th are ignored.

    Returns:
        ``None`` when the line has fewer than 20 tokens, otherwise a list of
        14 strings, in this order:
        tokens 0-2 joined by spaces (date), token 3 (UT), tokens 4-6 joined
        (R.A.), tokens 7-9 joined (Decl), then tokens 10 through 19 one by one
        (elongation, V, motion rate, motion PA, object azimuth, object
        altitude, sun altitude, moon phase, moon distance, moon altitude).
    """
    fields = line.split()
    if len(fields) < 20:
        return None

    # The three multi-token columns are rebuilt with single spaces.
    date_part, ra_part, decl_part = (
        " ".join(fields[start:stop]) for start, stop in ((0, 3), (4, 7), (7, 10))
    )

    # Tokens 10..19 map one-to-one onto the remaining ten output fields.
    return [date_part, fields[3], ra_part, decl_part, *fields[10:20]]



# Configure Chrome to run headless (no graphical interface).
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--no-sandbox")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

try:
    driver.get("https://minorplanetcenter.net/iau/NEO/toconfirm_tabular.html")

    # Select the radio button name="W" value="a" (clicked via JavaScript).
    radio_button = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//input[@type='radio' and @name='W' and @value='a']"))
    )
    driver.execute_script("arguments[0].scrollIntoView(true);", radio_button)
    time.sleep(1)
    driver.execute_script("arguments[0].click();", radio_button)

    # Fill in the observatory code.
    obs_input = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//input[@name='obscode']"))
    )
    obs_input.clear()
    obs_input.send_keys("Y28")

    # Submit the form.
    submit_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//input[@type='submit']"))
    )
    driver.execute_script("arguments[0].click();", submit_button)

    # Wait for the result page explicitly instead of sleeping a fixed 5 s:
    # with a slow response the text of the old form page would be captured
    # and the parsing below would silently find no objects.
    # Assumes the submission replaces the current document in the same
    # window, which makes the old submit element go stale - TODO confirm.
    WebDriverWait(driver, 60).until(EC.staleness_of(submit_button))
    WebDriverWait(driver, 60).until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )

    # Keep the full visible text of the page for the parsing steps below.
    body_element = driver.find_element(By.TAG_NAME, "body")
    page_text = body_element.text

finally:
    # Always release the browser process, even when a step above fails.
    driver.quit()


# Group the page text into per-object blocks.
# A non-empty line is treated as an object header when the line right after it
# contains "Get the observations"; the non-empty lines that follow (excluding
# the "Get the observations" lines themselves) form that object's block.
lines = page_text.splitlines()
blocks = {}  # {object name: [stripped lines of its block]}
current_object = None
current_block = []

# Pair every line with its successor; the last line is paired with "" so it
# can never be mistaken for a header.
for line, next_line in zip(lines, lines[1:] + [""]):
    line_stripped = line.strip()

    if line_stripped and "Get the observations" in next_line:
        # New header: store the block collected so far (if any) and start over.
        if current_object and current_block:
            blocks[current_object] = current_block
        current_object, current_block = line_stripped, []
    elif current_object and line_stripped and "Get the observations" not in line_stripped:
        current_block.append(line_stripped)

# Store the last block, which has no following header to trigger the save.
if current_object and current_block:
    blocks[current_object] = current_block


# Column labels, in the same order as the list returned by parse_extended_line.
column_names = [
    "Date", "UT", "R.A. (J2000)", "Decl", "Elong", "V",
    "Motion min", "Motion PA",
    "Object Azi", "Object Alt", "Sun Alt",
    "Moon Phase", "Moon Dist", "Moon Alt",
]

# Build one DataFrame per object; objects without any parsable row are skipped.
dataframes = {}
for obj, block_lines in blocks.items():
    # Only lines starting with 4 digits (assumed to be the year) are parsed.
    candidates = (
        parse_extended_line(line)
        for line in block_lines
        if re.match(r'^\d{4}', line.strip())
    )
    rows = [parsed for parsed in candidates if parsed is not None]
    if rows:
        dataframes[obj] = pd.DataFrame(rows, columns=column_names)

# Show the resulting DataFrame of every object.
for obj, df in dataframes.items():
    print(f"\nDataFrame para o objeto {obj}:")
    print(df)

**Teste de utilização**

In [None]:
# Usage example: read the 'Decl' value of one row of one object's table.
# The designation and row index below are hard-coded examples; they exist only
# if the scraped page currently lists that object with enough rows, so fall
# back gracefully instead of failing with KeyError / IndexError on a re-run.
object_name = "5FI3921"
row_index = 8

if object_name not in dataframes:
    if not dataframes:
        raise RuntimeError("Nenhum objeto foi extraído da página; não há dados para exibir.")
    fallback_name = next(iter(dataframes))
    print(f"Objeto {object_name} não encontrado; usando {fallback_name}.")
    object_name = fallback_name

df = dataframes[object_name]

# Clamp the example row to the last available row of this table.
row_index = min(row_index, len(df) - 1)

valor = df.iloc[row_index]['Decl']
print(valor)