## PARA EXTRAER TIPO DE INVERSI√ìN Y UEI SOLAMENTE

In [None]:
!pip install selenium webdriver-manager openpyxl pandas

In [None]:
!pip install playwright
!playwright install chromium

In [2]:
import pandas as pd
import time
import os
from selenium import webdriver
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait as Wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager

# ================================================================
# CONFIGURACI√ìN OPTIMIZADA
# ================================================================
RUTA_ENTRADA = r"C:\Users\user\Downloads\CUI_junto.xlsx"
RUTA_SALIDA = r"C:\Users\user\Downloads\CUI_UEI.xlsx"
MAX_REINTENTOS = 2
MODO_VISIBLE = True
GUARDAR_CADA = 5

# Timeouts optimizados
TIMEOUT_PAGINA = 20
TIMEOUT_ELEMENTO = 10
TIMEOUT_MODAL = 10

# ================================================================
# FUNCIONES DE CHECKPOINT
# ================================================================
def cargar_progreso():
    """Carga progreso previo si existe"""
    if os.path.exists(RUTA_SALIDA):
        try:
            df = pd.read_excel(RUTA_SALIDA)
            print(f"üì• Progreso encontrado: {len(df)} CUIs ya procesados")
            return df.to_dict('records')
        except:
            pass
    return []

def obtener_pendientes(completa, procesados):
    """Calcula CUIs pendientes"""
    if not procesados:
        return completa
    cuis_ok = {str(r['CUI']) for r in procesados}
    pendientes = [cui for cui in completa if str(cui) not in cuis_ok]
    if pendientes:
        print(f"‚è≥ Pendientes: {len(pendientes)}")
    return pendientes

def guardar(resultados):
    """Guarda progreso"""
    try:
        pd.DataFrame(resultados).to_excel(RUTA_SALIDA, index=False)
    except Exception as e:
        print(f"‚ö†Ô∏è Error guardando: {e}")

# ================================================================
# LEER CUIs
# ================================================================
print("üìÇ Cargando CUIs...")
df_cui = pd.read_excel(RUTA_ENTRADA)
lista_completa = df_cui['CUI'].astype(str).tolist()

resultados = cargar_progreso()
lista_cuis = obtener_pendientes(lista_completa, resultados)

if not lista_cuis:
    print("üéâ ¬°Todos los CUIs ya fueron procesados!")
    exit()

# ================================================================
# CONFIGURAR NAVEGADOR OPTIMIZADO
# ================================================================
def crear_driver():
    service = Service(ChromeDriverManager().install())
    options = webdriver.ChromeOptions()

    if not MODO_VISIBLE:
        options.add_argument("--headless=new")
    else:
        options.add_argument("--start-maximized")

    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-extensions")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--disable-logging")
    options.add_argument("--log-level=3")
    options.add_argument("--silent")

    prefs = {
        "profile.managed_default_content_settings.images": 2,
        "profile.default_content_setting_values.notifications": 2,
        "profile.default_content_setting_values.media_stream": 2,
        "profile.default_content_setting_values.geolocation": 2,
    }
    options.add_experimental_option("prefs", prefs)
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.page_load_strategy = 'normal'

    driver = Chrome(service=service, options=options)
    driver.set_page_load_timeout(TIMEOUT_PAGINA)
    driver.set_script_timeout(15)
    return driver

driver = crear_driver()
print(f"‚ö° Procesando {len(lista_cuis)} CUIs pendientes\n")

# ================================================================
# CERRAR VENTANAS EMERGENTES
# ================================================================
def cerrar_ventanas_emergentes(driver):
    try:
        driver.execute_script("""
            document.querySelectorAll('.modal.show').forEach(m => {
                m.style.display='none';
                m.classList.remove('show');
            });
            document.querySelectorAll('.modal-backdrop').forEach(b => b.remove());
            document.body.classList.remove('modal-open');
            document.body.style.overflow='';
            document.body.style.paddingRight='';
        """)
    except:
        pass

# ================================================================
# ESPERAR Y VERIFICAR MODAL
# ================================================================
def esperar_modal_visible(driver, timeout=10):
    try:
        Wait(driver, timeout).until(
            EC.visibility_of_element_located((By.ID, "divResumenCont"))
        )
        time.sleep(0.3)
        
        tiene_contenido = driver.execute_script("""
            var modal = document.getElementById('divResumenCont');
            if (!modal) return false;
            if (modal.children.length === 0) return false;
            if (modal.offsetParent === null) return false;
            
            var tieneValores = document.getElementById('td_tipinv_r') !== null &&
                               document.getElementById('td_uei_r') !== null;
            
            return tieneValores;
        """)
        
        if not tiene_contenido:
            time.sleep(0.5)
            tiene_contenido = driver.execute_script("""
                return document.getElementById('td_tipinv_r') !== null;
            """)
        
        return tiene_contenido
        
    except TimeoutException:
        return False
    except Exception as e:
        print(f"‚ö†Ô∏è Error: {str(e)[:20]}", end=" ")
        return False

# ================================================================
# EXTRACCI√ìN DE DATOS - TIPO DE INVERSI√ìN Y UEI
# ================================================================
def extraer_datos_modal(driver):
    try:
        datos = driver.execute_script("""
            function getText(id) {
                try {
                    var elem = document.getElementById(id);
                    if (!elem) return 'NO DISPONIBLE';
                    var texto = elem.textContent || elem.innerText || '';
                    return texto.trim() || 'NO DISPONIBLE';
                } catch(e) {
                    return 'NO DISPONIBLE';
                }
            }

            var resultado = {
                tipo_inversion: getText('td_tipinv_r'),
                unidad_ejecutora: getText('td_uei_r')
            };
            
            return resultado;
        """)

        valores_validos = sum(1 for v in datos.values() if v != 'NO DISPONIBLE')
        
        if valores_validos < 1:
            print(f"‚ö†Ô∏è Sin datos", end=" ")
            return None
            
        return datos

    except Exception as e:
        print(f"‚ö†Ô∏è Error: {str(e)[:20]}", end=" ")
        return None

# ================================================================
# PROCESAR CUI
# ================================================================
def procesar_cui(cui, intento=1):
    try:
        driver.get("https://ofi5.mef.gob.pe/ssi/")
        
        input_box = Wait(driver, TIMEOUT_ELEMENTO).until(
            EC.element_to_be_clickable((By.ID, "txt_cu"))
        )
        input_box.clear()
        input_box.send_keys(cui)
        
        btn_buscar = Wait(driver, TIMEOUT_ELEMENTO).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "btn_bus"))
        )
        btn_buscar.click()

        Wait(driver, TIMEOUT_ELEMENTO).until(
            EC.presence_of_element_located((By.ID, "td_cu"))
        )
        time.sleep(0.3)

        cerrar_ventanas_emergentes(driver)
        time.sleep(0.2)

        try:
            btn_resumen = Wait(driver, 5).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "img[src*='resumen.png']"))
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", btn_resumen)
            time.sleep(0.2)
            driver.execute_script("arguments[0].click();", btn_resumen)
        except:
            driver.execute_script("""
                var imgs = document.querySelectorAll('img[src*="resumen.png"]');
                if (imgs.length > 0) {
                    imgs[0].click();
                }
            """)
        
        time.sleep(0.5)

        if not esperar_modal_visible(driver, TIMEOUT_MODAL):
            raise Exception("Modal no carg√≥")

        datos = extraer_datos_modal(driver)
        
        if not datos:
            raise Exception("Sin datos")

        try:
            driver.execute_script("""
                var btn = document.querySelector('button[data-bs-dismiss="modal"]');
                if (btn) btn.click();
            """)
            time.sleep(0.2)
        except:
            pass

        return datos

    except Exception as e:
        if intento < MAX_REINTENTOS:
            print(f"üîÑ{intento+1}...", end=" ")
            time.sleep(1)
            cerrar_ventanas_emergentes(driver)
            return procesar_cui(cui, intento + 1)
        else:
            raise e

# ================================================================
# LOOP PRINCIPAL
# ================================================================
tiempo_inicio = time.time()
exitosos = 0
fallos = 0

print(f"{'='*60}")
print(f"Inicio: {time.strftime('%H:%M:%S')}")
print(f"{'='*60}\n")

try:
    for idx, cui in enumerate(lista_cuis, 1):
        t_inicio = time.time()
        print(f"[{idx}/{len(lista_cuis)}] {cui}:", end=" ")
        
        try:
            datos = procesar_cui(cui)
            
            resultados.append({
                "CUI": cui,
                "Tipo de Inversi√≥n": datos['tipo_inversion'],
                "Unidad Ejecutora de Inversiones (UEI)": datos['unidad_ejecutora']
            })
            exitosos += 1
            print(f"‚úÖ ({time.time()-t_inicio:.1f}s)")
            
        except Exception as e:
            resultados.append({
                "CUI": cui,
                "Tipo de Inversi√≥n": "NO DISPONIBLE",
                "Unidad Ejecutora de Inversiones (UEI)": "NO DISPONIBLE"
            })
            fallos += 1
            print(f"‚ùå ({time.time()-t_inicio:.1f}s)")

        # GUARDAR CADA N CUIs
        if (idx % GUARDAR_CADA == 0) or (idx == len(lista_cuis)):
            guardar(resultados)
            if idx % GUARDAR_CADA == 0:
                print(f"   üíæ", end="")

        # Progreso cada 10 CUIs
        if idx % 10 == 0:
            t = time.time() - tiempo_inicio
            prom = t / idx
            eta = (len(lista_cuis) - idx) * prom
            print(f"\n   üìä {exitosos}‚úÖ {fallos}‚ùå | {t:.0f}s | ETA:{eta:.0f}s\n")

except KeyboardInterrupt:
    print("\n\n‚ö†Ô∏è INTERRUMPIDO")
    guardar(resultados)
    driver.quit()
    print(f"üíæ Progreso guardado. Ejecuta nuevamente para continuar.")
    exit()

# ================================================================
# FINALIZAR
# ================================================================
driver.quit()
guardar(resultados)

tiempo_total = time.time() - tiempo_inicio

print(f"\n{'='*60}")
print("üèÅ COMPLETADO")
print(f"{'='*60}")
print(f"üìä Total en sesi√≥n: {len(lista_cuis)}")
print(f"‚úÖ Exitosos: {exitosos}")
print(f"‚ùå Fallidos: {fallos}")
print(f"‚è±Ô∏è Tiempo: {tiempo_total:.1f}s")
print(f"üíæ Archivo: {RUTA_SALIDA}")
print(f"{'='*60}")

üìÇ Cargando CUIs...
üì• Progreso encontrado: 937 CUIs ya procesados
üéâ ¬°Todos los CUIs ya fueron procesados!
‚ö° Procesando 0 CUIs pendientes

Inicio: 01:52:17


üèÅ COMPLETADO
üìä Total en sesi√≥n: 0
‚úÖ Exitosos: 0
‚ùå Fallidos: 0
‚è±Ô∏è Tiempo: 2.2s
üíæ Archivo: C:\Users\user\Downloads\CUI_UEI.xlsx
