In [None]:
#todo A√±adir deflactor del PIB trimestral


In [1]:
import pandas as pd
import numpy as np
import os
from collections import defaultdict # √ötil para recolectar datos a nivel estatal

# ----------------------------------------------------------------------
# FUNCIONES DE UTILIDAD
# ----------------------------------------------------------------------

def weighted_average(df, value_col, weight_col):
    """Calcula el promedio ponderado de una columna usando pesos (factores de expansi√≥n)."""
    # Asegura que las columnas de valor y peso existan y no sean NaN
    df_filtered = df.dropna(subset=[value_col, weight_col])
    
    # Maneja el caso de que no haya datos o la suma de pesos sea cero
    if df_filtered.empty or df_filtered[weight_col].sum() == 0:
        return np.nan
    
    # Excluir valores negativos o cero si se asume que 'ingocup' es ingreso positivo
    # if value_col in ['ingocup', 'ing_x_hrs']:
    #     df_filtered = df_filtered[df_filtered[value_col] > 0]
    
    return np.average(df_filtered[value_col], weights=df_filtered[weight_col])

# Diccionario de Entidades para mapear c√≥digos a nombres
ENTIDADES = {
    1: 'Aguascalientes', 2: 'Baja California', 3: 'Baja California Sur', 4: 'Campeche',
    5: 'Coahuila', 6: 'Colima', 7: 'Chiapas', 8: 'Chihuahua', 9: 'Ciudad de M√©xico',
    10: 'Durango', 11: 'Guanajuato', 12: 'Guerrero', 13: 'Hidalgo', 14: 'Jalisco',
    15: 'M√©xico', 16: 'Michoac√°n', 17: 'Morelos', 18: 'Nayarit', 19: 'Nuevo Le√≥n',
    20: 'Oaxaca', 21: 'Puebla', 22: 'Quer√©taro', 23: 'Quintana Roo', 24: 'San Luis Potos√≠',
    25: 'Sinaloa', 26: 'Sonora', 27: 'Tabasco', 28: 'Tamaulipas', 29: 'Tlaxcala',
    30: 'Veracruz', 31: 'Yucat√°n', 32: 'Zacatecas'
}

# ----------------------------------------------------------------------
# FUNCI√ìN PRINCIPAL DE PROCESAMIENTO TRIMESTRAL
# ----------------------------------------------------------------------

def procesar_trimestre_enoe(year, quarter, file_format='dta'):
    """
    Carga, limpia y calcula indicadores clave a nivel nacional y estatal 
    para un trimestre espec√≠fico.
    
    Args:
        year (int): El a√±o del trimestre a analizar (e.g., 2023).
        quarter (int): El n√∫mero de trimestre (1, 2, 3, 4).
        file_format (str): Formato del archivo ('dta' o 'csv').
        
    Returns:
        tuple: (pd.Series Nacional, pd.DataFrame Estatal) con los indicadores, 
               o (None, None) si el archivo no se encuentra o est√° vac√≠o.
    """
    periodo_str = f"{year} T{quarter}"
    print(f"\n--- ‚è≥ Procesando: {periodo_str} ---")

    # --- 1. Construcci√≥n de la Ruta del Archivo ---
    year_short = str(year)[-2:]
    dir_name = f"ENOE_{year}_{quarter}"
    file_name = f"ENOE_SDEMT{quarter}{year_short}.{file_format}"
    file_path = os.path.join("data", f"ENOE_{file_format}", dir_name, file_name)
    
    # --- 2. Carga de Datos y Manejo de Errores (Debugging) ---
    if not os.path.exists(file_path):
        print(f"‚ùå Error Cr√≠tico: Archivo no encontrado en: {file_path}")
        return None, None # Retorna None para el control en el script principal
    
    try:
        if file_format == 'dta':
            df = pd.read_stata(file_path, convert_categoricals=False)
        elif file_format == 'csv':
            df = pd.read_csv(file_path)
        else:
            raise ValueError("Formato de archivo no soportado.")
        
        if df.empty:
            print(f"‚ùå Error de Carga: Archivo encontrado, pero vac√≠o: {file_path}")
            return None, None
            
        print(f"‚úÖ Archivo cargado exitosamente. {len(df):,} registros.")
        
    except Exception as e:
        print(f"‚ùå Ocurri√≥ un error de lectura de datos: {e}")
        return None, None

    # --- 3. Limpieza y Preparaci√≥n de Datos ---
    # Conversi√≥n de tipos de datos esenciales
    df['r_def'] = df['r_def'].astype(str).str.strip()
    
    columnas_numericas = [
        'fac_tri', 'sex', 'eda', 'clase1', 'clase2', 'c_res',
        'ingocup', 'ing_x_hrs', 'ent'
    ]
    for col in columnas_numericas:
        # Usamos errors='coerce' para convertir valores no num√©ricos a NaN
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Filtro base: Universo de residentes con entrevista completa (r_def='00', c_res=1 o 3)
    df_base = df[(df['r_def'] == '0.0') & (df['c_res'].isin([1, 3]))].copy()
    
    if df_base.empty:
        print("‚ùå Error de Filtro: No se encontraron registros v√°lidos despu√©s del filtro base.")
        return None, None

    # Filtro de Poblaci√≥n en Edad de Trabajar (PET): 15 a√±os y m√°s
    df_15_y_mas = df_base[df_base['eda'].between(15, 98)].copy()
    
    # Definici√≥n de subconjuntos
    df_pea = df_15_y_mas[df_15_y_mas['clase1'] == 1].copy()      # PEA (clase1=1)
    df_ocupada = df_15_y_mas[df_15_y_mas['clase2'] == 1].copy()  # Ocupada (clase2=1)
    
    # Asignaci√≥n de nombres de estado (Necesario para ambos niveles)
    df_base['ent_nombre'] = df_base['ent'].map(ENTIDADES)
    df_15_y_mas['ent_nombre'] = df_15_y_mas['ent'].map(ENTIDADES)
    df_pea['ent_nombre'] = df_pea['ent'].map(ENTIDADES)
    df_ocupada['ent_nombre'] = df_ocupada['ent'].map(ENTIDADES)

    # ------------------------------------------------------------------
    # --- 4. C√ÅLCULOS A NIVEL NACIONAL ---
    # ------------------------------------------------------------------
    
    datos_nacional = {
        # Identificadores de Tiempo
        'year': year,
        'quarter': quarter,
        
        # Poblaci√≥n Total
        'pob_total': df_base['fac_tri'].sum(),
        'pob_hombres_total': df_base[df_base['sex'] == 1]['fac_tri'].sum(),
        'pob_mujeres_total': df_base[df_base['sex'] == 2]['fac_tri'].sum(),
        
        # PET (15 a√±os y m√°s)
        'pet_total': df_15_y_mas['fac_tri'].sum(),
        'pet_hombres_15mas': df_15_y_mas[df_15_y_mas['sex'] == 1]['fac_tri'].sum(),
        'pet_mujeres_15mas': df_15_y_mas[df_15_y_mas['sex'] == 2]['fac_tri'].sum(),

        # PEA
        'pea_total': df_pea['fac_tri'].sum(),
        'pea_hombres': df_pea[df_pea['sex'] == 1]['fac_tri'].sum(),
        'pea_mujeres': df_pea[df_pea['sex'] == 2]['fac_tri'].sum(),
        
        # Ingreso Promedio
        'ing_prom_mes_total': weighted_average(df_ocupada, 'ingocup', 'fac_tri'),
        'ing_prom_mes_hombres': weighted_average(df_ocupada[df_ocupada['sex'] == 1], 'ingocup', 'fac_tri'),
        'ing_prom_mes_mujeres': weighted_average(df_ocupada[df_ocupada['sex'] == 2], 'ingocup', 'fac_tri'),
        
        'ing_prom_hora_total': weighted_average(df_ocupada, 'ing_x_hrs', 'fac_tri'),
        'ing_prom_hora_hombres': weighted_average(df_ocupada[df_ocupada['sex'] == 1], 'ing_x_hrs', 'fac_tri'),
        'ing_prom_hora_mujeres': weighted_average(df_ocupada[df_ocupada['sex'] == 2], 'ing_x_hrs', 'fac_tri'),
    }
    
    # ------------------------------------------------------------------
    # --- 5. C√ÅLCULOS A NIVEL ESTATAL ---
    # ------------------------------------------------------------------
    
    # Inicializaci√≥n de un diccionario de listas para recolectar datos por estado
    datos_estatal = defaultdict(list)
    
    for ent_code, ent_name in ENTIDADES.items():
        # Filtros por Estado
        df_base_est = df_base[df_base['ent'] == ent_code]
        df_15_y_mas_est = df_15_y_mas[df_15_y_mas['ent'] == ent_code]
        df_pea_est = df_pea[df_pea['ent'] == ent_code]
        df_ocupada_est = df_ocupada[df_ocupada['ent'] == ent_code]
        
        # Recolecci√≥n de datos
        datos_estatal['year'].append(year)
        datos_estatal['quarter'].append(quarter)
        datos_estatal['ent_code'].append(ent_code)
        datos_estatal['ent_nombre'].append(ent_name)
        
        # Poblaci√≥n Total
        datos_estatal['pob_total'].append(df_base_est['fac_tri'].sum())
        datos_estatal['pob_hombres_total'].append(df_base_est[df_base_est['sex'] == 1]['fac_tri'].sum())
        datos_estatal['pob_mujeres_total'].append(df_base_est[df_base_est['sex'] == 2]['fac_tri'].sum())

        # PET (15 a√±os y m√°s)
        datos_estatal['pet_hombres_15mas'].append(df_15_y_mas_est[df_15_y_mas_est['sex'] == 1]['fac_tri'].sum())
        datos_estatal['pet_mujeres_15mas'].append(df_15_y_mas_est[df_15_y_mas_est['sex'] == 2]['fac_tri'].sum())

        # PEA
        datos_estatal['pea_hombres'].append(df_pea_est[df_pea_est['sex'] == 1]['fac_tri'].sum())
        datos_estatal['pea_mujeres'].append(df_pea_est[df_pea_est['sex'] == 2]['fac_tri'].sum())
        
        # Ingreso Promedio
        datos_estatal['ing_prom_mes_total'].append(weighted_average(df_ocupada_est, 'ingocup', 'fac_tri'))
        datos_estatal['ing_prom_hora_total'].append(weighted_average(df_ocupada_est, 'ing_x_hrs', 'fac_tri'))

    # Convierte el diccionario recolectado a un DataFrame
    df_estatal_trimestre = pd.DataFrame(datos_estatal)
    
    return pd.Series(datos_nacional), df_estatal_trimestre

# ----------------------------------------------------------------------
# EJECUCI√ìN DEL SCRIPT Y CONSOLIDACI√ìN DE SERIES DE TIEMPO
# ----------------------------------------------------------------------

if __name__ == "__main__":
    # --- RANGO DE AN√ÅLISIS ---
    # Define el rango de a√±os y trimestres a analizar
    START_YEAR = 2018
    END_YEAR = 2024
    
    # Lista para almacenar los resultados nacionales (Series de Pandas)
    resultados_nacionales = []
    # Lista para almacenar los resultados estatales (DataFrames)
    resultados_estatales = []

    # Genera la secuencia de trimestres
    periodos = []
    for y in range(START_YEAR, END_YEAR + 1):
        for q in range(1, 5):
            # Condici√≥n para saltarse trimestres no disponibles (ej. 2020 T2 y T3)
            if y == 2020 and q in [2, 3]:
                print(f"--- ‚ö†Ô∏è Saltando periodo {y} T{q} (No disponible o no oficial). ---")
                continue
            periodos.append((y, q))

    print(f"\n===========================================================")
    print(f"  INICIANDO PROCESAMIENTO DE {len(periodos)} TRIMESTRES")
    print(f"  Rango: {START_YEAR} T1 hasta {END_YEAR} T4")
    print(f"===========================================================")

    # Bucle principal para procesar cada trimestre
    for year, quarter in periodos:
        df_nacional, df_estatal = procesar_trimestre_enoe(year, quarter)
        
        if df_nacional is not None and df_estatal is not None:
            resultados_nacionales.append(df_nacional)
            resultados_estatales.append(df_estatal)
        else:
            # Manejo expl√≠cito de trimestres sin datos (se a√±ade una fila con NA)
            periodo_na = {'year': year, 'quarter': quarter}
            
            # Series Nacional con NA
            serie_na_nacional = pd.Series(periodo_na)
            resultados_nacionales.append(serie_na_nacional)
            
            # DataFrame Estatal con NA
            df_na_estatal = pd.DataFrame(periodo_na, index=range(1, 33)) # 32 estados
            df_na_estatal['ent_code'] = df_na_estatal.index
            df_na_estatal['ent_nombre'] = df_na_estatal['ent_code'].map(ENTIDADES)
            # Rellenar todas las columnas de variables con NaN
            for col in resultados_estatales[0].columns if resultados_estatales else []:
                if col not in df_na_estatal.columns:
                    df_na_estatal[col] = np.nan
            resultados_estatales.append(df_na_estatal)
            
            print(f"--- üö´ Se agreg√≥ NA/NaN para {year} T{quarter} y se prosigue. ---")
            

    # --- 6. CONSOLIDACI√ìN DE BASES DE DATOS ---

    # 1. Serie de Tiempo Nacional
    df_serie_nacional = pd.DataFrame(resultados_nacionales).reset_index(drop=True)
    # Crea un √≠ndice de tiempo para facilitar el an√°lisis
    df_serie_nacional['periodo'] = df_serie_nacional['year'].astype(str) + '-T' + df_serie_nacional['quarter'].astype(str)
    df_serie_nacional.set_index('periodo', inplace=True)

    print("\n===========================================================")
    print("      ‚úÖ BASE DE SERIE DE TIEMPO NACIONAL CREADA")
    print("===========================================================")
    print(df_serie_nacional.head())
    df_serie_nacional.to_csv("Resultados/serie_tiempo_nacional.csv")


    # 2. Serie de Tiempo Estatal
    df_serie_estatal = pd.concat(resultados_estatales, ignore_index=True)
    # Crea un √≠ndice de tiempo
    df_serie_estatal['periodo'] = df_serie_estatal['year'].astype(str) + '-T' + df_serie_estatal['quarter'].astype(str)
    
    print("\n===========================================================")
    print("      ‚úÖ BASE DE SERIE DE TIEMPO ESTATAL CREADA")
    print("===========================================================")
    print(df_serie_estatal.head())
    df_serie_estatal.to_csv("Resultados/serie_tiempo_estatal.csv")

--- ‚ö†Ô∏è Saltando periodo 2020 T2 (No disponible o no oficial). ---
--- ‚ö†Ô∏è Saltando periodo 2020 T3 (No disponible o no oficial). ---

  INICIANDO PROCESAMIENTO DE 26 TRIMESTRES
  Rango: 2018 T1 hasta 2024 T4

--- ‚è≥ Procesando: 2018 T1 ---
‚ùå Error Cr√≠tico: Archivo no encontrado en: data\ENOE_dta\ENOE_2018_1\ENOE_SDEMT118.dta
--- üö´ Se agreg√≥ NA/NaN para 2018 T1 y se prosigue. ---

--- ‚è≥ Procesando: 2018 T2 ---
‚ùå Error Cr√≠tico: Archivo no encontrado en: data\ENOE_dta\ENOE_2018_2\ENOE_SDEMT218.dta
--- üö´ Se agreg√≥ NA/NaN para 2018 T2 y se prosigue. ---

--- ‚è≥ Procesando: 2018 T3 ---
‚ùå Error Cr√≠tico: Archivo no encontrado en: data\ENOE_dta\ENOE_2018_3\ENOE_SDEMT318.dta
--- üö´ Se agreg√≥ NA/NaN para 2018 T3 y se prosigue. ---

--- ‚è≥ Procesando: 2018 T4 ---
‚ùå Error Cr√≠tico: Archivo no encontrado en: data\ENOE_dta\ENOE_2018_4\ENOE_SDEMT418.dta
--- üö´ Se agreg√≥ NA/NaN para 2018 T4 y se prosigue. ---

--- ‚è≥ Procesando: 2019 T1 ---
‚ùå Error Cr√≠tico: Arch

KeyboardInterrupt: 

In [5]:
temp = pd.read_stata("Data\ENOE_dta\ENOE_2005_1\SDEMT105.dta", convert_categoricals= False)


In [6]:
temp

Unnamed: 0,r_def,loc,mun,est,est_d,ageb,t_loc,cd_a,ent,con,...,ma48me1sm,p14apoyos,scian,t_tra,emp_ppal,tue_ppal,trans_ppal,mh_fil2,mh_col,sec_ins
0,0.0,,3.0,24.0,0005,0.0,1,1.0,9.0,502.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,,3.0,24.0,0005,0.0,1,1.0,9.0,502.0,...,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,,3.0,24.0,0005,0.0,1,1.0,9.0,502.0,...,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,,7.0,33.0,0008,0.0,1,1.0,9.0,506.0,...,0.0,2.0,19.0,1.0,1.0,1.0,0.0,1.0,1.0,8.0
4,0.0,,7.0,33.0,0008,0.0,1,1.0,9.0,506.0,...,0.0,2.0,5.0,1.0,1.0,2.0,0.0,3.0,1.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424002,0.0,,,22.0,0888,0.0,4,86.0,32.0,6040.0,...,1.0,2.0,1.0,1.0,1.0,2.0,0.0,4.0,1.0,3.0
424003,0.0,,,22.0,0888,0.0,4,86.0,32.0,6040.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
424004,0.0,,,22.0,0888,0.0,4,86.0,32.0,6040.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
424005,0.0,,,22.0,0888,0.0,4,86.0,32.0,6040.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
temp.columns

Index(['r_def', 'loc', 'mun', 'est', 'est_d', 'ageb', 't_loc', 'cd_a', 'ent',
       'con',
       ...
       'ma48me1sm', 'p14apoyos', 'scian', 't_tra', 'emp_ppal', 'tue_ppal',
       'trans_ppal', 'mh_fil2', 'mh_col', 'sec_ins'],
      dtype='object', length=104)

In [2]:
import pandas as pd
import numpy as np
import os
from collections import defaultdict
import re
from datetime import datetime

# ----------------------------------------------------------------------
# FUNCIONES DE UTILIDAD
# ----------------------------------------------------------------------

def weighted_average(df, value_col, weight_col):
    """Calcula el promedio ponderado de una columna usando pesos (factores de expansi√≥n)."""
    df_filtered = df.dropna(subset=[value_col, weight_col])
    
    # Excluir valores de ingreso no v√°lidos (generalmente negativos o no especificados, si se aplica)
    if value_col in ['ingocup', 'ing_x_hrs']:
        df_filtered = df_filtered[df_filtered[value_col] > 0].copy()
    
    if df_filtered.empty or df_filtered[weight_col].sum() == 0:
        return np.nan
    
    return np.average(df_filtered[value_col], weights=df_filtered[weight_col])

# Diccionario de Entidades para mapear c√≥digos a nombres
ENTIDADES = {
    1: 'Aguascalientes', 2: 'Baja California', 3: 'Baja California Sur', 4: 'Campeche',
    5: 'Coahuila', 6: 'Colima', 7: 'Chiapas', 8: 'Chihuahua', 9: 'Ciudad de M√©xico',
    10: 'Durango', 11: 'Guanajuato', 12: 'Guerrero', 13: 'Hidalgo', 14: 'Jalisco',
    15: 'M√©xico', 16: 'Michoac√°n', 17: 'Morelos', 18: 'Nayarit', 19: 'Nuevo Le√≥n',
    20: 'Oaxaca', 21: 'Puebla', 22: 'Quer√©taro', 23: 'Quintana Roo', 24: 'San Luis Potos√≠',
    25: 'Sinaloa', 26: 'Sonora', 27: 'Tabasco', 28: 'Tamaulipas', 29: 'Tlaxcala',
    30: 'Veracruz', 31: 'Yucat√°n', 32: 'Zacatecas'
}

def obtener_nombre_archivo(year, quarter, file_format='dta'):
    """Determina el nombre del archivo SDEMT seg√∫n el periodo."""
    year_short = str(year)[-2:]
    
    # Periodo 1: 2005 T1 a 2018 T4 (May√∫sculas)
    if year <= 2018:
        base_name = f"SDEMT{quarter}{year_short}".upper()
    
    # Periodo 2: 2019 T1 a 2019 T4 (Min√∫sculas)
    elif year == 2019:
        base_name = f"sdemt{quarter}{year_short}".lower()
    
    # Periodo 3: 2020 T3 a 2022 T4 (Prefijo ENOEN_)
    elif 2020 <= year <= 2022:
        # 2020 T1 y T2 no tienen datos o son no oficiales (se manejan como "saltados" en el script principal)
        base_name = f"ENOEN_SDEMT{quarter}{year_short}".upper()
    
    # Periodo 4: 2023 T1 en adelante (Vuelve a May√∫sculas/Patr√≥n consistente con el documento)
    else: # year >= 2023
        base_name = f"SDEMT{quarter}{year_short}".upper()
        
    dir_name = f"ENOE_{year}_{quarter}"
    file_name = f"{base_name}.{file_format}"
    # Asume que los archivos est√°n en data/dta/ENOE_YYYY_Q/SDEMT...
    #file_path = os.path.join("Data/", file_format, dir_name, file_name) 
    file_path = os.path.join("Data/ENOE_dta", dir_name, file_name) 
    return file_path

def pedir_rango_trimestral():
    """Pide al usuario el rango de a√±os y trimestres para generar la serie de tiempo."""
    while True:
        try:
            print("\n--- Definici√≥n del Rango de la Serie de Tiempo ---")
            start_year = int(input("Ingrese el A√ëO de inicio (e.g., 2018): "))
            start_quarter = int(input("Ingrese el TRIMESTRE de inicio (1 a 4): "))
            end_year = int(input("Ingrese el A√ëO final (e.g., 2024): "))
            end_quarter = int(input("Ingrese el TRIMESTRE final (1 a 4): "))
            
            if not (1 <= start_quarter <= 4 and 1 <= end_quarter <= 4):
                raise ValueError("El trimestre debe ser un n√∫mero entre 1 y 4.")
            
            start_date = datetime(start_year, start_quarter * 3 - 2, 1)
            end_date = datetime(end_year, end_quarter * 3 - 2, 1)

            if start_date > end_date:
                raise ValueError("El periodo de inicio debe ser anterior o igual al periodo final.")
                
            break
        except ValueError as e:
            print(f"Entrada inv√°lida: {e}. Por favor, intente de nuevo.")
            
    # Generar la secuencia de trimestres
    periodos = []
    current_year = start_year
    current_quarter = start_quarter
    
    while current_year < end_year or (current_year == end_year and current_quarter <= end_quarter):
        
        # Manejo de trimestres faltantes (2020 T2 y T3 no oficiales/disponibles)
        if current_year == 2020 and current_quarter in [2, 3]:
            print(f"--- ‚ö†Ô∏è Saltando periodo {current_year} T{current_quarter} (No disponible o no oficial). ---")
            pass # No se a√±ade el periodo a la lista para no intentar cargarlo.
            
        else:
            periodos.append((current_year, current_quarter))
            
        # Pasar al siguiente trimestre
        if current_quarter == 4:
            current_quarter = 1
            current_year += 1
        else:
            current_quarter += 1
            
    return periodos

# ----------------------------------------------------------------------
# FUNCI√ìN PRINCIPAL DE PROCESAMIENTO TRIMESTRAL
# ----------------------------------------------------------------------

def procesar_trimestre_enoe(year, quarter, file_format='dta'):
    """
    Carga, limpia y calcula indicadores clave a nivel nacional y estatal 
    para un trimestre espec√≠fico.
    """
    periodo_str = f"{year} T{quarter}"
    print(f"\n--- ‚è≥ Procesando: {periodo_str} ---")

    # --- 1. Obtener Ruta y Ponderador ---
    file_path = obtener_nombre_archivo(year, quarter, file_format)
    
    # Determinar el campo ponderador correcto seg√∫n el periodo 
    if year < 2020 or (year == 2020 and quarter < 3):
        PONDERATOR = 'FAC'
    else:
        PONDERATOR = 'FAC_TRI'
    
    # --- 2. Carga de Datos y Manejo de Errores (Debugging) ---
    if not os.path.exists(file_path):
        print(f"‚ùå Error Cr√≠tico: Archivo no encontrado en: {file_path}")
        return None, None
    
    try:
        if file_format == 'dta':
            # Se usa `encoding='latin-1'` si se encuentran problemas con codificaci√≥n de texto
            df = pd.read_stata(file_path, convert_categoricals=False) 
        elif file_format == 'csv':
            df = pd.read_csv(file_path)
        else:
            raise ValueError("Formato de archivo no soportado.")
        
        if df.empty:
            print(f"‚ùå Error de Carga: Archivo encontrado, pero vac√≠o: {file_path}")
            return None, None
            
        print(f"‚úÖ Archivo cargado exitosamente. {len(df):,} registros. Ponderador: {PONDERATOR}")
        
    except Exception as e:
        print(f"‚ùå Ocurri√≥ un error de lectura de datos en {file_path}: {e}")
        return None, None

    # --- 3. Limpieza y Preparaci√≥n de Datos ---
    
    # Conversi√≥n de tipos de datos esenciales y estandarizaci√≥n de nombres
    columnas_requeridas = [
        PONDERATOR, 'sex', 'eda', 'clase1', 'clase2', 'c_res', 'r_def', 'ent',
        'ingocup', 'ing_x_hrs', 'pos_ocu', 'emp_ppal', 'sub_o' # Indicadores estrat√©gicos
    ]
    
    for col in columnas_requeridas:
        if col not in df.columns:
            # A√±adir columna con NaN/0 si falta, para evitar errores en c√°lculos posteriores (excepto ponderador)
            if col == PONDERATOR:
                 print(f"‚ùå Error Cr√≠tico: Columna de ponderador '{PONDERATOR}' no encontrada.")
                 return None, None
            df[col] = np.nan if col not in ['r_def', 'c_res'] else 0
            print(f"‚ö†Ô∏è Columna '{col}' no encontrada. Se a√±adi√≥ con NaN/0 para proseguir.")

    # Conversi√≥n de tipos
    df['r_def'] = df['r_def'].astype(str).str.strip()
    for col in ['sex', 'eda', 'clase1', 'clase2', 'c_res', 'ent', 'pos_ocu', 'emp_ppal', 'sub_o']:
         # Convertir a num√©rico, forzando errores a NaN, luego a entero (si es posible)
         df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(int)
    for col in ['ingocup', 'ing_x_hrs', PONDERATOR]:
         df[col] = pd.to_numeric(df[col], errors='coerce')


    # CRITERIO GENERAL DE FILTRADO (POBLACI√ìN DE 15 A√ëOS Y M√ÅS)
    # R_DEF='00' y (C_RES=1 o 3) y (EDA>=15 y EDA<=98) [cite: 148]
    
    # 1. Poblaci√≥n total residente
    df_base = df[(df['r_def'] == '00') & (df['c_res'].isin([1, 3]))].copy()

    # 2. Poblaci√≥n en Edad de Trabajar (PET) 15 a√±os y m√°s
    df_15_y_mas = df_base[df_base['eda'].between(15, 98)].copy()
    
    if df_15_y_mas.empty:
        print("‚ùå Error de Filtro: No se encontraron registros v√°lidos despu√©s del filtro PET.")
        return None, None
    
    # Asignaci√≥n de nombres de estado
    df_base['ent_nombre'] = df_base['ent'].map(ENTIDADES)
    df_15_y_mas['ent_nombre'] = df_15_y_mas['ent'].map(ENTIDADES)
    
    # ------------------------------------------------------------------
    # --- 4. C√ÅLCULOS A NIVEL NACIONAL ---
    # ------------------------------------------------------------------
    
    # Subconjuntos basados en campos precodificados y el criterio general [cite: 147]
    df_pea = df_15_y_mas[df_15_y_mas['clase1'] == 1].copy()      
    df_pnea = df_15_y_mas[df_15_y_mas['clase1'] == 2].copy()
    df_ocupada = df_15_y_mas[df_15_y_mas['clase2'] == 1].copy() 

    datos_nacional = {
        # Identificadores de Tiempo
        'year': year,
        'quarter': quarter,
        
        # 1. Poblaci√≥n
        'pob_total': df_base[PONDERATOR].sum(),
        'pob_15_y_mas': df_15_y_mas[PONDERATOR].sum(), # [cite: 162]
        'pob_hombres_total': df_base[df_base['sex'] == 1][PONDERATOR].sum(),
        'pob_mujeres_total': df_base[df_base['sex'] == 2][PONDERATOR].sum(),
        
        # 2. PEA y PNEA
        'pea_total': df_pea[PONDERATOR].sum(),
        'pea_hombres': df_pea[df_pea['sex'] == 1][PONDERATOR].sum(),
        'pea_mujeres': df_pea[df_pea['sex'] == 2][PONDERATOR].sum(),
        'pnea_total': df_pnea[PONDERATOR].sum(), # [cite: 163]
        
        # Indicadores Estrat√©gicos (CLASE2 y CLASE1) [cite: 162, 163]
        'ocupada_total': df_ocupada[PONDERATOR].sum(),
        'desocupada_total': df_15_y_mas[df_15_y_mas['clase2'] == 2][PONDERATOR].sum(),
        'pnea_disponible': df_15_y_mas[df_15_y_mas['clase2'] == 3][PONDERATOR].sum(),
        'pnea_no_disponible': df_15_y_mas[df_15_y_mas['clase2'] == 4][PONDERATOR].sum(),
        
        # Indicadores Estrat√©gicos (POSICI√ìN EN LA OCUPACI√ìN - Ocupados) [cite: 163]
        'subordinados_remunerados': df_ocupada[df_ocupada['pos_ocu'] == 1][PONDERATOR].sum(),
        'empleadores': df_ocupada[df_ocupada['pos_ocu'] == 2][PONDERATOR].sum(),
        'cuenta_propia': df_ocupada[df_ocupada['pos_ocu'] == 3][PONDERATOR].sum(),
        'trabajadores_no_remunerados': df_ocupada[df_ocupada['pos_ocu'] == 4][PONDERATOR].sum(),
        
        # Indicadores Estrat√©gicos (CONDICI√ìN DE INFORMALIDAD - Ocupados) [cite: 169]
        'ocupacion_formal': df_ocupada[df_ocupada['emp_ppal'] == 2][PONDERATOR].sum(),
        'ocupacion_informal': df_ocupada[df_ocupada['emp_ppal'] == 1][PONDERATOR].sum(),
        
        # Indicador Estrat√©gico (SUBOCUPACI√ìN - Ocupados)
        'subocupacion': df_ocupada[df_ocupada['sub_o'] == 1][PONDERATOR].sum(),
        
        # 3. Ingreso Promedio
        'ing_prom_mes_total': weighted_average(df_ocupada, 'ingocup', PONDERATOR),
        'ing_prom_hora_total': weighted_average(df_ocupada, 'ing_x_hrs', PONDERATOR),
    }
    
    # ------------------------------------------------------------------
    # --- 5. C√ÅLCULOS A NIVEL ESTATAL ---
    # ------------------------------------------------------------------
    
    datos_estatal = defaultdict(list)
    
    for ent_code, ent_name in ENTIDADES.items():
        # Filtros base por Estado (Criterio General)
        df_base_est = df_base[df_base['ent'] == ent_code].copy()
        df_15_y_mas_est = df_15_y_mas[df_15_y_mas['ent'] == ent_code].copy()
        
        # Subconjuntos Estatales (basados en precodificados y el filtro base estatal)
        df_pea_est = df_15_y_mas_est[df_15_y_mas_est['clase1'] == 1].copy()
        df_pnea_est = df_15_y_mas_est[df_15_y_mas_est['clase1'] == 2].copy()
        df_ocupada_est = df_15_y_mas_est[df_15_y_mas_est['clase2'] == 1].copy()
        
        # Recolecci√≥n de datos
        datos_estatal['year'].append(year)
        datos_estatal['quarter'].append(quarter)
        datos_estatal['ent_code'].append(ent_code)
        datos_estatal['ent_nombre'].append(ent_name)
        
        # Poblaci√≥n
        datos_estatal['pob_total'].append(df_base_est[PONDERATOR].sum())
        datos_estatal['pob_15_y_mas'].append(df_15_y_mas_est[PONDERATOR].sum())
        datos_estatal['pob_hombres_total'].append(df_base_est[df_base_est['sex'] == 1][PONDERATOR].sum())
        datos_estatal['pob_mujeres_total'].append(df_base_est[df_base_est['sex'] == 2][PONDERATOR].sum())
        
        # PEA y PNEA
        datos_estatal['pea_total'].append(df_pea_est[PONDERATOR].sum())
        datos_estatal['pnea_total'].append(df_pnea_est[PONDERATOR].sum())
        
        # Indicadores Estrat√©gicos (CLASE2 y CLASE1)
        datos_estatal['ocupada_total'].append(df_ocupada_est[PONDERATOR].sum())
        datos_estatal['desocupada_total'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 2][PONDERATOR].sum())
        datos_estatal['pnea_disponible'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 3][PONDERATOR].sum())
        datos_estatal['pnea_no_disponible'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 4][PONDERATOR].sum())
        
        # Indicadores Estrat√©gicos (POSICI√ìN EN LA OCUPACI√ìN - Ocupados)
        datos_estatal['subordinados_remunerados'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 1][PONDERATOR].sum())
        datos_estatal['empleadores'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 2][PONDERATOR].sum())
        datos_estatal['cuenta_propia'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 3][PONDERATOR].sum())
        datos_estatal['trabajadores_no_remunerados'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 4][PONDERATOR].sum())

        # Indicadores Estrat√©gicos (CONDICI√ìN DE INFORMALIDAD - Ocupados)
        datos_estatal['ocupacion_formal'].append(df_ocupada_est[df_ocupada_est['emp_ppal'] == 2][PONDERATOR].sum())
        datos_estatal['ocupacion_informal'].append(df_ocupada_est[df_ocupada_est['emp_ppal'] == 1][PONDERATOR].sum())
        
        # Indicador Estrat√©gico (SUBOCUPACI√ìN - Ocupados)
        datos_estatal['subocupacion'].append(df_ocupada_est[df_ocupada_est['sub_o'] == 1][PONDERATOR].sum())

        # Ingreso Promedio
        datos_estatal['ing_prom_mes_total'].append(weighted_average(df_ocupada_est, 'ingocup', PONDERATOR))
        datos_estatal['ing_prom_hora_total'].append(weighted_average(df_ocupada_est, 'ing_x_hrs', PONDERATOR))

    df_estatal_trimestre = pd.DataFrame(datos_estatal)
    
    return pd.Series(datos_nacional), df_estatal_trimestre

# ----------------------------------------------------------------------
# EJECUCI√ìN DEL SCRIPT Y CONSOLIDACI√ìN DE SERIES DE TIEMPO
# ----------------------------------------------------------------------

if __name__ == "__main__":
    
    periodos = pedir_rango_trimestral()
    
    # Inicializaci√≥n para la consolidaci√≥n
    resultados_nacionales = []
    resultados_estatales = []

    print(f"\n===========================================================")
    print(f"  INICIANDO PROCESAMIENTO DE {len(periodos)} TRIMESTRES")
    print(f"===========================================================")

    # Bucle principal para procesar cada trimestre
    for year, quarter in periodos:
        
        df_nacional, df_estatal = procesar_trimestre_enoe(year, quarter)
        
        if df_nacional is not None and df_estatal is not None:
            resultados_nacionales.append(df_nacional)
            resultados_estatales.append(df_estatal)
        else:
            # Manejo expl√≠cito de trimestres sin datos (se a√±ade una fila con NA)
            # Esto se asegura de mantener la continuidad de la serie de tiempo.
            periodo_na = {'year': year, 'quarter': quarter}
            
            # Serie Nacional con NA
            serie_na_nacional = pd.Series(periodo_na)
            # Se a√±aden las columnas faltantes (variables calculadas) con NaN
            if resultados_nacionales:
                # Usar la estructura de la primera serie de tiempo para rellenar los NaNs
                for col in resultados_nacionales[0].index:
                    if col not in serie_na_nacional:
                         serie_na_nacional[col] = np.nan
            resultados_nacionales.append(serie_na_nacional)
            
            # DataFrame Estatal con NA
            df_na_estatal = pd.DataFrame(periodo_na, index=range(1, 33)) # 32 estados
            df_na_estatal['ent_code'] = df_na_estatal.index
            df_na_estatal['ent_nombre'] = df_na_estatal['ent_code'].map(ENTIDADES)
            # Rellenar todas las columnas de variables con NaN
            if resultados_estatales:
                 # Usar la estructura del primer DataFrame estatal para rellenar los NaNs
                for col in resultados_estatales[0].columns:
                    if col not in df_na_estatal.columns:
                        df_na_estatal[col] = np.nan
            resultados_estatales.append(df_na_estatal)
            
            print(f"--- üö´ Se agreg√≥ NA/NaN para {year} T{quarter} y se prosigue. ---")
            

    # --- 6. CONSOLIDACI√ìN DE BASES DE DATOS ---

    # 1. Serie de Tiempo Nacional
    df_serie_nacional = pd.DataFrame(resultados_nacionales).reset_index(drop=True)
    df_serie_nacional['periodo'] = df_serie_nacional['year'].astype(str) + '-T' + df_serie_nacional['quarter'].astype(str)
    df_serie_nacional.set_index('periodo', inplace=True)

    print("\n===========================================================")
    print("      ‚úÖ BASE DE SERIE DE TIEMPO NACIONAL CREADA")
    print("      (Incluye nuevos indicadores estrat√©gicos)")
    print("===========================================================")
    print(df_serie_nacional.head())
    # Opcional: df_serie_nacional.to_csv("serie_tiempo_nacional_estrat.csv")


    # 2. Serie de Tiempo Estatal
    df_serie_estatal = pd.concat(resultados_estatales, ignore_index=True)
    df_serie_estatal['periodo'] = df_serie_estatal['year'].astype(str) + '-T' + df_serie_estatal['quarter'].astype(str)
    
    print("\n===========================================================")
    print("      ‚úÖ BASE DE SERIE DE TIEMPO ESTATAL CREADA")
    print("      (Incluye nuevos indicadores estrat√©gicos)")
    print("===========================================================")
    print(df_serie_estatal.head())
    # Opcional: df_serie_estatal.to_csv("serie_tiempo_estatal_estrat.csv")


--- Definici√≥n del Rango de la Serie de Tiempo ---
--- ‚ö†Ô∏è Saltando periodo 2020 T2 (No disponible o no oficial). ---
--- ‚ö†Ô∏è Saltando periodo 2020 T3 (No disponible o no oficial). ---

  INICIANDO PROCESAMIENTO DE 80 TRIMESTRES

--- ‚è≥ Procesando: 2005 T1 ---
‚úÖ Archivo cargado exitosamente. 424,007 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2005 T1 y se prosigue. ---

--- ‚è≥ Procesando: 2005 T2 ---
‚úÖ Archivo cargado exitosamente. 428,727 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2005 T2 y se prosigue. ---

--- ‚è≥ Procesando: 2005 T3 ---
‚úÖ Archivo cargado exitosamente. 421,751 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2005 T3 y se prosigue. ---

--- ‚è≥ Procesando: 2005 T4 ---
‚úÖ Archivo cargado exitosamente. 423,757 registros. Ponde

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 407,725 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2009 T1 y se prosigue. ---

--- ‚è≥ Procesando: 2009 T2 ---


One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 405,529 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2009 T2 y se prosigue. ---

--- ‚è≥ Procesando: 2009 T3 ---
‚úÖ Archivo cargado exitosamente. 402,919 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2009 T3 y se prosigue. ---

--- ‚è≥ Procesando: 2009 T4 ---
‚úÖ Archivo cargado exitosamente. 403,862 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2009 T4 y se prosigue. ---

--- ‚è≥ Procesando: 2010 T1 ---
‚úÖ Archivo cargado exitosamente. 406,797 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2010 T1 y se prosigue. ---

--- ‚è≥ Procesando: 2010 T2 ---
‚úÖ Archivo cargado exitosamente. 408,164 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de pond

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 392,937 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2013 T1 y se prosigue. ---

--- ‚è≥ Procesando: 2013 T2 ---
‚úÖ Archivo cargado exitosamente. 393,107 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2013 T2 y se prosigue. ---

--- ‚è≥ Procesando: 2013 T3 ---
‚úÖ Archivo cargado exitosamente. 394,472 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2013 T3 y se prosigue. ---

--- ‚è≥ Procesando: 2013 T4 ---
‚úÖ Archivo cargado exitosamente. 400,354 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de ponderador 'FAC' no encontrada.
--- üö´ Se agreg√≥ NA/NaN para 2013 T4 y se prosigue. ---

--- ‚è≥ Procesando: 2014 T1 ---
‚úÖ Archivo cargado exitosamente. 404,014 registros. Ponderador: FAC
‚ùå Error Cr√≠tico: Columna de pond

In [1]:
# Sitio raro donde viene un deflactor: http://www.apta.com.mx/aptace/info/iyf_inpc.php

# Nueva versi√≥n con control de nombre del factor de expansi√≥n

import pandas as pd
import numpy as np
import os
from collections import defaultdict
from datetime import datetime

# ----------------------------------------------------------------------
# FUNCIONES DE UTILIDAD
# ----------------------------------------------------------------------

def weighted_average(df, value_col, weight_col):
    """Calcula el promedio ponderado de una columna usando pesos (factores de expansi√≥n)."""
    df_filtered = df.dropna(subset=[value_col, weight_col])
    
    # Excluir valores de ingreso no v√°lidos (generalmente positivos si estamos calculando promedio)
    if value_col in ['ingocup', 'ing_x_hrs']:
        df_filtered = df_filtered[df_filtered[value_col] > 0].copy()
    
    if df_filtered.empty or df_filtered[weight_col].sum() == 0:
        return np.nan
    
    return np.average(df_filtered[value_col], weights=df_filtered[weight_col])

# Diccionario de Entidades para mapear c√≥digos a nombres
ENTIDADES = {
    1: 'Aguascalientes', 2: 'Baja California', 3: 'Baja California Sur', 4: 'Campeche',
    5: 'Coahuila', 6: 'Colima', 7: 'Chiapas', 8: 'Chihuahua', 9: 'Ciudad de M√©xico',
    10: 'Durango', 11: 'Guanajuato', 12: 'Guerrero', 13: 'Hidalgo', 14: 'Jalisco',
    15: 'M√©xico', 16: 'Michoac√°n', 17: 'Morelos', 18: 'Nayarit', 19: 'Nuevo Le√≥n',
    20: 'Oaxaca', 21: 'Puebla', 22: 'Quer√©taro', 23: 'Quintana Roo', 24: 'San Luis Potos√≠',
    25: 'Sinaloa', 26: 'Sonora', 27: 'Tabasco', 28: 'Tamaulipas', 29: 'Tlaxcala',
    30: 'Veracruz', 31: 'Yucat√°n', 32: 'Zacatecas'
}

def obtener_nombre_archivo(year, quarter, file_format='dta'):
    """Determina el nombre del archivo SDEMT seg√∫n el periodo y la nueva ruta."""
    year_short = str(year)[-2:]
    
    # L√≥gica de Nomenclatura del Archivo
    if year <= 2018:
        base_name = f"SDEMT{quarter}{year_short}".upper()
    elif year == 2019:
        base_name = f"sdemt{quarter}{year_short}".lower()
    elif 2020 <= year <= 2022 and quarter >= 3:
        base_name = f"ENOEN_SDEMT{quarter}{year_short}".upper()
    elif year >= 2023:
        # Mantiene el patr√≥n general para lo m√°s reciente
        base_name = f"SDEMT{quarter}{year_short}".upper()
    else: # 2020 T1, T2 y otros que no caen en reglas
        base_name = None 
        
    dir_name = f"ENOE_{year}_{quarter}"
    file_name = f"{base_name}.{file_format}" if base_name else None
    
    # üî¥ L√≥gica de Ruta Actualizada por el Usuario
    # Asume Data/ENOE_dta se encuentra en el directorio de ejecuci√≥n
    file_path = os.path.join("Data/ENOE_dta", dir_name, file_name) if file_name else None
    
    return file_path

def pedir_rango_trimestral():
    """Pide al usuario el rango de a√±os y trimestres para generar la serie de tiempo."""
    while True:
        try:
            print("\n--- Definici√≥n del Rango de la Serie de Tiempo ---")
            start_year = int(input("Ingrese el A√ëO de inicio (e.g., 2005): "))
            start_quarter = int(input("Ingrese el TRIMESTRE de inicio (1 a 4): "))
            end_year = int(input("Ingrese el A√ëO final (e.g., 2024): "))
            end_quarter = int(input("Ingrese el TRIMESTRE final (1 a 4): "))
            
            if not (1 <= start_quarter <= 4 and 1 <= end_quarter <= 4):
                raise ValueError("El trimestre debe ser un n√∫mero entre 1 y 4.")
            
            start_date = datetime(start_year, start_quarter * 3 - 2, 1)
            end_date = datetime(end_year, end_quarter * 3 - 2, 1)

            if start_date > end_date:
                raise ValueError("El periodo de inicio debe ser anterior o igual al periodo final.")
                
            break
        except ValueError as e:
            print(f"Entrada inv√°lida: {e}. Por favor, intente de nuevo.")
            
    # Generar la secuencia de trimestres
    periodos = []
    current_year = start_year
    current_quarter = start_quarter
    
    while current_year < end_year or (current_year == end_year and current_quarter <= end_quarter):
        
        # Manejo de trimestres hist√≥ricamente faltantes o no oficiales (2020 T1 y T2)
        if current_year == 2020 and current_quarter in [1, 2]:
            print(f"--- ‚ö†Ô∏è Saltando periodo {current_year} T{current_quarter} (No disponible o no oficial). ---")
            # A√±adimos el periodo a la lista para forzar la inserci√≥n de NaN en la serie de tiempo
            periodos.append((current_year, current_quarter))
            
        else:
            periodos.append((current_year, current_quarter))
            
        # Pasar al siguiente trimestre
        if current_quarter == 4:
            current_quarter = 1
            current_year += 1
        else:
            current_quarter += 1
            
    return periodos

# ----------------------------------------------------------------------
# FUNCI√ìN PRINCIPAL DE PROCESAMIENTO TRIMESTRAL
# ----------------------------------------------------------------------

def procesar_trimestre_enoe(year, quarter, file_format='dta'):
    """
    Carga, limpia y calcula indicadores clave a nivel nacional y estatal 
    para un trimestre espec√≠fico, ajustando el ponderador.
    """
    periodo_str = f"{year} T{quarter}"
    print(f"\n--- ‚è≥ Procesando: {periodo_str} ---")

    # --- 1. Obtener Ruta y Ponderador Din√°mico ---
    file_path = obtener_nombre_archivo(year, quarter, file_format)
    
    
    #######################################################################
    # Ojo con el nombre de la variable de factor de expansi√¥n
    #######################################################################

    if year < 2023:
        PONDERATOR = 'fac' # Nombre en min√∫sculas seg√∫n tu lista de 2005
    else:
        PONDERATOR = 'fac_tri' # Nombre en min√∫sculas (m√°s seguro para DTA)
    
    if file_path is None:
        return None, None
        
    # --- 2. Carga de Datos y Manejo de Errores (Debugging) ---
    if not os.path.exists(file_path):
        print(f"‚ùå Error Cr√≠tico: Archivo no encontrado en la ruta ajustada: {file_path}")
        return None, None
    
    try:
        # Se requiere la lista de columnas para asegurarnos de que el archivo es correcto
        df = pd.read_stata(file_path, convert_categoricals=False) 
        
        if df.empty:
            print(f"‚ùå Error de Carga: Archivo encontrado, pero vac√≠o: {file_path}")
            return None, None
            
        print(f"‚úÖ Archivo cargado exitosamente. {len(df):,} registros. Ponderador: {PONDERATOR.upper()}")
        
    except Exception as e:
        print(f"‚ùå Ocurri√≥ un error de lectura de datos en {file_path}: {e}")
        return None, None

    # --- 3. Limpieza y Preparaci√≥n de Datos ---
    
    # üî¥ Nombres de columnas seg√∫n tu input de 2005 (min√∫sculas y ajustados)
    columnas_requeridas = {
        PONDERATOR: PONDERATOR, 
        'r_def': 'r_def', 'c_res': 'c_res', 'ent': 'ent', 'sex': 'sex', 'eda': 'eda',
        'clase1': 'clase1', 'clase2': 'clase2', 'pos_ocu': 'pos_ocu',
        'emp_ppal': 'emp_ppal', 'sub_o': 'sub_o',
        'ingocup': 'ingocup', 'ing_x_hrs': 'ing_x_hrs'
    }
    
    # Normalizar los nombres de columnas a min√∫sculas para asegurar la compatibilidad
    df.columns = df.columns.str.lower()
    
    for original_col, required_col in columnas_requeridas.items():
        if original_col not in df.columns:
            if original_col == PONDERATOR:
                 # No podemos continuar sin el ponderador
                 print(f"‚ùå Error Cr√≠tico: Columna de ponderador '{PONDERATOR}' no encontrada en el DataFrame.")
                 return None, None
            
            # Se a√±aden columnas faltantes con 0/NaN para evitar errores en c√°lculos
            df[required_col] = 0 if required_col in ['r_def', 'c_res', 'clase1', 'clase2', 'pos_ocu', 'emp_ppal', 'sub_o'] else np.nan
            print(f"‚ö†Ô∏è Columna '{required_col}' no encontrada. Se a√±adi√≥ con valor por defecto.")

    # Conversi√≥n de tipos de datos esenciales
    df['r_def'] = df['r_def'].astype(str).str.strip()
    for col in ['sex', 'eda', 'clase1', 'clase2', 'c_res', 'ent', 'pos_ocu', 'emp_ppal', 'sub_o']:
         # Convertir a num√©rico, forzando errores a NaN, luego a entero (si es posible)
         df[col] = pd.to_numeric(df[col], errors='coerce').fillna(-1).astype(int)
    for col in ['ingocup', 'ing_x_hrs', PONDERATOR]:
         df[col] = pd.to_numeric(df[col], errors='coerce')


    # CRITERIO GENERAL DE FILTRADO (POBLACI√ìN DE 15 A√ëOS Y M√ÅS)
    
    #########################################
    # Ojo r_def, revisar cuales son las opciones, al convertir en n√∫merico puede poner "0.0" como el valor que nos interesa
    #########################################

    # 1. Poblaci√≥n total residente
    df_base = df[(df['r_def'] == '0.0') & (df['c_res'].isin([1, 3]))].copy()

    # 2. Poblaci√≥n en Edad de Trabajar (PET) 15 a√±os y m√°s
    df_15_y_mas = df_base[df_base['eda'].between(15, 98)].copy()
    
    if df_15_y_mas.empty:
        print("‚ùå Error de Filtro: No se encontraron registros v√°lidos despu√©s del filtro PET.")
        return None, None
    
    # Asignaci√≥n de nombres de estado
    df_base['ent_nombre'] = df_base['ent'].map(ENTIDADES)
    df_15_y_mas['ent_nombre'] = df_15_y_mas['ent'].map(ENTIDADES)
    
    # ------------------------------------------------------------------
    # --- 4. C√ÅLCULOS A NIVEL NACIONAL ---
    # ------------------------------------------------------------------
    
    # Subconjuntos basados en campos precodificados y el criterio general
    df_pea = df_15_y_mas[df_15_y_mas['clase1'] == 1].copy()      
    df_pnea = df_15_y_mas[df_15_y_mas['clase1'] == 2].copy()
    df_ocupada = df_15_y_mas[df_15_y_mas['clase2'] == 1].copy() 

    datos_nacional = {
        # Identificadores de Tiempo
        'year': year,
        'quarter': quarter,
        
        # 1. Poblaci√≥n
        'pob_total': df_base[PONDERATOR].sum(),
        'pob_15_y_mas': df_15_y_mas[PONDERATOR].sum(), 
        'pob_hombres_total': df_base[df_base['sex'] == 1][PONDERATOR].sum(),
        'pob_mujeres_total': df_base[df_base['sex'] == 2][PONDERATOR].sum(),
        
        # 2. PEA y PNEA
        'pea_total': df_pea[PONDERATOR].sum(),
        'pea_hombres': df_pea[df_pea['sex'] == 1][PONDERATOR].sum(),
        'pea_mujeres': df_pea[df_pea['sex'] == 2][PONDERATOR].sum(),
        'pnea_total': df_pnea[PONDERATOR].sum(), 
        
        # 3. Indicadores Estrat√©gicos (CLASE2 y CLASE1)
        'ocupada_total': df_ocupada[PONDERATOR].sum(),
        'desocupada_total': df_15_y_mas[df_15_y_mas['clase2'] == 2][PONDERATOR].sum(),
        # üî¥ Nota: Uso CLASE2 para clasificar PNEA disponible/no disponible
        'pnea_disponible': df_15_y_mas[df_15_y_mas['clase2'] == 3][PONDERATOR].sum(),
        'pnea_no_disponible': df_15_y_mas[df_15_y_mas['clase2'] == 4][PONDERATOR].sum(),
        
        # 4. Indicadores Estrat√©gicos (POSICI√ìN EN LA OCUPACI√ìN - Ocupados)
        'subordinados_remunerados': df_ocupada[df_ocupada['pos_ocu'] == 1][PONDERATOR].sum(),
        'empleadores': df_ocupada[df_ocupada['pos_ocu'] == 2][PONDERATOR].sum(),
        'cuenta_propia': df_ocupada[df_ocupada['pos_ocu'] == 3][PONDERATOR].sum(),
        'trabajadores_no_remunerados': df_ocupada[df_ocupada['pos_ocu'] == 4][PONDERATOR].sum(),
        
        # 5. Indicadores Estrat√©gicos (CONDICI√ìN DE INFORMALIDAD - Ocupados)
        # Nota: Asumo la definici√≥n est√°ndar: EMP_PPAL=1 (informal), EMP_PPAL=2 (formal)
        'ocupacion_formal': df_ocupada[df_ocupada['emp_ppal'] == 2][PONDERATOR].sum(),
        'ocupacion_informal': df_ocupada[df_ocupada['emp_ppal'] == 1][PONDERATOR].sum(),
        
        # 6. Indicador Estrat√©gico (SUBOCUPACI√ìN - Ocupados)
        'subocupacion': df_ocupada[df_ocupada['sub_o'] == 1][PONDERATOR].sum(),
        
        # 7. Ingreso Promedio
        'ing_prom_mes_total': weighted_average(df_ocupada, 'ingocup', PONDERATOR),
        'ing_prom_hora_total': weighted_average(df_ocupada, 'ing_x_hrs', PONDERATOR),
    }
    
    # ------------------------------------------------------------------
    # --- 5. C√ÅLCULOS A NIVEL ESTATAL ---
    # ------------------------------------------------------------------
    
    datos_estatal = defaultdict(list)
    
    for ent_code, ent_name in ENTIDADES.items():
        # Filtros base por Estado
        df_base_est = df_base[df_base['ent'] == ent_code].copy()
        df_15_y_mas_est = df_15_y_mas[df_15_y_mas['ent'] == ent_code].copy()
        
        # Subconjuntos Estatales 
        df_pea_est = df_15_y_mas_est[df_15_y_mas_est['clase1'] == 1].copy()
        df_pnea_est = df_15_y_mas_est[df_15_y_mas_est['clase1'] == 2].copy()
        df_ocupada_est = df_15_y_mas_est[df_15_y_mas_est['clase2'] == 1].copy()
        
        # Recolecci√≥n de datos
        datos_estatal['year'].append(year)
        datos_estatal['quarter'].append(quarter)
        datos_estatal['ent_code'].append(ent_code)
        datos_estatal['ent_nombre'].append(ent_name)
        
        # Poblaci√≥n
        datos_estatal['pob_total'].append(df_base_est[PONDERATOR].sum())
        datos_estatal['pob_15_y_mas'].append(df_15_y_mas_est[PONDERATOR].sum())
        datos_estatal['pob_hombres_total'].append(df_base_est[df_base_est['sex'] == 1][PONDERATOR].sum())
        datos_estatal['pob_mujeres_total'].append(df_base_est[df_base_est['sex'] == 2][PONDERATOR].sum())
        
        # PEA y PNEA
        datos_estatal['pea_total'].append(df_pea_est[PONDERATOR].sum())
        datos_estatal['pnea_total'].append(df_pnea_est[PONDERATOR].sum())
        
        # Indicadores Estrat√©gicos (CLASE2 y CLASE1)
        datos_estatal['ocupada_total'].append(df_ocupada_est[PONDERATOR].sum())
        datos_estatal['desocupada_total'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 2][PONDERATOR].sum())
        datos_estatal['pnea_disponible'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 3][PONDERATOR].sum())
        datos_estatal['pnea_no_disponible'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 4][PONDERATOR].sum())
        
        # Indicadores Estrat√©gicos (POSICI√ìN EN LA OCUPACI√ìN - Ocupados)
        datos_estatal['subordinados_remunerados'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 1][PONDERATOR].sum())
        datos_estatal['empleadores'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 2][PONDERATOR].sum())
        datos_estatal['cuenta_propia'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 3][PONDERATOR].sum())
        datos_estatal['trabajadores_no_remunerados'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 4][PONDERATOR].sum())

        # Indicadores Estrat√©gicos (CONDICI√ìN DE INFORMALIDAD - Ocupados)
        datos_estatal['ocupacion_formal'].append(df_ocupada_est[df_ocupada_est['emp_ppal'] == 2][PONDERATOR].sum())
        datos_estatal['ocupacion_informal'].append(df_ocupada_est[df_ocupada_est['emp_ppal'] == 1][PONDERATOR].sum())
        
        # Indicador Estrat√©gico (SUBOCUPACI√ìN - Ocupados)
        datos_estatal['subocupacion'].append(df_ocupada_est[df_ocupada_est['sub_o'] == 1][PONDERATOR].sum())

        # Ingreso Promedio
        datos_estatal['ing_prom_mes_total'].append(weighted_average(df_ocupada_est, 'ingocup', PONDERATOR))
        datos_estatal['ing_prom_hora_total'].append(weighted_average(df_ocupada_est, 'ing_x_hrs', PONDERATOR))

    df_estatal_trimestre = pd.DataFrame(datos_estatal)
    
    return pd.Series(datos_nacional), df_estatal_trimestre

# ----------------------------------------------------------------------
# EJECUCI√ìN DEL SCRIPT Y CONSOLIDACI√ìN DE SERIES DE TIEMPO
# ----------------------------------------------------------------------

if __name__ == "__main__":
    
    periodos = pedir_rango_trimestral()
    
    # Inicializaci√≥n para la consolidaci√≥n
    resultados_nacionales = []
    resultados_estatales = []

    print(f"\n===========================================================")
    print(f"  INICIANDO PROCESAMIENTO DE {len(periodos)} TRIMESTRES")
    print(f"  Ajustando ponderador seg√∫n el periodo.")
    print(f"===========================================================")

    # Bucle principal para procesar cada trimestre
    for year, quarter in periodos:
        
        df_nacional, df_estatal = procesar_trimestre_enoe(year, quarter)
        
        # Asumiendo que el primer DataFrame/Serie exitoso establece la estructura
        if not resultados_nacionales and df_nacional is not None:
             estructura_nacional = df_nacional.index
             estructura_estatal = df_estatal.columns
        
        if df_nacional is not None and df_estatal is not None:
            resultados_nacionales.append(df_nacional)
            resultados_estatales.append(df_estatal)
        else:
            # Manejo expl√≠cito de trimestres sin datos
            periodo_na = {'year': year, 'quarter': quarter}
            
            # Solo si ya tenemos una estructura, rellenamos con NaN
            if 'estructura_nacional' in locals():
                
                # Serie Nacional con NA
                serie_na_nacional = pd.Series(periodo_na)
                for col in estructura_nacional:
                    if col not in serie_na_nacional:
                         serie_na_nacional[col] = np.nan
                resultados_nacionales.append(serie_na_nacional)
                
                # DataFrame Estatal con NA
                df_na_estatal = pd.DataFrame(periodo_na, index=range(1, 33))
                df_na_estatal['ent_code'] = df_na_estatal.index
                df_na_estatal['ent_nombre'] = df_na_estatal['ent_code'].map(ENTIDADES)
                for col in estructura_estatal:
                    if col not in df_na_estatal.columns:
                        df_na_estatal[col] = np.nan
                resultados_estatales.append(df_na_estatal)
                
                print(f"--- üö´ Se agreg√≥ NA/NaN para {year} T{quarter} y se prosigue. ---")
            else:
                 print(f"--- üö´ No se pudo establecer la estructura de datos. Omitiendo {year} T{quarter}. ---")
            

    # --- 6. CONSOLIDACI√ìN DE BASES DE DATOS ---

    if resultados_nacionales:
        # 1. Serie de Tiempo Nacional
        df_serie_nacional = pd.DataFrame(resultados_nacionales).reset_index(drop=True)
        df_serie_nacional['periodo'] = df_serie_nacional['year'].astype(str) + '-T' + df_serie_nacional['quarter'].astype(str)
        df_serie_nacional.set_index('periodo', inplace=True)

        print("\n===========================================================")
        print("      ‚úÖ BASE DE SERIE DE TIEMPO NACIONAL CREADA")
        print("===========================================================")
        print(df_serie_nacional.head())
        
        # 2. Serie de Tiempo Estatal
        df_serie_estatal = pd.concat(resultados_estatales, ignore_index=True)
        df_serie_estatal['periodo'] = df_serie_estatal['year'].astype(str) + '-T' + df_serie_estatal['quarter'].astype(str)
        
        print("\n===========================================================")
        print("      ‚úÖ BASE DE SERIE DE TIEMPO ESTATAL CREADA")
        print("===========================================================")
        print(df_serie_estatal.head())
    else:
        print("\n===========================================================")
        print("      ‚ùå NO SE PUDIERON GENERAR LAS BASES DE DATOS.")
        print("      Aseg√∫rate de que la ruta 'Data/ENOE_dta' y los archivos existan.")
        print("===========================================================")


--- Definici√≥n del Rango de la Serie de Tiempo ---
--- ‚ö†Ô∏è Saltando periodo 2020 T1 (No disponible o no oficial). ---
--- ‚ö†Ô∏è Saltando periodo 2020 T2 (No disponible o no oficial). ---

  INICIANDO PROCESAMIENTO DE 82 TRIMESTRES
  Ajustando ponderador seg√∫n el periodo.

--- ‚è≥ Procesando: 2005 T1 ---
‚úÖ Archivo cargado exitosamente. 424,007 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2005 T2 ---
‚úÖ Archivo cargado exitosamente. 428,727 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2005 T3 ---
‚úÖ Archivo cargado exitosamente. 421,751 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2005 T4 ---
‚úÖ Archivo cargado exitosamente. 423,757 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2006 T1 ---
‚úÖ Archivo cargado exitosamente. 426,160 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2006 T2 ---
‚úÖ Archivo cargado exitosamente. 424,579 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2006 T3 ---
‚úÖ Archivo cargado exitosamente. 423,305 registros. Ponderador: FAC

--- ‚è

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 407,725 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2009 T2 ---


One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 405,529 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2009 T3 ---
‚úÖ Archivo cargado exitosamente. 402,919 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2009 T4 ---
‚úÖ Archivo cargado exitosamente. 403,862 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T1 ---
‚úÖ Archivo cargado exitosamente. 406,797 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T2 ---
‚úÖ Archivo cargado exitosamente. 408,164 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T3 ---
‚úÖ Archivo cargado exitosamente. 405,533 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T4 ---
‚úÖ Archivo cargado exitosamente. 401,524 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2011 T1 ---
‚úÖ Archivo cargado exitosamente. 402,117 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2011 T2 ---
‚úÖ Archivo cargado exitosamente. 400,977 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2011 T3 ---
‚úÖ Archivo cargado exitosamente. 399,716 registros. Ponderador: FAC

--- ‚è≥ Proc

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 392,937 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2013 T2 ---
‚úÖ Archivo cargado exitosamente. 393,107 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2013 T3 ---
‚úÖ Archivo cargado exitosamente. 394,472 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2013 T4 ---
‚úÖ Archivo cargado exitosamente. 400,354 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T1 ---
‚úÖ Archivo cargado exitosamente. 404,014 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T2 ---
‚úÖ Archivo cargado exitosamente. 406,088 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T3 ---
‚úÖ Archivo cargado exitosamente. 405,803 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T4 ---
‚úÖ Archivo cargado exitosamente. 404,640 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2015 T1 ---
‚úÖ Archivo cargado exitosamente. 404,432 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2015 T2 ---
‚úÖ Archivo cargado exitosamente. 403,865 registros. Ponderador: FAC

--- ‚è≥ Proc

In [7]:
#Ver
#Agregar deflactor
#A√±adir a√±os faltantes


#df_serie_estataL
#df_serie_nacional
# ojo r_def = 0.0 o 00

import pandas as pd
import numpy as np
import os
from collections import defaultdict
from datetime import datetime

# ----------------------------------------------------------------------
# FUNCIONES DE UTILIDAD
# ----------------------------------------------------------------------

def weighted_average(df, value_col, weight_col):
    """Calcula el promedio ponderado de una columna usando pesos (factores de expansi√≥n)."""
    df_filtered = df.dropna(subset=[value_col, weight_col])
    
    # Excluir valores de ingreso no v√°lidos (generalmente positivos si estamos calculando promedio)
    if value_col in ['ingocup', 'ing_x_hrs']:
        df_filtered = df_filtered[df_filtered[value_col] > 0].copy()
    
    if df_filtered.empty or df_filtered[weight_col].sum() == 0:
        return np.nan
    
    return np.average(df_filtered[value_col], weights=df_filtered[weight_col])

# Diccionario de Entidades para mapear c√≥digos a nombres
ENTIDADES = {
    1: 'Aguascalientes', 2: 'Baja California', 3: 'Baja California Sur', 4: 'Campeche',
    5: 'Coahuila', 6: 'Colima', 7: 'Chiapas', 8: 'Chihuahua', 9: 'Ciudad de M√©xico',
    10: 'Durango', 11: 'Guanajuato', 12: 'Guerrero', 13: 'Hidalgo', 14: 'Jalisco',
    15: 'M√©xico', 16: 'Michoac√°n', 17: 'Morelos', 18: 'Nayarit', 19: 'Nuevo Le√≥n',
    20: 'Oaxaca', 21: 'Puebla', 22: 'Quer√©taro', 23: 'Quintana Roo', 24: 'San Luis Potos√≠',
    25: 'Sinaloa', 26: 'Sonora', 27: 'Tabasco', 28: 'Tamaulipas', 29: 'Tlaxcala',
    30: 'Veracruz', 31: 'Yucat√°n', 32: 'Zacatecas'
}

def obtener_nombre_archivo(year, quarter, file_format='dta'):
    """Determina el nombre del archivo SDEMT seg√∫n el periodo y la nueva ruta, incluyendo ajustes para 2021-2022."""
    year_short = str(year)[-2:]
    base_name = None
    
    # 1. 2005_1 a 2018_4: SDEMT<Q><YY>.dta (May√∫sculas)
    if year <= 2018:
        base_name = f"SDEMT{quarter}{year_short}".upper()
    # 2. 2019_1 a 2019_4: sdemt<Q><YY>.dta (Min√∫sculas)
    elif year == 2019:
        base_name = f"sdemt{quarter}{year_short}".lower()
    # 3. 2020_3 a 2022_4: ENOEN_SDEMT<Q><YY>.dta (May√∫sculas)
    # ‚ö†Ô∏è Ajuste: Incluye 2021 T1/T2 y 2022 T1/T2 que fallaban en la l√≥gica anterior.
    elif (year == 2020 and quarter >= 3) or year in [2021, 2022]:
        base_name = f"ENOEN_SDEMT{quarter}{year_short}".upper()
    # 4. 2023_1 en adelante: ENOE_SDEMT<Q><YY>.dta (May√∫sculas)
    elif year >= 2023:
        base_name = f"ENOE_SDEMT{quarter}{year_short}".upper()
    
    dir_name = f"ENOE_{year}_{quarter}"
    file_name = f"{base_name}.{file_format}" if base_name else None
    
    # Ruta ajustada por el usuario
    file_path = os.path.join("Data/ENOE_dta", dir_name, file_name) if file_name else None
    
    return file_path

def pedir_rango_trimestral():
    """Pide al usuario el rango de a√±os y trimestres para generar la serie de tiempo."""
    while True:
        try:
            print("\n--- Definici√≥n del Rango de la Serie de Tiempo ---")
            start_year = int(input("Ingrese el A√ëO de inicio (e.g., 2005): "))
            start_quarter = int(input("Ingrese el TRIMESTRE de inicio (1 a 4): "))
            end_year = int(input("Ingrese el A√ëO final (e.g., 2025): "))
            end_quarter = int(input("Ingrese el TRIMESTRE final (1 a 4): "))
            
            if not (1 <= start_quarter <= 4 and 1 <= end_quarter <= 4):
                raise ValueError("El trimestre debe ser un n√∫mero entre 1 y 4.")
            
            start_date = datetime(start_year, start_quarter * 3 - 2, 1)
            end_date = datetime(end_year, end_quarter * 3 - 2, 1)

            if start_date > end_date:
                raise ValueError("El periodo de inicio debe ser anterior o igual al periodo final.")
                
            break
        except ValueError as e:
            print(f"Entrada inv√°lida: {e}. Por favor, intente de nuevo.")
            
    # Generar la secuencia de trimestres
    periodos = []
    current_year = start_year
    current_quarter = start_quarter
    
    while current_year < end_year or (current_year == end_year and current_quarter <= end_quarter):
        
        # Manejo de trimestres hist√≥ricamente faltantes o no oficiales (2020 T1 y T2)
        if current_year == 2020 and current_quarter in [1, 2]:
            print(f"--- ‚ö†Ô∏è Saltando periodo {current_year} T{current_quarter} (No disponible o no oficial). ---")
            # Agregamos el periodo para insertar NaN en la serie final
            periodos.append((current_year, current_quarter))
            
        else:
            periodos.append((current_year, current_quarter))
            
        # Pasar al siguiente trimestre
        if current_quarter == 4:
            current_quarter = 1
            current_year += 1
        else:
            current_quarter += 1
            
    return periodos

# ----------------------------------------------------------------------
# FUNCI√ìN PRINCIPAL DE PROCESAMIENTO TRIMESTRAL
# ----------------------------------------------------------------------

def procesar_trimestre_enoe(year, quarter, file_format='dta'):
    """
    Carga, limpia y calcula indicadores clave a nivel nacional y estatal 
    para un trimestre espec√≠fico, ajustando el ponderador.
    """
    periodo_str = f"{year} T{quarter}"
    print(f"\n--- ‚è≥ Procesando: {periodo_str} ---")

    # --- 1. Obtener Ruta y Ponderador Din√°mico ---
    file_path = obtener_nombre_archivo(year, quarter, file_format)
    
    # üî¥ L√≥gica del Ponderador Ajustada (Punto 1 del usuario)
    is_fac_tri_period = (
        (year == 2020 and quarter >= 3) or
        (year >= 2021)
        #(year == 2022) or
        #(year >= 2023)
    )
    
    PONDERATOR = 'fac_tri' if is_fac_tri_period else 'fac'
    
    if file_path is None:
        return None, None
        
    # --- 2. Carga de Datos y Manejo de Errores (Debugging) ---
    if not os.path.exists(file_path):
        print(f"‚ùå Error Cr√≠tico: Archivo no encontrado en la ruta ajustada: {file_path}")
        return None, None
    
    try:
        df = pd.read_stata(file_path, convert_categoricals=False) 
        
        if df.empty:
            print(f"‚ùå Error de Carga: Archivo encontrado, pero vac√≠o: {file_path}")
            return None, None
            
        print(f"‚úÖ Archivo cargado exitosamente. {len(df):,} registros. Ponderador: {PONDERATOR.upper()}")
        
    except Exception as e:
        print(f"‚ùå Ocurri√≥ un error de lectura de datos en {file_path}: {e}")
        return None, None

    # --- 3. Limpieza y Preparaci√≥n de Datos ---
    
    # Normalizar los nombres de columnas a min√∫sculas para asegurar la compatibilidad
    df.columns = df.columns.str.lower()
    
    # Variables requeridas para los c√°lculos (se utiliza el nombre en min√∫sculas)
    columnas_requeridas = [
        PONDERATOR, 'r_def', 'c_res', 'ent', 'sex', 'eda', 'clase1', 'clase2', 
        'pos_ocu', 'emp_ppal', 'sub_o', 'ingocup', 'ing_x_hrs'
    ]
    
    for col in columnas_requeridas:
        if col not in df.columns:
            if col == PONDERATOR:
                 print(f"‚ùå Error Cr√≠tico: Columna de ponderador '{PONDERATOR}' no encontrada en el DataFrame.")
                 return None, None
            
            # Se a√±aden columnas faltantes con 0/NaN
            default_val = 0 if col in ['r_def', 'c_res', 'clase1', 'clase2', 'pos_ocu', 'emp_ppal', 'sub_o', 'ent', 'sex', 'eda'] else np.nan
            df[col] = default_val
            print(f"‚ö†Ô∏è Columna '{col}' no encontrada. Se a√±adi√≥ con valor por defecto.")

    # Conversi√≥n de tipos de datos esenciales
    df['r_def'] = df['r_def'].astype(str).str.strip()
    for col in ['sex', 'eda', 'clase1', 'clase2', 'c_res', 'ent', 'pos_ocu', 'emp_ppal', 'sub_o']:
         # Intentamos convertir a entero, usando -1 para valores no v√°lidos (NaN/errores)
         df[col] = pd.to_numeric(df[col], errors='coerce').fillna(-1).astype(int)
    for col in ['ingocup', 'ing_x_hrs', PONDERATOR]:
         df[col] = pd.to_numeric(df[col], errors='coerce')


    # CRITERIO GENERAL DE FILTRADO (POBLACI√ìN DE 15 A√ëOS Y M√ÅS)
    
    # 1. Poblaci√≥n total residente
    # ‚ö†Ô∏è Ajuste: Uso '00' como c√≥digo est√°ndar para r_def (residente habitual presente)
    df_base = df[(df['r_def'] == '0.0') & (df['c_res'].isin([1, 3]))].copy()

    # 2. Poblaci√≥n en Edad de Trabajar (PET) 15 a√±os y m√°s
    df_15_y_mas = df_base[df_base['eda'].between(15, 98)].copy()
    
    if df_15_y_mas.empty:
        print("‚ùå Error de Filtro: No se encontraron registros v√°lidos despu√©s del filtro PET.")
        return None, None
    
    # Asignaci√≥n de nombres de estado
    df_base['ent_nombre'] = df_base['ent'].map(ENTIDADES)
    df_15_y_mas['ent_nombre'] = df_15_y_mas['ent'].map(ENTIDADES)
    
    # ------------------------------------------------------------------
    # --- 4. C√ÅLCULOS A NIVEL NACIONAL ---
    # ------------------------------------------------------------------
    
    # Subconjuntos basados en campos precodificados y el criterio general
    df_pea = df_15_y_mas[df_15_y_mas['clase1'] == 1].copy()      
    df_pnea = df_15_y_mas[df_15_y_mas['clase1'] == 2].copy()
    df_ocupada = df_15_y_mas[df_15_y_mas['clase2'] == 1].copy() 

    datos_nacional = {
        'year': year, 'quarter': quarter,
        
        # 1. Poblaci√≥n
        'pob_total': df_base[PONDERATOR].sum(),
        'pob_15_y_mas': df_15_y_mas[PONDERATOR].sum(), 
        'pob_hombres_total': df_base[df_base['sex'] == 1][PONDERATOR].sum(),
        'pob_mujeres_total': df_base[df_base['sex'] == 2][PONDERATOR].sum(),
        
        # 2. PEA y PNEA
        'pea_total': df_pea[PONDERATOR].sum(),
        'pea_hombres': df_pea[df_pea['sex'] == 1][PONDERATOR].sum(),
        'pea_mujeres': df_pea[df_pea['sex'] == 2][PONDERATOR].sum(),
        'pnea_total': df_pnea[PONDERATOR].sum(), 
        
        # 3. Indicadores Estrat√©gicos (CLASE2 y CLASE1)
        'ocupada_total': df_ocupada[PONDERATOR].sum(),
        'desocupada_total': df_15_y_mas[df_15_y_mas['clase2'] == 2][PONDERATOR].sum(),
        'pnea_disponible': df_15_y_mas[df_15_y_mas['clase2'] == 3][PONDERATOR].sum(),
        'pnea_no_disponible': df_15_y_mas[df_15_y_mas['clase2'] == 4][PONDERATOR].sum(),
        
        # 4. Indicadores Estrat√©gicos (POSICI√ìN EN LA OCUPACI√ìN - Ocupados)
        'subordinados_remunerados': df_ocupada[df_ocupada['pos_ocu'] == 1][PONDERATOR].sum(),
        'empleadores': df_ocupada[df_ocupada['pos_ocu'] == 2][PONDERATOR].sum(),
        'cuenta_propia': df_ocupada[df_ocupada['pos_ocu'] == 3][PONDERATOR].sum(),
        'trabajadores_no_remunerados': df_ocupada[df_ocupada['pos_ocu'] == 4][PONDERATOR].sum(),
        
        # 5. Indicadores Estrat√©gicos (CONDICI√ìN DE INFORMALIDAD - Ocupados)
        'ocupacion_formal': df_ocupada[df_ocupada['emp_ppal'] == 2][PONDERATOR].sum(),
        'ocupacion_informal': df_ocupada[df_ocupada['emp_ppal'] == 1][PONDERATOR].sum(),
        
        # 6. Indicador Estrat√©gico (SUBOCUPACI√ìN - Ocupados)
        'subocupacion': df_ocupada[df_ocupada['sub_o'] == 1][PONDERATOR].sum(),
        
        # 7. Ingreso Promedio
        'ing_prom_mes_total': weighted_average(df_ocupada, 'ingocup', PONDERATOR),
        'ing_prom_hora_total': weighted_average(df_ocupada, 'ing_x_hrs', PONDERATOR),
    }
    
    # ------------------------------------------------------------------
    # --- 5. C√ÅLCULOS A NIVEL ESTATAL ---
    # ------------------------------------------------------------------
    
    datos_estatal = defaultdict(list)
    
    for ent_code, ent_name in ENTIDADES.items():
        # Filtros base por Estado
        df_base_est = df_base[df_base['ent'] == ent_code].copy()
        df_15_y_mas_est = df_15_y_mas[df_15_y_mas['ent'] == ent_code].copy()
        
        # Subconjuntos Estatales 
        df_pea_est = df_15_y_mas_est[df_15_y_mas_est['clase1'] == 1].copy()
        df_pnea_est = df_15_y_mas_est[df_15_y_mas_est['clase1'] == 2].copy()
        df_ocupada_est = df_15_y_mas_est[df_15_y_mas_est['clase2'] == 1].copy()
        
        # Recolecci√≥n de datos
        datos_estatal['year'].append(year); datos_estatal['quarter'].append(quarter)
        datos_estatal['ent_code'].append(ent_code); datos_estatal['ent_nombre'].append(ent_name)
        
        # Poblaci√≥n
        datos_estatal['pob_total'].append(df_base_est[PONDERATOR].sum())
        datos_estatal['pob_15_y_mas'].append(df_15_y_mas_est[PONDERATOR].sum())
        datos_estatal['pob_hombres_total'].append(df_base_est[df_base_est['sex'] == 1][PONDERATOR].sum())
        datos_estatal['pob_mujeres_total'].append(df_base_est[df_base_est['sex'] == 2][PONDERATOR].sum())
        
        # PEA y PNEA
        datos_estatal['pea_total'].append(df_pea_est[PONDERATOR].sum())
        datos_estatal['pnea_total'].append(df_pnea_est[PONDERATOR].sum())
        
        # Indicadores Estrat√©gicos (CLASE2 y CLASE1)
        datos_estatal['ocupada_total'].append(df_ocupada_est[PONDERATOR].sum())
        datos_estatal['desocupada_total'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 2][PONDERATOR].sum())
        datos_estatal['pnea_disponible'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 3][PONDERATOR].sum())
        datos_estatal['pnea_no_disponible'].append(df_15_y_mas_est[df_15_y_mas_est['clase2'] == 4][PONDERATOR].sum())
        
        # Indicadores Estrat√©gicos (POSICI√ìN EN LA OCUPACI√ìN - Ocupados)
        datos_estatal['subordinados_remunerados'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 1][PONDERATOR].sum())
        datos_estatal['empleadores'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 2][PONDERATOR].sum())
        datos_estatal['cuenta_propia'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 3][PONDERATOR].sum())
        datos_estatal['trabajadores_no_remunerados'].append(df_ocupada_est[df_ocupada_est['pos_ocu'] == 4][PONDERATOR].sum())

        # Indicadores Estrat√©gicos (CONDICI√ìN DE INFORMALIDAD - Ocupados)
        datos_estatal['ocupacion_formal'].append(df_ocupada_est[df_ocupada_est['emp_ppal'] == 2][PONDERATOR].sum())
        datos_estatal['ocupacion_informal'].append(df_ocupada_est[df_ocupada_est['emp_ppal'] == 1][PONDERATOR].sum())
        
        # Indicador Estrat√©gico (SUBOCUPACI√ìN - Ocupados)
        datos_estatal['subocupacion'].append(df_ocupada_est[df_ocupada_est['sub_o'] == 1][PONDERATOR].sum())

        # Ingreso Promedio
        datos_estatal['ing_prom_mes_total'].append(weighted_average(df_ocupada_est, 'ingocup', PONDERATOR))
        datos_estatal['ing_prom_hora_total'].append(weighted_average(df_ocupada_est, 'ing_x_hrs', PONDERATOR))

    df_estatal_trimestre = pd.DataFrame(datos_estatal)
    
    return pd.Series(datos_nacional), df_estatal_trimestre

# ----------------------------------------------------------------------
# EJECUCI√ìN DEL SCRIPT Y CONSOLIDACI√ìN DE SERIES DE TIEMPO
# ----------------------------------------------------------------------

if __name__ == "__main__":
    
    periodos = pedir_rango_trimestral()
    
    # Inicializaci√≥n para la consolidaci√≥n
    resultados_nacionales = []
    resultados_estatales = []

    print(f"\n===========================================================")
    print(f"  INICIANDO PROCESAMIENTO DE {len(periodos)} TRIMESTRES")
    print(f"  Ajustando ponderador y nombres de archivos seg√∫n el periodo.")
    print(f"===========================================================")

    # Bucle principal para procesar cada trimestre
    for year, quarter in periodos:
        
        df_nacional, df_estatal = procesar_trimestre_enoe(year, quarter)
        
        # Asumiendo que el primer DataFrame/Serie exitoso establece la estructura
        if not resultados_nacionales and df_nacional is not None:
             estructura_nacional = df_nacional.index
             estructura_estatal = df_estatal.columns
        
        if df_nacional is not None and df_estatal is not None:
            resultados_nacionales.append(df_nacional)
            resultados_estatales.append(df_estatal)
        else:
            # Manejo expl√≠cito de trimestres sin datos
            periodo_na = {'year': year, 'quarter': quarter}
            
            # Solo si ya tenemos una estructura, rellenamos con NaN
            if 'estructura_nacional' in locals():
                
                # Serie Nacional con NA
                serie_na_nacional = pd.Series(periodo_na)
                for col in estructura_nacional:
                    if col not in serie_na_nacional:
                         serie_na_nacional[col] = np.nan
                resultados_nacionales.append(serie_na_nacional)
                
                # DataFrame Estatal con NA
                df_na_estatal = pd.DataFrame(periodo_na, index=range(1, 33))
                df_na_estatal['ent_code'] = df_na_estatal.index
                df_na_estatal['ent_nombre'] = df_na_estatal['ent_code'].map(ENTIDADES)
                for col in estructura_estatal:
                    if col not in df_na_estatal.columns:
                        df_na_estatal[col] = np.nan
                resultados_estatales.append(df_na_estatal)
                
                # El mensaje ya indica que se agreg√≥ el NA/NaN y prosigue
                if df_nacional is None: 
                   print(f"--- üö´ Se agreg√≥ NA/NaN para {year} T{quarter} y se prosigue. ---")
            else:
                 print(f"--- üö´ No se pudo establecer la estructura de datos. Omitiendo {year} T{quarter}. ---")
            

    # --- 6. CONSOLIDACI√ìN Y GUARDADO DE BASES DE DATOS (Punto 4 del usuario) ---

    if resultados_nacionales:
        # 1. Serie de Tiempo Nacional
        df_serie_nacional = pd.DataFrame(resultados_nacionales).reset_index(drop=True)
        df_serie_nacional['periodo'] = df_serie_nacional['year'].astype(str) + '-T' + df_serie_nacional['quarter'].astype(str)
        df_serie_nacional.set_index('periodo', inplace=True)
        
        # 2. Serie de Tiempo Estatal
        df_serie_estatal = pd.concat(resultados_estatales, ignore_index=True)
        df_serie_estatal['periodo'] = df_serie_estatal['year'].astype(str) + '-T' + df_serie_estatal['quarter'].astype(str)
        
        # 3. Guardar las bases de datos en el directorio ra√≠z
        today = datetime.now().strftime("%Y%m%d")
        
        file_name_nacional = f"{today}_Nacional.csv"
        file_name_estatal = f"{today}_Estados.csv"
        
        # Se guardan como CSV para f√°cil manejo
        df_serie_nacional.to_csv(file_name_nacional, index=True)
        df_serie_estatal.to_csv(file_name_estatal, index=False)

        print("\n===========================================================")
        print("      ‚úÖ BASES DE DATOS CREADAS Y GUARDADAS")
        print("===========================================================")
        print(f"Serie Nacional: {file_name_nacional}")
        print(df_serie_nacional.head())
        print("-" * 50)
        print(f"Serie Estatal: {file_name_estatal}")
        print(df_serie_estatal.head())
        print("===========================================================")
        
    else:
        print("\n===========================================================")
        print("      ‚ùå NO SE PUDIERON GENERAR LAS BASES DE DATOS.")
        print("      Aseg√∫rate de que la ruta 'Data/ENOE_dta' y los archivos existan.")
        print("===========================================================")








--- Definici√≥n del Rango de la Serie de Tiempo ---
--- ‚ö†Ô∏è Saltando periodo 2020 T1 (No disponible o no oficial). ---
--- ‚ö†Ô∏è Saltando periodo 2020 T2 (No disponible o no oficial). ---

  INICIANDO PROCESAMIENTO DE 82 TRIMESTRES
  Ajustando ponderador y nombres de archivos seg√∫n el periodo.

--- ‚è≥ Procesando: 2005 T1 ---
‚úÖ Archivo cargado exitosamente. 424,007 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2005 T2 ---
‚úÖ Archivo cargado exitosamente. 428,727 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2005 T3 ---
‚úÖ Archivo cargado exitosamente. 421,751 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2005 T4 ---
‚úÖ Archivo cargado exitosamente. 423,757 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2006 T1 ---
‚úÖ Archivo cargado exitosamente. 426,160 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2006 T2 ---
‚úÖ Archivo cargado exitosamente. 424,579 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2006 T3 ---
‚úÖ Archivo cargado exitosamente. 423,305 registros. P

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 407,725 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2009 T2 ---


One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 405,529 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2009 T3 ---
‚úÖ Archivo cargado exitosamente. 402,919 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2009 T4 ---
‚úÖ Archivo cargado exitosamente. 403,862 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T1 ---
‚úÖ Archivo cargado exitosamente. 406,797 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T2 ---
‚úÖ Archivo cargado exitosamente. 408,164 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T3 ---
‚úÖ Archivo cargado exitosamente. 405,533 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2010 T4 ---
‚úÖ Archivo cargado exitosamente. 401,524 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2011 T1 ---
‚úÖ Archivo cargado exitosamente. 402,117 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2011 T2 ---
‚úÖ Archivo cargado exitosamente. 400,977 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2011 T3 ---
‚úÖ Archivo cargado exitosamente. 399,716 registros. Ponderador: FAC

--- ‚è≥ Proc

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(file_path, convert_categoricals=False)


‚úÖ Archivo cargado exitosamente. 392,937 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2013 T2 ---
‚úÖ Archivo cargado exitosamente. 393,107 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2013 T3 ---
‚úÖ Archivo cargado exitosamente. 394,472 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2013 T4 ---
‚úÖ Archivo cargado exitosamente. 400,354 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T1 ---
‚úÖ Archivo cargado exitosamente. 404,014 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T2 ---
‚úÖ Archivo cargado exitosamente. 406,088 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T3 ---
‚úÖ Archivo cargado exitosamente. 405,803 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2014 T4 ---
‚úÖ Archivo cargado exitosamente. 404,640 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2015 T1 ---
‚úÖ Archivo cargado exitosamente. 404,432 registros. Ponderador: FAC

--- ‚è≥ Procesando: 2015 T2 ---
‚úÖ Archivo cargado exitosamente. 403,865 registros. Ponderador: FAC

--- ‚è≥ Proc

In [4]:
import pandas as pd


#file_path = "Data\ENOE_dta\ENOE_2020_3\ENOEN_SDEMT320.dta"
#file_path = "Data/ENOE_dta/ENOE_2021_1/ENOE_SDEMT121.dta"
#file_path = "Data\ENOE_dta\ENOE_2021_1\ENOEN_SDEMT121.dta" #fac_tri

#file_path = "Data\ENOE_dta\ENOE_2022_1\ENOEN_SDEMT122.dta"  #fac_tri
 


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10) # Mostrar hasta 10 filas para la previsualizaci√≥n
pd.set_option('display.width', 1000)

temp = pd.read_stata(file_path, convert_categoricals=False)

temp



Unnamed: 0,r_def,loc,mun,est,est_d_tri,est_d_men,ageb,t_loc_tri,t_loc_men,cd_a,ent,con,upm,d_sem,n_pro_viv,v_sel,n_hog,h_mud,n_ent,per,n_ren,c_res,par_c,sex,eda,nac_dia,nac_mes,nac_anio,l_nac_c,cs_p12,cs_p13_1,cs_p13_2,cs_p14_c,cs_p15,cs_p16,cs_p17,n_hij,e_con,cs_p20a_1,cs_p20a_c,cs_p20b_1,cs_p20b_c,cs_p20c_1,cs_ad_mot,cs_p21_des,cs_ad_des,cs_nr_mot,cs_p23_des,cs_nr_ori,ur,zona,salario,fac_tri,fac_men,clase1,clase2,clase3,pos_ocu,seg_soc,rama,c_ocu11c,ing7c,dur9c,emple7c,medica5c,buscar5c,rama_est1,rama_est2,dur_est,ambito1,ambito2,tue1,tue2,tue3,busqueda,d_ant_lab,d_cexp_est,dur_des,sub_o,s_clasifi,remune2c,pre_asa,tip_con,dispo,nodispo,c_inac5c,pnea_est,niv_ins,eda5c,eda7c,eda12c,eda19c,hij5c,domestico,anios_esc,hrsocup,ingocup,ing_x_hrs,tpg_p8a,tcco,cp_anoc,imssissste,ma48me1sm,p14apoyos,scian,t_tra,emp_ppal,tue_ppal,trans_ppal,mh_fil2,mh_col,sec_ins,tipo,mes_cal
0,0.0,,2.0,10.0,122.0,,0.0,1.0,,1.0,9.0,40001.0,900987.0,101.0,11.0,1.0,1.0,0.0,3.0,122.0,1.0,1.0,101.0,1.0,42.0,20.0,6.0,1979.0,9.0,1.0,7.0,4.0,71300,3.0,1.0,2.0,,6.0,1.0,9.0,1.0,2.0,,,,,,,,1.0,2.0,5186.0,1977.0,0.0,1.0,1.0,1.0,1.0,1.0,3.0,6.0,4.0,4.0,5.0,3.0,4.0,3.0,5.0,3.0,2.0,5.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,4.0,2.0,4.0,6.0,11.0,0.0,3.0,16.0,32.0,16000.0,116.27907,0.0,0.0,0.0,1.0,0.0,2.0,6.0,1.0,2.0,2.0,0.0,3.0,2.0,2.0,1.0,96.0
1,0.0,,2.0,10.0,122.0,,0.0,1.0,,1.0,9.0,40001.0,900987.0,101.0,105.0,4.0,1.0,0.0,3.0,122.0,1.0,1.0,101.0,2.0,26.0,28.0,9.0,1995.0,15.0,1.0,7.0,5.0,33100,3.0,1.0,2.0,0.0,6.0,1.0,9.0,1.0,2.0,,,,,,,,1.0,2.0,5186.0,1977.0,0.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,7.0,4.0,7.0,3.0,4.0,3.0,8.0,3.0,3.0,0.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0,3.0,8.0,1.0,3.0,17.0,32.0,0.0,0.00000,0.0,0.0,0.0,1.0,0.0,2.0,12.0,1.0,2.0,2.0,0.0,3.0,2.0,2.0,1.0,96.0
2,0.0,,2.0,10.0,122.0,,0.0,1.0,,1.0,9.0,40001.0,900987.0,101.0,105.0,4.0,1.0,0.0,3.0,122.0,2.0,1.0,501.0,2.0,28.0,1.0,11.0,1993.0,15.0,1.0,7.0,4.0,33100,3.0,1.0,2.0,0.0,6.0,1.0,9.0,1.0,2.0,,,,,,,,1.0,2.0,5186.0,1977.0,0.0,1.0,1.0,1.0,2.0,2.0,4.0,1.0,7.0,4.0,2.0,1.0,4.0,3.0,8.0,3.0,2.0,3.0,1.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0,3.0,8.0,1.0,2.0,16.0,32.0,0.0,0.00000,0.0,0.0,0.0,4.0,0.0,2.0,12.0,1.0,2.0,2.0,0.0,3.0,6.0,4.0,1.0,96.0
3,0.0,,2.0,10.0,122.0,,0.0,1.0,,1.0,9.0,40001.0,900987.0,101.0,105.0,4.0,1.0,0.0,3.0,122.0,3.0,1.0,501.0,2.0,29.0,99.0,99.0,1992.0,15.0,1.0,7.0,4.0,71700,3.0,1.0,2.0,0.0,6.0,1.0,9.0,1.0,2.0,,,,,,,,1.0,2.0,5186.0,1977.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,6.0,7.0,4.0,6.0,3.0,4.0,2.0,3.0,3.0,2.0,6.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0,3.0,8.0,1.0,3.0,16.0,32.0,0.0,0.00000,0.0,0.0,0.0,1.0,0.0,2.0,5.0,1.0,2.0,2.0,0.0,3.0,2.0,2.0,1.0,96.0
4,0.0,,10.0,30.0,124.0,113.0,0.0,1.0,1.0,1.0,9.0,40002.0,912033.0,104.0,33.0,2.0,1.0,0.0,3.0,122.0,1.0,1.0,101.0,2.0,60.0,12.0,9.0,1961.0,9.0,1.0,3.0,3.0,,,,2.0,2.0,3.0,1.0,9.0,1.0,10.0,,,,,,,,1.0,2.0,5186.0,2129.0,13198.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,2.0,6.0,6.0,3.0,4.0,3.0,8.0,4.0,2.0,5.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,5.0,0.0,0.0,0.0,0.0,3.0,3.0,6.0,10.0,15.0,2.0,3.0,9.0,40.0,0.0,0.00000,0.0,0.0,0.0,1.0,0.0,2.0,14.0,1.0,2.0,2.0,0.0,3.0,2.0,2.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403647,0.0,,93.0,30.0,198.0,172.0,0.0,2.0,2.0,82.0,14.0,50007.0,1411488.0,102.0,302.0,19.0,1.0,0.0,3.0,122.0,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,3.0,,1.0,,,,2.0,2.0,5186.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
403648,0.0,,13.0,30.0,351.0,313.0,0.0,1.0,1.0,81.0,24.0,50012.0,2400037.0,201.0,86.0,1.0,1.0,0.0,2.0,122.0,5.0,2.0,,,,,,,,,,,,,,,,,,,,,,10.0,YA SE REGRESO A SU CASA PORQUE YA NO CUIDA DE SUS,1.0,,,,2.0,2.0,5186.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
403649,0.0,,21.0,20.0,269.0,237.0,0.0,1.0,1.0,3.0,19.0,40455.0,1902162.0,510.0,29.0,3.0,1.0,0.0,4.0,122.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,10.0,SE PELEO CON SU MARIDO,2.0,,,,1.0,2.0,5186.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0
403650,0.0,,53.0,20.0,238.0,209.0,0.0,3.0,3.0,85.0,16.0,50058.0,1605074.0,403.0,321.0,5.0,1.0,0.0,5.0,122.0,8.0,2.0,,,,,,,,,,,,,,,,,,,,,,6.0,,1.0,,,,2.0,2.0,5186.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0


In [13]:
temp

Unnamed: 0,r_def,loc,mun,est,est_d_tri,est_d_men,ageb,t_loc_tri,t_loc_men,cd_a,ent,con,upm,d_sem,n_pro_viv,v_sel,n_hog,h_mud,n_ent,per,n_ren,c_res,par_c,sex,eda,nac_dia,nac_mes,nac_anio,l_nac_c,cs_p12,cs_p13_1,cs_p13_2,cs_p14_c,cs_p15,cs_p16,cs_p17,n_hij,e_con,cs_ad_mot,cs_p20_des,cs_ad_des,cs_nr_mot,cs_p22_des,cs_nr_ori,ur,zona,salario,fac_tri,fac_men,clase1,clase2,clase3,pos_ocu,seg_soc,rama,c_ocu11c,ing7c,dur9c,emple7c,medica5c,buscar5c,rama_est1,rama_est2,dur_est,ambito1,ambito2,tue1,tue2,tue3,busqueda,d_ant_lab,d_cexp_est,dur_des,sub_o,s_clasifi,remune2c,pre_asa,tip_con,dispo,nodispo,c_inac5c,pnea_est,niv_ins,eda5c,eda7c,eda12c,eda19c,hij5c,domestico,anios_esc,hrsocup,ingocup,ing_x_hrs,tpg_p8a,tcco,cp_anoc,imssissste,ma48me1sm,p14apoyos,scian,t_tra,emp_ppal,tue_ppal,trans_ppal,mh_fil2,mh_col,sec_ins,tipo,mes_cal,ca
0,0.0,,10.0,30.0,124.0,,0.0,1.0,,1.0,9.0,40002.0,912010.0,101.0,41.0,1.0,1.0,0.0,2.0,320.0,1.0,1.0,101.0,1.0,34.0,11.0,8.0,1985.0,9.0,1.0,7.0,4.0,5713,3.0,1.0,2.0,,5.0,,,,,,,1.0,2.0,3697.0,981.0,0.0,1.0,1.0,1.0,3.0,2.0,4.0,1.0,7.0,6.0,1.0,1.0,2.0,3.0,9.0,4.0,2.0,3.0,3.0,5.0,0.0,2.0,0.0,0.0,0.0,1.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0,4.0,9.0,0.0,3.0,16.0,48.0,0.0,0.0,1.0,0.0,0.0,4.0,0.0,0.0,16.0,1.0,1.0,1.0,0.0,1.0,7.0,8.0,1.0,98.0,2.0
1,0.0,,10.0,30.0,124.0,,0.0,1.0,,1.0,9.0,40002.0,912010.0,101.0,41.0,1.0,1.0,0.0,2.0,320.0,2.0,1.0,201.0,2.0,30.0,20.0,9.0,1989.0,9.0,1.0,7.0,4.0,5713,3.0,1.0,2.0,1.0,5.0,,,,,,,1.0,2.0,3697.0,981.0,0.0,1.0,2.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0,4.0,9.0,2.0,3.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,98.0,2.0
2,0.0,,10.0,30.0,124.0,,0.0,1.0,,1.0,9.0,40002.0,912010.0,101.0,41.0,1.0,1.0,0.0,2.0,320.0,3.0,1.0,301.0,2.0,9.0,1.0,2.0,2011.0,9.0,1.0,2.0,3.0,,,,1.0,,,,,,,,,1.0,2.0,3697.0,981.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,98.0,2.0
3,0.0,,10.0,30.0,124.0,,0.0,1.0,,1.0,9.0,40002.0,912010.0,101.0,64.0,2.0,1.0,0.0,2.0,320.0,1.0,1.0,101.0,2.0,65.0,99.0,5.0,1955.0,9.0,1.0,8.0,2.0,7311,,1.0,2.0,1.0,6.0,,,,,,,1.0,2.0,3697.0,981.0,0.0,1.0,1.0,1.0,1.0,1.0,4.0,1.0,5.0,5.0,6.0,3.0,4.0,3.0,9.0,4.0,2.0,5.0,2.0,4.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,4.0,4.0,6.0,11.0,16.0,2.0,3.0,18.0,37.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,16.0,1.0,2.0,2.0,0.0,3.0,2.0,7.0,1.0,98.0,2.0
4,0.0,,10.0,30.0,124.0,,0.0,1.0,,1.0,9.0,40002.0,912010.0,101.0,64.0,2.0,1.0,0.0,2.0,320.0,2.0,1.0,301.0,2.0,30.0,99.0,9.0,1989.0,9.0,1.0,7.0,3.0,5532,3.0,2.0,1.0,0.0,6.0,,,,,,,1.0,2.0,3697.0,981.0,0.0,1.0,2.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,3.0,4.0,9.0,1.0,2.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,98.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296869,15.0,,39.0,20.0,436.0,313.0,0.0,1.0,1.0,30.0,30.0,42263.0,3000869.0,405.0,20.0,1.0,1.0,0.0,4.0,320.0,1.0,1.0,101.0,1.0,49.0,99.0,12.0,1970.0,30.0,1.0,7.0,4.0,5341,3.0,1.0,2.0,,5.0,,,,,,,1.0,2.0,3697.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,7.0,2.0
296870,15.0,,59.0,20.0,454.0,441.0,0.0,2.0,2.0,8.0,31.0,40394.0,3102836.0,113.0,24.0,5.0,1.0,0.0,2.0,320.0,2.0,1.0,201.0,2.0,59.0,4.0,12.0,1960.0,31.0,1.0,2.0,6.0,,,,2.0,2.0,5.0,,,,,,,1.0,2.0,3697.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.0,2.0
296871,15.0,,39.0,40.0,270.0,259.0,0.0,1.0,1.0,3.0,19.0,40404.0,1906143.0,409.0,14.0,3.0,1.0,0.0,4.0,320.0,3.0,1.0,301.0,2.0,7.0,99.0,99.0,2013.0,19.0,1.0,1.0,2.0,,,,1.0,,,,,,,,,1.0,2.0,3697.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,8.0,2.0
296872,15.0,,3.0,20.0,48.0,47.0,0.0,1.0,1.0,42.0,4.0,41263.0,400992.0,413.0,115.0,5.0,1.0,0.0,4.0,320.0,7.0,1.0,413.0,2.0,5.0,26.0,2.0,2015.0,4.0,2.0,2.0,1.0,,,,1.0,,,,,,,,,1.0,2.0,3697.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.0,2.0


In [None]:
# deflactor_mexico = [
#    100.00, 100.52, 101.25, 102.34, 103.11, 103.22, 104.13, 105.47,
#    106.51, 106.68, 107.82, 109.43, 110.60, 111.64, 113.14, 115.02,
#    116.38, 116.44, 117.15, 118.06, 119.92, 120.15, 120.91, 122.37,
#    123.36, 123.68, 124.58, 126.11, 127.28, 127.89, 129.23, 131.02,
#    132.22, 132.30, 133.22, 134.41, 135.61, 135.83, 136.91, 138.25,
#    138.86, 138.76, 139.51, 140.24, 141.01, 141.13, 142.17, 143.94,
#    146.12, 147.27, 148.87, 151.10, 153.07, 153.03, 154.51, 156.41,
#    157.65, 158.12, 158.83, 159.87, 161.42, 161.27, 162.59, 164.84,
#    166.92, 168.12, 170.36, 173.84, 176.63, 179.02, 182.26, 185.73,
#    188.76, 188.80, 190.23, 192.51, 195.96, 196.86, 199.11, 201.76,
#    203.68, 205.14
#]
#df_serie_nacional 
# 
# #df_serie_nacional 