In [1]:
import io
import json
import os
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload


# ======================================================
# 1) Credenciales (Colab usa MI_JSON desde userdata; fuera de Colab usa MI_JSON env)
# ======================================================
def get_credentials() -> Credentials:
    """
    - En Colab: usa MI_JSON desde google.colab.userdata (NO os.environ)
    - En GitHub/local: usa variable de entorno MI_JSON
    """
    info: Dict

    try:
        from google.colab import userdata  # type: ignore

        mi_json = userdata.get("MI_JSON")
        if not mi_json:
            raise ValueError("MI_JSON no encontrado en Colab userdata")
        info = json.loads(mi_json)
        print("Entorno detectado: Google Colab")
    except Exception:
        mi_json = os.environ.get("MI_JSON")
        if not mi_json:
            raise ValueError("MI_JSON no encontrado como variable de entorno (GitHub/local)")
        info = json.loads(mi_json)
        print("Entorno detectado: GitHub / local")

    return Credentials.from_service_account_info(
        info,
        scopes=[
            "https://www.googleapis.com/auth/drive.readonly",
            "https://www.googleapis.com/auth/spreadsheets.readonly",
        ],
    )


creds = get_credentials()
drive_service = build("drive", "v3", credentials=creds)


# ======================================================
# 2) Helpers: meses en espa√±ol + parsing del nombre del archivo
# ======================================================
MES_MAP = {
    "ene": 1,
    "feb": 2,
    "mar": 3,
    "abr": 4,
    "may": 5,
    "jun": 6,
    "jul": 7,
    "ago": 8,
    "sep": 9,
    "oct": 10,
    "nov": 11,
    "dic": 12,
}
MES_NOMBRE = {
    1: "Enero",
    2: "Febrero",
    3: "Marzo",
    4: "Abril",
    5: "Mayo",
    6: "Junio",
    7: "Julio",
    8: "Agosto",
    9: "Septiembre",
    10: "Octubre",
    11: "Noviembre",
    12: "Diciembre",
}


def sheet_name_from_date(dt: datetime) -> str:
    """Ej: datetime(2025,12,...) -> 'Diciembre 2025' """
    return f"{MES_NOMBRE[dt.month]} {dt.year}"


def parse_range_from_filename(name: str) -> Optional[Tuple[int, int, int, int]]:
    """
    Espera nombres tipo:
      'Asignaciones de Cartera Ene26-Abr26.xlsx'
      'Asignaciones de Cartera Sep25-Dic25.xlsx'
    Retorna (start_year, start_month, end_year, end_month) o None si no matchea.
    """
    m = re.search(r"([A-Za-z]{3})(\d{2})\s*-\s*([A-Za-z]{3})(\d{2})", name, flags=re.IGNORECASE)
    if not m:
        return None

    m1, y1, m2, y2 = m.group(1).lower(), m.group(2), m.group(3).lower(), m.group(4)
    if m1 not in MES_MAP or m2 not in MES_MAP:
        return None

    start_month = MES_MAP[m1]
    end_month = MES_MAP[m2]
    start_year = 2000 + int(y1)
    end_year = 2000 + int(y2)

    return (start_year, start_month, end_year, end_month)


def month_index(year: int, month: int) -> int:
    """Convierte (year, month) a √≠ndice comparable."""
    return year * 12 + month


def file_covers_month(file_range: Tuple[int, int, int, int], target_dt: datetime) -> bool:
    sy, sm, ey, em = file_range
    t = month_index(target_dt.year, target_dt.month)
    a = month_index(sy, sm)
    b = month_index(ey, em)
    return a <= t <= b


def is_df_empty_like(df: Optional[pd.DataFrame]) -> bool:
    """
    Considera 'vac√≠a' si:
    - df es None
    - df tiene 0 filas
    - o todas las filas est√°n completamente NaN
    """
    if df is None or df.shape[0] == 0:
        return True
    return df.dropna(how="all").shape[0] == 0


# ======================================================
# 3) Listar archivos en carpeta y elegir el correcto por mes
# ======================================================
def list_assignment_files_in_folder(folder_id: str) -> List[Dict]:
    """
    Lista archivos en la carpeta cuyo nombre contenga 'Asignaciones de Cartera'
    y devuelve una lista con metadatos: id, name, mimeType, modifiedTime, parsed_range
    """
    q = f"'{folder_id}' in parents and trashed=false and name contains 'Asignaciones de Cartera'"
    files: List[Dict] = []
    page_token = None

    while True:
        resp = (
            drive_service.files()
            .list(q=q, fields="nextPageToken, files(id,name,mimeType,modifiedTime)", pageToken=page_token)
            .execute()
        )

        for f in resp.get("files", []):
            fr = parse_range_from_filename(f.get("name", ""))
            if fr:
                f["parsed_range"] = fr
                files.append(f)

        page_token = resp.get("nextPageToken")
        if not page_token:
            break

    if not files:
        raise ValueError(
            "No encontr√© archivos 'Asignaciones de Cartera' con rango tipo Ene26-Abr26 dentro de la carpeta."
        )

    return files


def pick_file_for_month(files_meta: List[Dict], target_dt: datetime) -> Dict:
    """
    Escoge el archivo cuya ventana (en el nombre) cubra el mes target_dt.
    Si hay varios, elige el de rango m√°s corto (m√°s espec√≠fico) y si empatan, el m√°s reciente.
    """
    candidates = []
    for f in files_meta:
        fr = f["parsed_range"]
        if file_covers_month(fr, target_dt):
            sy, sm, ey, em = fr
            span = month_index(ey, em) - month_index(sy, sm)
            candidates.append((span, f.get("modifiedTime", ""), f))

    if candidates:
        candidates.sort(key=lambda x: (x[0], x[1]))  # menor span, luego por modifiedTime asc
        min_span = candidates[0][0]
        same_span = [c for c in candidates if c[0] == min_span]
        same_span.sort(key=lambda x: x[1], reverse=True)  # m√°s reciente primero
        return same_span[0][2]

    t = month_index(target_dt.year, target_dt.month)

    past = []
    for f in files_meta:
        sy, sm, ey, em = f["parsed_range"]
        end_i = month_index(ey, em)
        if end_i <= t:
            past.append((end_i, f.get("modifiedTime", ""), f))
    if past:
        past.sort(key=lambda x: (x[0], x[1]), reverse=True)
        return past[0][2]

    future = []
    for f in files_meta:
        sy, sm, ey, em = f["parsed_range"]
        start_i = month_index(sy, sm)
        if start_i >= t:
            future.append((start_i, f.get("modifiedTime", ""), f))
    if future:
        future.sort(key=lambda x: (x[0], x[1]))
        return future[0][2]

    raise ValueError("No se pudo escoger un archivo por fecha (revisa nombres/rangos).")


# ======================================================
# 4) Descargar archivo (Google Sheets o Excel) a memoria
# ======================================================
def download_file_to_buffer(file_id: str, mime_type: str) -> io.BytesIO:
    buffer = io.BytesIO()

    if mime_type == "application/vnd.google-apps.spreadsheet":
        request = drive_service.files().export_media(
            fileId=file_id,
            mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    else:
        request = drive_service.files().get_media(fileId=file_id)

    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()

    buffer.seek(0)
    return buffer


# ======================================================
# 5) Intentar leer hoja del mes target; si est√° vac√≠a -> retroceder mes a mes
# ======================================================
def shift_month(dt: datetime, n: int) -> datetime:
    """Mueve dt n meses (n puede ser negativo)."""
    y = dt.year + (dt.month - 1 + n) // 12
    m = (dt.month - 1 + n) % 12 + 1
    return datetime(y, m, 1)


def load_assignment_base_from_folder(
    folder_id: str, max_back_months: int = 24
) -> Tuple[pd.DataFrame, Dict, str]:
    """
    Busca base del mes actual; si no existe o est√° vac√≠a,
    busca mes anterior (incluyendo cambio de archivo si aplica).
    Retorna: (df, file_meta, sheet_name_usada)
    """
    files_meta = list_assignment_files_in_folder(folder_id)
    today = datetime.today()

    last_error: Optional[Exception] = None

    for back in range(0, max_back_months + 1):
        target_dt = shift_month(today, -back)
        target_sheet = sheet_name_from_date(target_dt)
        chosen = pick_file_for_month(files_meta, target_dt)

        try:
            buffer = download_file_to_buffer(chosen["id"], chosen["mimeType"])
            df = pd.read_excel(buffer, sheet_name=target_sheet, engine="openpyxl")

            if is_df_empty_like(df):
                print(f"üü° {target_sheet} encontrado pero vac√≠o en: {chosen['name']} -> probando mes anterior...")
                continue

            print("‚úÖ Base encontrada")
            print(f"   Archivo: {chosen['name']}")
            print(f"   Hoja:    {target_sheet}")
            return df, chosen, target_sheet

        except Exception as e:
            last_error = e
            print(
                f"üü† No se pudo usar {target_sheet} en {chosen['name']} ({type(e).__name__}) -> probando mes anterior..."
            )

    raise RuntimeError(
        f"No encontr√© una hoja v√°lida en los √∫ltimos {max_back_months} meses. "
        f"√öltimo error: {repr(last_error)}"
    )


# ======================================================
# 6) EJECUCI√ìN
# ======================================================
FOLDER_ID = "1cf2p3R7iM0xowAt4muEruDwxZoZqD_jB"

df, meta_file, sheet_used = load_assignment_base_from_folder(
    folder_id=FOLDER_ID,
    max_back_months=24,
)

df.head()

Entorno detectado: GitHub / local


üü° Febrero 2026 encontrado pero vac√≠o en: Asignaciones de Cartera Ene26-Abr26.xlsx -> probando mes anterior...


‚úÖ Base encontrada
   Archivo: Asignaciones de Cartera Ene26-Abr26.xlsx
   Hoja:    Enero 2026


Unnamed: 0,Referencia,Cedula,Nombre del cliente,Id deuda,correo,Deudas Activas,BANCO,N√∫mero de Cr√©dito,Deuda Resuelve,DBT,...,I_ESPERADO,Negociador,Tipo Elegible,P_Cierre_norm,D√≠as Atraso liquidacion,Estado PL,Ultima Liquidacion,dias_desde_ultima_liq,Priority_level,Bucket
0,3012650,52216213,CLAUDIA PATRICIA AVENDANO CALDERON,111266,claudiapa426@hotmail.com,2,Scotiabank Citibank,318121778,9256000.0,58255000.0,...,125415.711006,Negodito,,0.633814,,Al d√≠a,NaT,,Prioridad 2,
1,3122962933,50868603,Luz Nelly Betancourt Ortega,225561,zulyllen@hotmail.com,3,Banco de Occidente,540625-6-730934-608 VQ,2138165.0,11396681.0,...,12585.582824,William Santiago Abril Esguerra,,0.392411,2162.0,Atrasado,NaT,,Prioridad 1,10.0
2,3122962933,50868603,Luz Nelly Betancourt Ortega,225562,zulyllen@hotmail.com,3,Banco de Occidente,417899-4-107487-516 VQ,3299116.0,11396681.0,...,18849.499266,William Santiago Abril Esguerra,,0.340089,1796.0,Atrasado,NaT,,Prioridad 1,10.0
3,3122962933,50868603,Luz Nelly Betancourt Ortega,225563,zulyllen@hotmail.com,3,√âxito,745089099,5959400.0,11396681.0,...,32976.81479,William Santiago Abril Esguerra,,0.274687,1342.0,Atrasado,NaT,,Prioridad 1,10.0
4,3196892494,52902886,Diana Marcela Suarez,404243,marcelasuarezladino@outlook.com,5,Banco Popular,346383,1921650.0,10126350.0,...,9581.3469,Dayana Isabel Ojito Ortiz,,0.34625,1173.0,Atrasado,NaT,,Prioridad 1,


In [2]:
df = df.rename(columns={"Deuda Resuelve": "D_BRAVO"})

In [3]:
df.columns

Index(['Referencia', 'Cedula', 'Nombre del cliente', 'Id deuda', 'correo',
       'Deudas Activas', 'BANCO', 'N√∫mero de Cr√©dito', 'D_BRAVO', 'DBT',
       'MORA', 'Dias de Atraso', 'Apartado Mensual', 'Fecha inicio',
       'vehiculo', 'Meses en el Programa', 'tipo_cliente', 'Comisi√≥n Mensual',
       'Tipo de cobro', 'CE', 'Estado Deuda', 'estado_novacion', 'Fecha PL',
       'sub_estado_deuda', 'ID_reparadora', 'estado_reparadora',
       'sub_estado_reparadora', 'dias_mora_ingreso', 'ultima_actividad',
       'PB_PL', 'estado_estructuracion', 'estado_flujo_liquidacion',
       'Ahorro total', 'Ahorro medio', 'Por cobrar', 'BANCOS_ESTANDAR',
       'Descuento', 'ultimo contacto', 'fecha mensaje', 'Mora_estructurado',
       'MORA_CREDITO', 'Potencial Credito', 'ultimo Pab', 'ultima act',
       'Tipo de Liquidacion', 'Clasificaci√≥n Banco', 'Apartados 30%', 'Plazo',
       'Estructurable', 'Potencial', '# Pagos', 'P_Pab', 'S_PAb',
       'Potencial_Estructurados', 'Tipo_Liq_calc',

In [4]:
df

Unnamed: 0,Referencia,Cedula,Nombre del cliente,Id deuda,correo,Deudas Activas,BANCO,N√∫mero de Cr√©dito,D_BRAVO,DBT,...,I_ESPERADO,Negociador,Tipo Elegible,P_Cierre_norm,D√≠as Atraso liquidacion,Estado PL,Ultima Liquidacion,dias_desde_ultima_liq,Priority_level,Bucket
0,3012650,52216213,CLAUDIA PATRICIA AVENDANO CALDERON,111266,claudiapa426@hotmail.com,2,Scotiabank Citibank,318121778,9256000.0,58255000.0,...,125415.711006,Negodito,,0.633814,,Al d√≠a,NaT,,Prioridad 2,
1,3122962933,50868603,Luz Nelly Betancourt Ortega,225561,zulyllen@hotmail.com,3,Banco de Occidente,540625-6-730934-608 VQ,2138165.0,11396681.0,...,12585.582824,William Santiago Abril Esguerra,,0.392411,2162.0,Atrasado,NaT,,Prioridad 1,10.0
2,3122962933,50868603,Luz Nelly Betancourt Ortega,225562,zulyllen@hotmail.com,3,Banco de Occidente,417899-4-107487-516 VQ,3299116.0,11396681.0,...,18849.499266,William Santiago Abril Esguerra,,0.340089,1796.0,Atrasado,NaT,,Prioridad 1,10.0
3,3122962933,50868603,Luz Nelly Betancourt Ortega,225563,zulyllen@hotmail.com,3,√âxito,745089099,5959400.0,11396681.0,...,32976.814790,William Santiago Abril Esguerra,,0.274687,1342.0,Atrasado,NaT,,Prioridad 1,10.0
4,3196892494,52902886,Diana Marcela Suarez,404243,marcelasuarezladino@outlook.com,5,Banco Popular,346383,1921650.0,10126350.0,...,9581.346900,Dayana Isabel Ojito Ortiz,,0.346250,1173.0,Atrasado,NaT,,Prioridad 1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41253,3225833196,1019066241,JUAN DAVID LOZANO TRIVINO,2091518,juan17lt@hotmail.com,4,Davivienda,1891,13695024.0,174795352.0,...,56291.062129,Gabriela Saavedra Latorre,,0.376714,,Al d√≠a,NaT,,Prioridad 2,10.0
41254,3225833196,1019066241,JUAN DAVID LOZANO TRIVINO,2091519,juan17lt@hotmail.com,4,Scotiabank Colpatria,5853,50463309.0,174795352.0,...,187132.142578,Gabriela Saavedra Latorre,,0.247219,,Al d√≠a,NaT,,Prioridad 2,10.0
41255,3225833196,1019066241,JUAN DAVID LOZANO TRIVINO,2091520,juan17lt@hotmail.com,4,Bancolombia,9610,82022139.0,174795352.0,...,285911.490530,Gabriela Saavedra Latorre,,0.247219,,Al d√≠a,NaT,,Prioridad 2,10.0
41256,3105507630,79381039,JUAN CARLOS PACHECO CONTRERAS,2091523,j.pacheco1166@gmail.com,5,Bancolombia,130100335,130935011.0,190332243.0,...,456411.215484,Julian Andres Ossa Bohorquez,,0.247219,,Al d√≠a,NaT,,Prioridad 2,10.0


#Saldos Actualizados

In [5]:
"""
Lee 3 hojas de un Google Sheets con encabezados "corridos" (no en la fila 1),
extrae REFERENCIA y SALDO, agrega columna Vehiculo (POWWI/SKANDIA/COINK),
y concatena todo en un solo DataFrame.

‚úÖ Funciona en:
- Google Colab (usa secreto MI_JSON)
- GitHub / local (usa env MI_JSON o GOOGLE_APPLICATION_CREDENTIALS o service_account.json)

Requisitos:
pip install gspread google-auth pandas
"""

import os
import json
import numpy as np
import pandas as pd
from typing import Union, Dict, Any

import gspread
from google.oauth2.service_account import Credentials


# =========================
# 1) CONFIG
# =========================
SPREADSHEET_ID = "1mvxPdnyp5ip_0Lqyf6qy09BAtX323PF2Yc5-qGoukeU"

SHEETS = [
    ("SALDOS POWWI", "POWWI"),
    ("SALDOS SKANDIA", "SKANDIA"),
    ("SALDOS COINK", "COINK"),
]

SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive",
]


# =========================
# 2) CREDENCIALES (Colab / GitHub / Local)
# =========================
def _coerce_to_dict(value: Union[str, dict]) -> Dict[str, Any]:
    """Convierte string JSON o dict a dict."""
    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        v = value.strip()

        # Si el secreto/ENV viene con una capa extra de comillas
        if (v.startswith("'") and v.endswith("'")) or (v.startswith('"') and v.endswith('"')):
            # Intentamos quitar solo UNA capa (sin da√±ar JSON normal)
            try:
                maybe = v[1:-1]
                # Si eso parece JSON, √∫salo
                if maybe.strip().startswith("{") and maybe.strip().endswith("}"):
                    v = maybe
            except Exception:
                pass

        return json.loads(v)
    raise TypeError(f"Credenciales en formato no soportado: {type(value)}")


def load_service_account_info() -> Dict[str, Any]:
    """
    Devuelve el dict JSON del service account desde cualquiera de estas fuentes (en orden):
    1) Google Colab secret: MI_JSON (userdata.get)
    2) Variable de entorno: MI_JSON (string JSON)
    3) Variable de entorno: GOOGLE_APPLICATION_CREDENTIALS (path a .json)
    4) Archivo local: service_account.json (si existe)
    """
    # (1) Colab secret
    try:
        from google.colab import userdata  # type: ignore

        mi_json = userdata.get("MI_JSON")
        if mi_json is not None:
            return _coerce_to_dict(mi_json)
    except Exception:
        pass  # no es colab

    # (2) Env var MI_JSON
    env_mi_json = os.getenv("MI_JSON")
    if env_mi_json:
        return _coerce_to_dict(env_mi_json)

    # (3) Path v√≠a GOOGLE_APPLICATION_CREDENTIALS
    gac = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if gac and os.path.exists(gac):
        with open(gac, "r", encoding="utf-8") as f:
            return json.load(f)

    # (4) Archivo local por defecto
    default_path = "service_account.json"
    if os.path.exists(default_path):
        with open(default_path, "r", encoding="utf-8") as f:
            return json.load(f)

    raise FileNotFoundError(
        "No encontr√© credenciales. Usa UNA de estas opciones:\n"
        "- Colab: guarda el secreto MI_JSON\n"
        "- GitHub/Local: export MI_JSON='{\"type\":...}'\n"
        "- GitHub/Local: export GOOGLE_APPLICATION_CREDENTIALS='/ruta/key.json'\n"
        "- GitHub/Local: crea service_account.json en el proyecto"
    )


def get_gspread_client() -> gspread.Client:
    info = load_service_account_info()
    creds = Credentials.from_service_account_info(info, scopes=SCOPES)
    return gspread.authorize(creds)


# =========================
# 3) HELPERS: HEADER CORRIDO + SALDO A N√öMERO (CORREGIDO)
# =========================
def _norm(x) -> str:
    return str(x).strip().upper() if x is not None else ""


def _to_number_col(s: pd.Series) -> pd.Series:
    """
    Convierte SALDO desde textos como:
    - 1.057.428         -> 1057428
    - 353.281           -> 353281
    - $5.019.407.320,00 -> 5019407320.00
    - 2723949           -> 2723949
    """
    s = s.astype(str).str.strip()

    # quita $ y espacios
    s = s.str.replace(r"[\$\s]", "", regex=True)

    has_comma = s.str.contains(",", na=False)

    # Si hay coma: formato ES -> miles '.' y decimal ','
    s_comma = (
        s.str.replace(".", "", regex=False)
         .str.replace(",", ".", regex=False)
    )

    # Si NO hay coma: '.' se asume miles (se quita)
    s_nocomma = s.str.replace(".", "", regex=False)

    cleaned = np.where(has_comma, s_comma, s_nocomma)
    return pd.to_numeric(cleaned, errors="coerce")


def read_sheet_with_shifted_header(
    gc: gspread.Client,
    spreadsheet_id: str,
    worksheet_name: str,
    vehiculo_value: str,
    max_scan_rows: int = 300,
) -> pd.DataFrame:
    """
    Lee una hoja donde el header real NO est√° en la fila 1.
    Detecta la fila que contiene 'REFERENCIA' y 'SALDO', arma el DF desde ah√≠.
    Devuelve columnas: REFERENCIA, SALDO, Vehiculo
    """
    sh = gc.open_by_key(spreadsheet_id)
    ws = sh.worksheet(worksheet_name)
    values = ws.get_all_values()
    raw = pd.DataFrame(values)

    header_row_idx = None
    for i in range(min(len(raw), max_scan_rows)):
        row = raw.iloc[i].map(_norm).tolist()
        if "REFERENCIA" in row and "SALDO" in row:
            header_row_idx = i
            break

    if header_row_idx is None:
        raise ValueError(
            f"No pude encontrar una fila de encabezados con 'REFERENCIA' y 'SALDO' en: {worksheet_name}"
        )

    headers = raw.iloc[header_row_idx].map(lambda x: str(x).strip()).tolist()
    df = raw.iloc[header_row_idx + 1 :].copy()
    df.columns = [h.strip() for h in headers]

    # Asegura columnas REFERENCIA y SALDO aunque vengan con case raro
    cols_upper = {c.upper(): c for c in df.columns}
    if "REFERENCIA" not in df.columns and "REFERENCIA" in cols_upper:
        df.rename(columns={cols_upper["REFERENCIA"]: "REFERENCIA"}, inplace=True)
    if "SALDO" not in df.columns and "SALDO" in cols_upper:
        df.rename(columns={cols_upper["SALDO"]: "SALDO"}, inplace=True)

    if "REFERENCIA" not in df.columns or "SALDO" not in df.columns:
        raise ValueError(
            f"En {worksheet_name} detect√© el header pero no quedaron columnas 'REFERENCIA' y 'SALDO'. "
            f"Columnas encontradas: {list(df.columns)}"
        )

    df = df[["REFERENCIA", "SALDO"]].copy()

    # Limpieza b√°sica
    df["REFERENCIA"] = df["REFERENCIA"].astype(str).str.strip()
    df = df[df["REFERENCIA"].ne("")].copy()

    df["SALDO"] = _to_number_col(df["SALDO"])
    df["Vehiculo"] = vehiculo_value

    return df.reset_index(drop=True)


# =========================
# 4) MAIN: LEE + CONCATENA + PRUEBAS R√ÅPIDAS
# =========================
def build_df_saldos_concat() -> pd.DataFrame:
    gc = get_gspread_client()

    dfs = []
    for sheet_name, vehiculo in SHEETS:
        df_i = read_sheet_with_shifted_header(
            gc=gc,
            spreadsheet_id=SPREADSHEET_ID,
            worksheet_name=sheet_name,
            vehiculo_value=vehiculo,
        )
        dfs.append(df_i)

    df_saldos_concat = pd.concat(dfs, ignore_index=True)[["REFERENCIA", "SALDO", "Vehiculo"]]
    return df_saldos_concat


if __name__ == "__main__":
    df_saldos_concat = build_df_saldos_concat()

    print("‚úÖ Listo. Shape:", df_saldos_concat.shape)
    print(df_saldos_concat.head(15))

    # Chequeo del caso que mencionaste (cambia si quieres)
    test_ref = "3001342492"
    print("\nüîé Validaci√≥n referencia", test_ref)
    print(df_saldos_concat.loc[df_saldos_concat["REFERENCIA"].astype(str) == test_ref])

‚úÖ Listo. Shape: (15011, 3)
    REFERENCIA    SALDO Vehiculo
0   3001033679   802214    POWWI
1   3001342492  1347428    POWWI
2   3001456624        0    POWWI
3   3001542338        0    POWWI
4   3001720277   482072    POWWI
5   3001788198   308942    POWWI
6   3001877418  2240462    POWWI
7   3001934505      204    POWWI
8   3001963725  1320696    POWWI
9   3002025530     1521    POWWI
10  3002061945      206    POWWI
11  3002063077  2557127    POWWI
12  3002071472   190289    POWWI
13  3002074569   640624    POWWI
14  3002074976      202    POWWI

üîé Validaci√≥n referencia 3001342492
   REFERENCIA    SALDO Vehiculo
1  3001342492  1347428    POWWI


In [6]:
df_saldos_concat

Unnamed: 0,REFERENCIA,SALDO,Vehiculo
0,3001033679,802214,POWWI
1,3001342492,1347428,POWWI
2,3001456624,0,POWWI
3,3001542338,0,POWWI
4,3001720277,482072,POWWI
...,...,...,...
15006,3225957847,0,COINK
15007,3158457408,0,COINK
15008,3015562062,0,COINK
15009,3213353311,0,COINK


In [7]:
import os
import json
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials

# -------- CONFIG --------
SPREADSHEET_ID = "1mvxPdnyp5ip_0Lqyf6qy09BAtX323PF2Yc5-qGoukeU"
SHEET_NAME   = "TOTAL"

SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive",
]

# -------- CREDENCIALES --------
def _coerce_to_dict(value):
    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        v = value.strip()
        if (v.startswith("'") and v.endswith("'")) or (v.startswith('"') and v.endswith('"')):
            maybe = v[1:-1]
            if maybe.strip().startswith("{") and maybe.strip().endswith("}"):
                v = maybe
        return json.loads(v)
    raise TypeError(f"Formato no soportado: {type(value)}")

def load_service_account_info():
    """
    Intenta encontrar credenciales SA JSON desde:
    1) Secrets Colab (MI_JSON)
    2) Env var MI_JSON
    3) GOOGLE_APPLICATION_CREDENTIALS (path)
    4) Archivo local service_account.json
    """
    try:
        from google.colab import userdata
        mi_json = userdata.get("MI_JSON")
        if mi_json:
            return _coerce_to_dict(mi_json)
    except Exception:
        pass

    env = os.getenv("MI_JSON")
    if env:
        return _coerce_to_dict(env)

    gac = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if gac and os.path.exists(gac):
        with open(gac, "r", encoding="utf-8") as f:
            return json.load(f)

    if os.path.exists("service_account.json"):
        with open("service_account.json", "r", encoding="utf-8") as f:
            return json.load(f)

    raise FileNotFoundError("No encontr√© credenciales SA para Google Sheets")

def get_gspread_client():
    info = load_service_account_info()
    creds = Credentials.from_service_account_info(info, scopes=SCOPES)
    return gspread.authorize(creds)

# -------- LEER HOJA --------
def read_total_sheet():
    gc = get_gspread_client()
    sh = gc.open_by_key(SPREADSHEET_ID)
    ws = sh.worksheet(SHEET_NAME)

    # Lee todo en bruto
    values = ws.get_all_values()
    df = pd.DataFrame(values)

    # si la primera fila YA es encabezado:
    df.columns = df.iloc[0]
    df = df[1:].reset_index(drop=True)

    return df

# -------- EJECUCI√ìN --------
df_total = read_total_sheet()

print("Shape:", df_total.shape)
df_total.head(15)

Shape: (17950, 7)


Unnamed: 0,Unnamed: 1,REFERENCIA,DOCUMENTO,X COBRAR FLUJOS,X COBRAR FACTURACI√ìN,TOTAL,MENSUALIDADES COBRADAS
0,,3213843031,,"$124.269,00","$260.000,00",$384.269,0
1,,3208393999,,"$199.749,00","$0,00",$199.749,0
2,,3107746045,,"$0,00","$380.531,00",$380.531,0
3,,3163143964,,"$107.885,00","$0,00",$107.885,0
4,,1090391063,,"$407.474,00","$0,00",$407.474,0
5,,3016223771,,"$0,00","$316.000,00",$316.000,0
6,,3503946092,,"$0,00","$9.415.663,00",$9.415.663,0
7,,3505950650,,"$400.853,00","$0,00",$400.853,0
8,,3177510366,,"$60.639,00","$0,00",$60.639,0
9,,3144747305,,"$12.500,00","$0,00",$12.500,0


In [8]:
# 1) Normalizar nombres de columnas (por seguridad)
df_saldos_concat.columns = [c.strip().upper() for c in df_saldos_concat.columns]
df_total.columns = [c.strip().upper() for c in df_total.columns]

# 2) Quedarnos solo con lo necesario de df_total
df_total_aux = df_total[["REFERENCIA", "TOTAL"]].copy()

# 3) Merge (left join)
df_saldos_concat = df_saldos_concat.merge(
    df_total_aux,
    on="REFERENCIA",
    how="left"
)

# 4) Renombrar TOTAL ‚Üí Saldo
df_saldos_concat = df_saldos_concat.rename(columns={"TOTAL": "Saldo"})

df_saldos_concat

Unnamed: 0,REFERENCIA,SALDO,VEHICULO,Saldo
0,3001033679,802214,POWWI,$801.955
1,3001342492,1347428,POWWI,
2,3001456624,0,POWWI,
3,3001542338,0,POWWI,$396.063
4,3001720277,482072,POWWI,
...,...,...,...,...
15006,3225957847,0,COINK,
15007,3158457408,0,COINK,
15008,3015562062,0,COINK,$816.616
15009,3213353311,0,COINK,


In [9]:
import numpy as np
import pandas as pd

# 1) Renombrar SALDO -> Ahorro
df_saldos_concat = df_saldos_concat.rename(columns={"SALDO": "Ahorro"})

# 2) Convertir Saldo (string con separador de miles) a num√©rico
df_saldos_concat["Saldo"] = (
    df_saldos_concat["Saldo"]
        .astype(str)
        .str.strip()
        .str.replace(r"[\$\s]", "", regex=True)   # quita $ y espacios
        .str.replace(".", "", regex=False)        # quita separador de miles
)

df_saldos_concat["Saldo"] = pd.to_numeric(
    df_saldos_concat["Saldo"],
    errors="coerce"
)

df_saldos_concat.head()

Unnamed: 0,REFERENCIA,Ahorro,VEHICULO,Saldo
0,3001033679,802214,POWWI,801955.0
1,3001342492,1347428,POWWI,
2,3001456624,0,POWWI,
3,3001542338,0,POWWI,396063.0
4,3001720277,482072,POWWI,


In [10]:
df_saldos_concat

Unnamed: 0,REFERENCIA,Ahorro,VEHICULO,Saldo
0,3001033679,802214,POWWI,801955.0
1,3001342492,1347428,POWWI,
2,3001456624,0,POWWI,
3,3001542338,0,POWWI,396063.0
4,3001720277,482072,POWWI,
...,...,...,...,...
15006,3225957847,0,COINK,
15007,3158457408,0,COINK,
15008,3015562062,0,COINK,816616.0
15009,3213353311,0,COINK,


#Ultimo Mensaje actualizado

In [11]:
import os
import requests
import pandas as pd
from io import StringIO
from requests.exceptions import JSONDecodeError

BASE_URL = "https://metabase.resuelve.io"
CARD_ID = 11382


def running_in_colab():
    try:
        import google.colab  # noqa
        return True
    except ImportError:
        return False


def get_mb_credentials():
    user = None
    password = None

    if running_in_colab():
        from google.colab import userdata
        user = userdata.get("MB_USER")
        password = userdata.get("MB_PASS")

    if not user:
        user = os.environ.get("MB_USER")
    if not password:
        password = os.environ.get("MB_PASS")

    if not user or not password:
        raise ValueError("No encontr√© credenciales de Metabase.")

    return user, password


def create_metabase_session():
    user, password = get_mb_credentials()

    resp = requests.post(
        f"{BASE_URL}/api/session",
        json={"username": user, "password": password},
        timeout=60,
    )
    resp.raise_for_status()

    session_id = resp.json()["id"]
    headers = {"X-Metabase-Session": session_id}
    return headers


def get_card_full(card_id: int) -> pd.DataFrame:
    headers = create_metabase_session()

    try:
        resp = requests.post(
            f"{BASE_URL}/api/card/{card_id}/query/json",
            headers=headers,
            json={},
            timeout=600,
        )
        resp.raise_for_status()
        rows = resp.json()
        return pd.DataFrame(rows)

    except (JSONDecodeError, ValueError):
        resp = requests.post(
            f"{BASE_URL}/api/card/{card_id}/query/csv",
            headers=headers,
            json={},
            timeout=600,
        )
        resp.raise_for_status()
        return pd.read_csv(StringIO(resp.text))


df_11382 = get_card_full(CARD_ID)
print(df_11382.shape)
df_11382.head()

(15805, 5)


Unnamed: 0,id,bank_reference,phone_number,country,message_date_utc
0,193082,46195,3112210187,co,2025-07-16T23:14:24Z
1,222944,3103017405,3103017405,co,2025-09-18T21:01:41Z
2,261699,3133140552,3133140552,co,2025-02-05T15:44:03Z
3,268273,3002139025,3002139025,co,2025-10-10T15:22:09Z
4,269957,3225806775,3225806775,co,2025-03-13T20:29:34Z


In [12]:
import pandas as pd

df_11382["message_date_utc"] = (
    pd.to_datetime(df_11382["message_date_utc"], utc=True)  # asegura UTC
      .dt.tz_convert("America/Bogota")                      # pasa a hora Colombia
      .dt.tz_localize(None)                                 # quita info de zona
)

In [13]:
df_11382

Unnamed: 0,id,bank_reference,phone_number,country,message_date_utc
0,193082,46195,3112210187,co,2025-07-16 18:14:24
1,222944,3103017405,3103017405,co,2025-09-18 16:01:41
2,261699,3133140552,3133140552,co,2025-02-05 10:44:03
3,268273,3002139025,3002139025,co,2025-10-10 10:22:09
4,269957,3225806775,3225806775,co,2025-03-13 15:29:34
...,...,...,...,...,...
15800,606171,3132177031,3132177031,co,2026-02-02 16:17:01
15801,607300,3008887491,3008887491,co,2026-01-21 14:45:56
15802,607818,3142317725,3142317725,co,2026-01-19 12:54:14
15803,609403,3108413132,3108413132,co,2026-02-03 16:46:07


#Estado Reparadora

In [14]:
import os
import requests
import pandas as pd
from io import StringIO
from requests.exceptions import JSONDecodeError

BASE_URL = "https://metabase.resuelve.io"
CARD_ID = 11320


# =========================
# 1) Detectar entorno
# =========================
def running_in_colab():
    try:
        import google.colab  # noqa
        return True
    except ImportError:
        return False


# =========================
# 2) Credenciales Metabase
# =========================
def get_mb_credentials():
    user = None
    password = None

    # Colab: Secrets
    if running_in_colab():
        from google.colab import userdata
        user = userdata.get("MB_USER")
        password = userdata.get("MB_PASS")

    # GitHub / local: env vars
    if not user:
        user = os.environ.get("MB_USER")
    if not password:
        password = os.environ.get("MB_PASS")

    if not user or not password:
        raise ValueError("No encontr√© credenciales de Metabase (MB_USER / MB_PASS).")

    return user, password


# =========================
# 3) Sesi√≥n Metabase
# =========================
def create_metabase_session():
    user, password = get_mb_credentials()

    resp = requests.post(
        f"{BASE_URL}/api/session",
        json={"username": user, "password": password},
        timeout=60,
    )
    resp.raise_for_status()

    session_id = resp.json()["id"]
    return {"X-Metabase-Session": session_id}


# =========================
# 4) Descargar tarjeta
# =========================
def get_card_full(card_id: int) -> pd.DataFrame:
    headers = create_metabase_session()

    try:
        # Intento JSON
        resp = requests.post(
            f"{BASE_URL}/api/card/{card_id}/query/json",
            headers=headers,
            json={},
            timeout=600,
        )
        resp.raise_for_status()
        return pd.DataFrame(resp.json())

    except (JSONDecodeError, ValueError):
        # Fallback CSV
        resp = requests.post(
            f"{BASE_URL}/api/card/{card_id}/query/csv",
            headers=headers,
            json={},
            timeout=600,
        )
        resp.raise_for_status()
        return pd.read_csv(StringIO(resp.text))


# =========================
# 5) Ejecutar + filtrar columnas
# =========================
df_11320 = get_card_full(CARD_ID)

# normalizar nombres de columnas
df_11320.columns = [c.lower() for c in df_11320.columns]

cols_needed = ["referencia", "id_deuda", "estado_deuda", "sub_estado_deuda", "estado_reparadora", "sub_estado_reparadora"]

missing = set(cols_needed) - set(df_11320.columns)
if missing:
    raise ValueError(f"Faltan columnas requeridas en el DataFrame: {missing}")

df_11320 = df_11320[cols_needed].copy()

print("‚úÖ DataFrame listo:", df_11320.shape)
df_11320.head()

‚úÖ DataFrame listo: (52750, 6)


Unnamed: 0,referencia,id_deuda,estado_deuda,sub_estado_deuda,estado_reparadora,sub_estado_reparadora
0,3166163877,859643,cancelled,preparing_negotiation,active,saving_paused
1,3112364580,1281002,negotiation,negotiation,active,saving_paused
2,3218004797,988802,cancelled,preparing_negotiation,active,saving_paused
3,3102001339,1289998,negotiation,negotiation,active,saving_paused
4,3053846463,547420,negotiation,negotiation,active,saving_paused


#Credito

In [15]:
import os, json
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials

# =========================
# CARGAR MI_JSON (Colab o GitHub/local)
# =========================
mi_json = None

# 1) Intentar Colab (si est√° disponible)
try:
    from google.colab import userdata
    mi_json = userdata.get("MI_JSON")
except Exception:
    pass

# 2) Si no es Colab, intentar variable de entorno
if not mi_json:
    mi_json = os.environ.get("MI_JSON")

if not mi_json:
    raise ValueError(
        "No se encontr√≥ MI_JSON. "
        "En Colab: configura el secreto MI_JSON. "
        "En GitHub/local: define la variable de entorno MI_JSON."
    )

mi_json_dict = json.loads(mi_json)

# =========================
# AUTENTICACI√ìN
# =========================
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
    "https://www.googleapis.com/auth/drive.readonly",
]

creds = Credentials.from_service_account_info(mi_json_dict, scopes=SCOPES)
client = gspread.authorize(creds)

# =========================
# ABRIR SHEET Y HOJA
# =========================
spreadsheet_id = "1yAwiQ72vfiSFpoObCQdHWFW3BmARLS-uLl4vc1dN5EY"
sheet_name = "Priorizaci√≥n Batch"

ws = client.open_by_key(spreadsheet_id).worksheet(sheet_name)

# =========================
# LEER COMO DATAFRAME
# =========================
data = ws.get_all_records()
df_batch = pd.DataFrame(data)

df_batch.head()

Unnamed: 0,Referencia,Origen,Veh√≠culo,Linea de Negocio,Pricing,Tipo de Cl.,AM,AMDUM,Status Reparadora,DR TOTAL,...,DE ($),DE (#),Edad,oct-25,nov-25,dic-25,NO_PERFILA,Criterio,DEMANDAS,Potencial
0,3105001765,Batch 1,POWWI,BRAVO_CO,NORMAL,13,495636,1.55%,Activo,31538850,...,#VALUE!,#VALUE!,27,NO_OPP,NO_OPP,NO_OPP,False,False,0,#VALUE!
1,3106139107,Batch 1,POWWI,BRAVO_CO,NORMAL,13,165568,2.77%,Activo,5976650,...,#VALUE!,#VALUE!,52,NO_OPP,NO_OPP,NO_OPP,False,False,0,#VALUE!
2,3104316480,Batch 1,COINK,BRAVO_CO,NORMAL,13,466702,1.43%,Activo,32594100,...,#VALUE!,#VALUE!,51,NO_OPP,NO_OPP,NO_OPP,False,False,0,#VALUE!
3,3057860566,Batch 1,COINK,BRAVO_CO,NORMAL,13,332298,2.77%,Activo,12010950,...,#VALUE!,#VALUE!,34,NO_OPP,NO_OPP,NO_OPP,False,False,0,#VALUE!
4,3134933895,Batch 1,COINK,BRAVO_CO,NORMAL,13,301199,3.91%,Activo,7701750,...,#VALUE!,#VALUE!,31,NO_OPP,NO_OPP,NO_OPP,False,False,0,#VALUE!


In [16]:
import os, json
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials

# =========================
# CARGAR MI_JSON (Colab o GitHub)
# =========================
mi_json = None

# 1) Intentar Colab (si est√° disponible)
try:
    from google.colab import userdata  # solo existe en Colab
    mi_json = userdata.get("MI_JSON")
except Exception:
    pass

# 2) Si no es Colab, intentar variable de entorno (GitHub Secrets / local)
if not mi_json:
    mi_json = os.environ.get("MI_JSON")

if not mi_json:
    raise ValueError(
        "No se encontr√≥ MI_JSON. "
        "En Colab: configura el secreto MI_JSON. "
        "En GitHub/local: define la variable de entorno MI_JSON."
    )

mi_json_dict = json.loads(mi_json)

# =========================
# AUTENTICACI√ìN (gspread)
# =========================
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
    "https://www.googleapis.com/auth/drive.readonly",
]

creds = Credentials.from_service_account_info(mi_json_dict, scopes=SCOPES)
client = gspread.authorize(creds)

# =========================
# ABRIR SHEET Y HOJA
# =========================
spreadsheet_id = "1yAwiQ72vfiSFpoObCQdHWFW3BmARLS-uLl4vc1dN5EY"
sheet_name = "Priorizaci√≥n Novaciones"

ws = client.open_by_key(spreadsheet_id).worksheet(sheet_name)

# =========================
# LEER COMO DATAFRAME
# =========================
data = ws.get_all_records()
df_novaciones = pd.DataFrame(data)

df_novaciones.head()

Unnamed: 0,Referencia,Origen,Veh√≠culo,Linea de Negocio,Pricing,Tipo de Cl.,Referencia CR,AM,AMDUM,Status Reparadora,...,Edad,oct-25,nov-25,dic-25,NO_PERFILA,DSLP,Dias de Atraso,Criterio,DEMANDAS,Potencial
0,3138166574,Cr Parcial,POWWI,BRAVO_CO,PRICING 20%,15,8011170A,1604896,1.17%,Activo,...,36.0,CERRADO,NO_OPP,CERRADO,False,6,0,False,0,No viable
1,52325208,Cr Parcial,POWWI,BRAVO_CO,PRICING 25%,16,8011452,2048209,2.13%,Baja,...,,NO_OPP,NO_OPP,CERRADO,False,6,0,,0,No viable
2,3192716223,Cr Parcial,DING,RTD_CO,NORMAL,13,8007650RR,259138,2.19%,Activo,...,32.0,NO_OPP,NO_OPP,NO_OPP,False,35,0,True,0,No viable
3,1032360290,Cr Parcial,SKANDIA,BRAVO_CO,PRICING 25%,16,8011449,1123865,1.27%,Activo,...,39.0,NO_OPP,NO_PERFILA,CERRADO,True,21,0,True,0,No viable
4,3183935756,Cr Parcial,POWWI,BRAVO_CO,NORMAL,13,8011440,538093,1.47%,Activo,...,33.0,PENDIENTE_DE_ACUERDOS,NO_LE_INTERESA,CERRADO,False,36,0,True,0,No viable


In [17]:
import pandas as pd

# Copias opcionales
df_batch = df_batch.copy()
df_novaciones = df_novaciones.copy()

# 1) Quedarnos solo con las columnas en com√∫n
cols_comunes = ["Referencia", "Origen", "Potencial"]

df_batch_sub = df_batch[cols_comunes]
df_novaciones_sub = df_novaciones[cols_comunes]

# 2) Concatenar
df_credi = pd.concat(
    [df_batch_sub, df_novaciones_sub],
    ignore_index=True
)

df_credi

Unnamed: 0,Referencia,Origen,Potencial
0,3105001765,Batch 1,#VALUE!
1,3106139107,Batch 1,#VALUE!
2,3104316480,Batch 1,#VALUE!
3,3057860566,Batch 1,#VALUE!
4,3134933895,Batch 1,#VALUE!
...,...,...,...
4569,3102650946,Cr Parcial,No viable
4570,3005091498,Cr Parcial,No viable
4571,3005307929,Cr Parcial,No viable
4572,43625657,Cr Parcial,No viable


Columnas actualizadas

In [18]:
df

Unnamed: 0,Referencia,Cedula,Nombre del cliente,Id deuda,correo,Deudas Activas,BANCO,N√∫mero de Cr√©dito,D_BRAVO,DBT,...,I_ESPERADO,Negociador,Tipo Elegible,P_Cierre_norm,D√≠as Atraso liquidacion,Estado PL,Ultima Liquidacion,dias_desde_ultima_liq,Priority_level,Bucket
0,3012650,52216213,CLAUDIA PATRICIA AVENDANO CALDERON,111266,claudiapa426@hotmail.com,2,Scotiabank Citibank,318121778,9256000.0,58255000.0,...,125415.711006,Negodito,,0.633814,,Al d√≠a,NaT,,Prioridad 2,
1,3122962933,50868603,Luz Nelly Betancourt Ortega,225561,zulyllen@hotmail.com,3,Banco de Occidente,540625-6-730934-608 VQ,2138165.0,11396681.0,...,12585.582824,William Santiago Abril Esguerra,,0.392411,2162.0,Atrasado,NaT,,Prioridad 1,10.0
2,3122962933,50868603,Luz Nelly Betancourt Ortega,225562,zulyllen@hotmail.com,3,Banco de Occidente,417899-4-107487-516 VQ,3299116.0,11396681.0,...,18849.499266,William Santiago Abril Esguerra,,0.340089,1796.0,Atrasado,NaT,,Prioridad 1,10.0
3,3122962933,50868603,Luz Nelly Betancourt Ortega,225563,zulyllen@hotmail.com,3,√âxito,745089099,5959400.0,11396681.0,...,32976.814790,William Santiago Abril Esguerra,,0.274687,1342.0,Atrasado,NaT,,Prioridad 1,10.0
4,3196892494,52902886,Diana Marcela Suarez,404243,marcelasuarezladino@outlook.com,5,Banco Popular,346383,1921650.0,10126350.0,...,9581.346900,Dayana Isabel Ojito Ortiz,,0.346250,1173.0,Atrasado,NaT,,Prioridad 1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41253,3225833196,1019066241,JUAN DAVID LOZANO TRIVINO,2091518,juan17lt@hotmail.com,4,Davivienda,1891,13695024.0,174795352.0,...,56291.062129,Gabriela Saavedra Latorre,,0.376714,,Al d√≠a,NaT,,Prioridad 2,10.0
41254,3225833196,1019066241,JUAN DAVID LOZANO TRIVINO,2091519,juan17lt@hotmail.com,4,Scotiabank Colpatria,5853,50463309.0,174795352.0,...,187132.142578,Gabriela Saavedra Latorre,,0.247219,,Al d√≠a,NaT,,Prioridad 2,10.0
41255,3225833196,1019066241,JUAN DAVID LOZANO TRIVINO,2091520,juan17lt@hotmail.com,4,Bancolombia,9610,82022139.0,174795352.0,...,285911.490530,Gabriela Saavedra Latorre,,0.247219,,Al d√≠a,NaT,,Prioridad 2,10.0
41256,3105507630,79381039,JUAN CARLOS PACHECO CONTRERAS,2091523,j.pacheco1166@gmail.com,5,Bancolombia,130100335,130935011.0,190332243.0,...,456411.215484,Julian Andres Ossa Bohorquez,,0.247219,,Al d√≠a,NaT,,Prioridad 2,10.0


In [19]:
df.info()

<class 'pandas.DataFrame'>
RangeIndex: 41258 entries, 0 to 41257
Data columns (total 70 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   Referencia                 41258 non-null  int64         
 1   Cedula                     41258 non-null  int64         
 2   Nombre del cliente         41258 non-null  str           
 3   Id deuda                   41258 non-null  int64         
 4   correo                     41258 non-null  str           
 5   Deudas Activas             41258 non-null  int64         
 6   BANCO                      41258 non-null  str           
 7   N√∫mero de Cr√©dito          41258 non-null  object        
 8   D_BRAVO                    41258 non-null  float64       
 9   DBT                        41113 non-null  float64       
 10  MORA                       41258 non-null  int64         
 11  Dias de Atraso             41258 non-null  int64         
 12  Apartado Mens

In [20]:
# Normalizar nombres por seguridad (opcional pero recomendado)
df.columns = [c.strip() for c in df.columns]

# Columnas que quieres conservar
cols_keep = ["Referencia", "Id deuda", "ID_reparadora"]

# Verificar que existan
missing = set(cols_keep) - set(df.columns)
if missing:
    raise ValueError(f"Faltan columnas en el DataFrame: {missing}")

# Filtrar
df = df[cols_keep].copy()

df.head()

Unnamed: 0,Referencia,Id deuda,ID_reparadora
0,3012650,111266,36429
1,3122962933,225561,58880
2,3122962933,225562,58880
3,3122962933,225563,58880
4,3196892494,404243,116628


In [21]:
df

Unnamed: 0,Referencia,Id deuda,ID_reparadora
0,3012650,111266,36429
1,3122962933,225561,58880
2,3122962933,225562,58880
3,3122962933,225563,58880
4,3196892494,404243,116628
...,...,...,...
41253,3225833196,2091518,599188
41254,3225833196,2091519,599188
41255,3225833196,2091520,599188
41256,3105507630,2091523,597684


In [22]:
df_saldos_concat

Unnamed: 0,REFERENCIA,Ahorro,VEHICULO,Saldo
0,3001033679,802214,POWWI,801955.0
1,3001342492,1347428,POWWI,
2,3001456624,0,POWWI,
3,3001542338,0,POWWI,396063.0
4,3001720277,482072,POWWI,
...,...,...,...,...
15006,3225957847,0,COINK,
15007,3158457408,0,COINK,
15008,3015562062,0,COINK,816616.0
15009,3213353311,0,COINK,


In [23]:
import pandas as pd

# =========================
# 0) Normalizaci√≥n b√°sica
# =========================
df.columns = [c.strip() for c in df.columns]
df_saldos_concat.columns = [c.strip() for c in df_saldos_concat.columns]

df["Referencia"] = df["Referencia"].astype(str)
df_saldos_concat["REFERENCIA"] = df_saldos_concat["REFERENCIA"].astype(str)

n0 = len(df)  # para verificar que no cambie

# =========================
# 1) Colapsar df_saldos_concat a 1 fila por REFERENCIA (evita duplicar filas)
# =========================
# Ahorro = suma de Ahorro por referencia (si tienes varias filas por veh√≠culo)
# Por cobrar = tomar el mismo valor por referencia; si var√≠a, usamos max (o sum si aplica)
aux_saldos = (
    df_saldos_concat
      .groupby("REFERENCIA", as_index=False)
      .agg({
          "Ahorro": "max",   # <- Ahorro total por referencia
          "Saldo": "sum"     # <- Por cobrar (si es √∫nico, max=√∫nico). Si quieres sum, c√°mbialo a "sum"
      })
      .rename(columns={
          "REFERENCIA": "Referencia",
          "Ahorro": "Ahorro total",
          "Saldo": "Por cobrar"
      })
)

# =========================
# 2) Merge LEFT (NO agrega filas)
# =========================
df = df.merge(aux_saldos, on="Referencia", how="left", validate="many_to_one")

# =========================
# 3) Ahorro medio = Ahorro total / (# Id deuda √∫nicos por Referencia)
#    (sin merges adicionales -> no cambia filas)
# =========================
deudas_unicas = df.groupby("Referencia")["Id deuda"].transform("nunique")
df["Ahorro medio"] = df["Ahorro total"] / deudas_unicas

# =========================
# 4) Verificaci√≥n estricta
# =========================
assert len(df) == n0, "‚ùå Se alter√≥ el n√∫mero de filas (esto no deber√≠a pasar)."

df.head()

Unnamed: 0,Referencia,Id deuda,ID_reparadora,Ahorro total,Por cobrar,Ahorro medio
0,3012650,111266,36429,,,
1,3122962933,225561,58880,,,
2,3122962933,225562,58880,,,
3,3122962933,225563,58880,,,
4,3196892494,404243,116628,,,


In [24]:
df

Unnamed: 0,Referencia,Id deuda,ID_reparadora,Ahorro total,Por cobrar,Ahorro medio
0,3012650,111266,36429,,,
1,3122962933,225561,58880,,,
2,3122962933,225562,58880,,,
3,3122962933,225563,58880,,,
4,3196892494,404243,116628,,,
...,...,...,...,...,...,...
41253,3225833196,2091518,599188,2268774.0,199994.0,567193.500000
41254,3225833196,2091519,599188,2268774.0,199994.0,567193.500000
41255,3225833196,2091520,599188,2268774.0,199994.0,567193.500000
41256,3105507630,2091523,597684,2848260.0,0.0,569652.000000


In [25]:
import pandas as pd
import numpy as np

# =========================
# 0) Preparaci√≥n: columnas y tipos
# =========================
# (no cambia filas de df)
df.columns = [c.strip() for c in df.columns]
df_11382.columns = [c.strip() for c in df_11382.columns]

# Claves como string para evitar no-match por tipo
df["Referencia"] = df["Referencia"].astype(str)
df["ID_reparadora"] = df["ID_reparadora"].astype(str)

# En df_11382: bank_reference y message_date_utc (y un id para cruzar por ID_reparadora)
# Normalizamos nombres a min√∫scula para manejar variantes
df_11382 = df_11382.copy()
df_11382.columns = [c.lower() for c in df_11382.columns]

# Asegurar que existan estas columnas (ajusta si tu df_11382 tiene otro nombre para el id)
required = {"bank_reference", "message_date_utc"}
missing = required - set(df_11382.columns)
if missing:
    raise ValueError(f"En df_11382 faltan columnas requeridas: {missing}")

# Detectar columna id en df_11382 para match con ID_reparadora (ajusta aqu√≠ si ya sabes el nombre exacto)
# Preferencias t√≠picas:
id_candidates = ["id_reparadora", "id", "id_deuda", "reparadora_id"]
id_col = next((c for c in id_candidates if c in df_11382.columns), None)
if id_col is None:
    raise ValueError(
        "No encontr√© en df_11382 una columna de ID para cruzar con ID_reparadora. "
        "Dime el nombre exacto de esa columna en df_11382 (ej: 'id_reparadora')."
    )

# Tipos para llaves
df_11382["bank_reference"] = df_11382["bank_reference"].astype(str)
df_11382[id_col] = df_11382[id_col].astype(str)

# =========================
# 1) Asegurar fecha en hora Colombia y sin tz (datetime normal)
# =========================
df_11382["message_date_utc"] = (
    pd.to_datetime(df_11382["message_date_utc"], utc=True, errors="coerce")
      .dt.tz_convert("America/Bogota")
      .dt.tz_localize(None)
)

# =========================
# 2) Sacar √öLTIMA fecha por bank_reference y por id (max)
# =========================
last_by_ref = (
    df_11382.groupby("bank_reference", as_index=False)["message_date_utc"]
           .max()
           .rename(columns={"bank_reference": "Referencia", "message_date_utc": "fecha_mensaje_ref"})
)

last_by_id = (
    df_11382.groupby(id_col, as_index=False)["message_date_utc"]
           .max()
           .rename(columns={id_col: "ID_reparadora", "message_date_utc": "fecha_mensaje_id"})
)

# =========================
# 3) Unir a df (SIN agregar filas) y resolver fallback:
#    primero por Referencia, si falta entonces por ID_reparadora
# =========================
n0 = len(df)

df = df.merge(last_by_ref, on="Referencia", how="left", validate="many_to_one")
df = df.merge(last_by_id, on="ID_reparadora", how="left", validate="many_to_one")

# fecha mensaje final (prioriza por referencia)
df["fecha mensaje"] = df["fecha_mensaje_ref"].combine_first(df["fecha_mensaje_id"])

# limpiar auxiliares
df = df.drop(columns=["fecha_mensaje_ref", "fecha_mensaje_id"])

assert len(df) == n0, "‚ùå Se alter√≥ el n√∫mero de filas (esto no deber√≠a pasar)."

# =========================
# 4) Crear "ultimo contacto" (0 si este mes, 1 si mes pasado, ..., 6 si >6 o NaN)
# =========================
now_co = pd.Timestamp.now(tz="America/Bogota").tz_localize(None)

# diferencia en meses entre (hoy) y (fecha mensaje)
# (year*12 + month) para ambos y restar
msg = df["fecha mensaje"]
months_diff = (now_co.year * 12 + now_co.month) - (msg.dt.year * 12 + msg.dt.month)

# si NaN -> 6; si >6 -> 6; si <0 (fecha futura) -> 0
df["ultimo contacto"] = months_diff.clip(lower=0)
df["ultimo contacto"] = df["ultimo contacto"].where(msg.notna(), 6)
df["ultimo contacto"] = df["ultimo contacto"].clip(upper=6).astype("Int64")

df.head()

Unnamed: 0,Referencia,Id deuda,ID_reparadora,Ahorro total,Por cobrar,Ahorro medio,fecha mensaje,ultimo contacto
0,3012650,111266,36429,,,,NaT,6
1,3122962933,225561,58880,,,,2025-12-29 07:31:41,2
2,3122962933,225562,58880,,,,2025-12-29 07:31:41,2
3,3122962933,225563,58880,,,,2025-12-29 07:31:41,2
4,3196892494,404243,116628,,,,NaT,6


In [26]:
import pandas as pd

# =========================
# 0) Normalizaci√≥n b√°sica
# =========================
df.columns = [c.strip() for c in df.columns]
df_11320.columns = [c.strip() for c in df_11320.columns]

# Clave como string para evitar no-match
df["Id deuda"] = df["Id deuda"].astype(str)              #R
df_11320["id_deuda"] = df_11320["id_deuda"].astype(str) #r

n0 = len(df)  # para verificaci√≥n final

# =========================
# 1) Colapsar df_11320 a 1 fila por referencia
#    (tomamos el √∫ltimo estado v√°lido; si prefieres otro criterio, dime)
# =========================
aux_estado = (
    df_11320
      .sort_values(by=["id_deuda"])  # orden neutro (no afecta max)
      .groupby("id_deuda", as_index=False)
      .agg({
          "estado_deuda": "last",
          "sub_estado_deuda": "last",
          "estado_reparadora": "last",
          "sub_estado_reparadora": "last"
      })
      .rename(columns={"id_deuda": "Id deuda"})
)

# =========================
# 2) Merge LEFT (NO agrega filas)
# =========================
df = df.merge(
    aux_estado,
    on="Id deuda",
    how="left",
    validate="many_to_one"   # üîí evita duplicaci√≥n silenciosa
)

# =========================
# 3) Verificaci√≥n estricta
# =========================
assert len(df) == n0, "‚ùå Se alter√≥ el n√∫mero de filas (esto no deber√≠a pasar)."

df.head()

Unnamed: 0,Referencia,Id deuda,ID_reparadora,Ahorro total,Por cobrar,Ahorro medio,fecha mensaje,ultimo contacto,estado_deuda,sub_estado_deuda,estado_reparadora,sub_estado_reparadora
0,3012650,111266,36429,,,,NaT,6,negotiation,negotiation,active,saving_paused
1,3122962933,225561,58880,,,,2025-12-29 07:31:41,2,negotiation,negotiation,active,saving_paused
2,3122962933,225562,58880,,,,2025-12-29 07:31:41,2,negotiation,negotiation,active,saving_paused
3,3122962933,225563,58880,,,,2025-12-29 07:31:41,2,negotiation,negotiation,active,saving_paused
4,3196892494,404243,116628,,,,NaT,6,negotiation,negotiation,active,saving_paused


In [27]:
import pandas as pd

# =========================
# 0) Normalizaci√≥n b√°sica
# =========================
df.columns = [c.strip() for c in df.columns]
df_credi.columns = [c.strip() for c in df_credi.columns]

df["Referencia"] = df["Referencia"].astype(str)
df_credi["Referencia"] = df_credi["Referencia"].astype(str)

n0 = len(df)  # verificaci√≥n final

# =========================
# 1) Colapsar df_credi a 1 fila por Referencia
# =========================
aux_credi = (
    df_credi
      .groupby("Referencia", as_index=False)
      .agg({
          "Origen": "last",        # categor√≠a ‚Üí √∫ltimo valor
          "Potencial": "last"      # valor ‚Üí √∫ltimo (o max si prefieres)
      })
      .rename(columns={
          "Origen": "tipo de Liquidacion",
          "Potencial": "Potencial Credito"
      })
)

# =========================
# 2) Merge LEFT (NO agrega filas)
# =========================
df = df.merge(
    aux_credi,
    on="Referencia",
    how="left",
    validate="many_to_one"   # üîí evita duplicaciones
)

# =========================
# 3) Verificaci√≥n estricta
# =========================
assert len(df) == n0, "‚ùå Se alter√≥ el n√∫mero de filas (esto no deber√≠a pasar)."

df.head()

Unnamed: 0,Referencia,Id deuda,ID_reparadora,Ahorro total,Por cobrar,Ahorro medio,fecha mensaje,ultimo contacto,estado_deuda,sub_estado_deuda,estado_reparadora,sub_estado_reparadora,tipo de Liquidacion,Potencial Credito
0,3012650,111266,36429,,,,NaT,6,negotiation,negotiation,active,saving_paused,,
1,3122962933,225561,58880,,,,2025-12-29 07:31:41,2,negotiation,negotiation,active,saving_paused,,
2,3122962933,225562,58880,,,,2025-12-29 07:31:41,2,negotiation,negotiation,active,saving_paused,,
3,3122962933,225563,58880,,,,2025-12-29 07:31:41,2,negotiation,negotiation,active,saving_paused,,
4,3196892494,404243,116628,,,,NaT,6,negotiation,negotiation,active,saving_paused,,


In [28]:
df.info()

<class 'pandas.DataFrame'>
RangeIndex: 41258 entries, 0 to 41257
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   Referencia             41258 non-null  str           
 1   Id deuda               41258 non-null  str           
 2   ID_reparadora          41258 non-null  str           
 3   Ahorro total           30669 non-null  float64       
 4   Por cobrar             30669 non-null  float64       
 5   Ahorro medio           30669 non-null  float64       
 6   fecha mensaje          38184 non-null  datetime64[us]
 7   ultimo contacto        41258 non-null  Int64         
 8   estado_deuda           40342 non-null  str           
 9   sub_estado_deuda       32696 non-null  str           
 10  estado_reparadora      40342 non-null  str           
 11  sub_estado_reparadora  40339 non-null  str           
 12  tipo de Liquidacion    10086 non-null  str           
 13  Potencial Cr

In [29]:
import io
import json
import os
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload

import openpyxl
from openpyxl.utils.dataframe import dataframe_to_rows


# ======================================================
# 1) Credenciales (Colab / GitHub) ‚Äî CON ESCRITURA
# ======================================================
def get_credentials() -> Credentials:
    try:
        from google.colab import userdata  # type: ignore
        mi_json = userdata.get("MI_JSON")
        if not mi_json:
            raise ValueError("MI_JSON no encontrado en Colab userdata")
        info = json.loads(mi_json) if isinstance(mi_json, str) else mi_json
        print("Entorno detectado: Google Colab")
    except Exception:
        mi_json = os.environ.get("MI_JSON")
        if not mi_json:
            raise ValueError("MI_JSON no encontrado como variable de entorno (GitHub/local)")
        info = json.loads(mi_json)
        print("Entorno detectado: GitHub / local")

    return Credentials.from_service_account_info(
        info,
        scopes=["https://www.googleapis.com/auth/drive"],
    )

creds = get_credentials()
drive_service = build("drive", "v3", credentials=creds)


# ======================================================
# 2) Helpers: meses / archivos
# ======================================================
MES_MAP = {"ene": 1, "feb": 2, "mar": 3, "abr": 4, "may": 5, "jun": 6, "jul": 7, "ago": 8, "sep": 9, "oct": 10, "nov": 11, "dic": 12}
MES_NOMBRE = {1: "Enero", 2: "Febrero", 3: "Marzo", 4: "Abril", 5: "Mayo", 6: "Junio", 7: "Julio", 8: "Agosto", 9: "Septiembre", 10: "Octubre", 11: "Noviembre", 12: "Diciembre"}

def sheet_name_from_date(dt: datetime) -> str:
    return f"{MES_NOMBRE[dt.month]} {dt.year}"

def parse_range_from_filename(name: str) -> Optional[Tuple[int, int, int, int]]:
    m = re.search(r"([A-Za-z]{3})(\d{2})\s*-\s*([A-Za-z]{3})(\d{2})", name, flags=re.IGNORECASE)
    if not m:
        return None
    m1, y1, m2, y2 = m.group(1).lower(), m.group(2), m.group(3).lower(), m.group(4)
    if m1 not in MES_MAP or m2 not in MES_MAP:
        return None
    return (2000 + int(y1), MES_MAP[m1], 2000 + int(y2), MES_MAP[m2])

def month_index(year: int, month: int) -> int:
    return year * 12 + month

def file_covers_month(file_range: Tuple[int, int, int, int], target_dt: datetime) -> bool:
    sy, sm, ey, em = file_range
    t = month_index(target_dt.year, target_dt.month)
    a = month_index(sy, sm)
    b = month_index(ey, em)
    return a <= t <= b

def is_df_empty_like(df: Optional[pd.DataFrame]) -> bool:
    if df is None or df.shape[0] == 0:
        return True
    return df.dropna(how="all").shape[0] == 0

def shift_month(dt: datetime, n: int) -> datetime:
    y = dt.year + (dt.month - 1 + n) // 12
    m = (dt.month - 1 + n) % 12 + 1
    return datetime(y, m, 1)


# ======================================================
# 3) Buscar el archivo en carpeta, bajar workbook completo
# ======================================================
def list_assignment_files_in_folder(folder_id: str) -> List[Dict]:
    q = f"'{folder_id}' in parents and trashed=false and name contains 'Asignaciones de Cartera'"
    files: List[Dict] = []
    page_token = None
    while True:
        resp = drive_service.files().list(
            q=q, fields="nextPageToken, files(id,name,mimeType,modifiedTime)", pageToken=page_token
        ).execute()
        for f in resp.get("files", []):
            fr = parse_range_from_filename(f.get("name", ""))
            if fr:
                f["parsed_range"] = fr
                files.append(f)
        page_token = resp.get("nextPageToken")
        if not page_token:
            break
    if not files:
        raise ValueError("No encontr√© archivos 'Asignaciones de Cartera' con rango tipo Ene26-Abr26 en la carpeta.")
    return files

def pick_file_for_month(files_meta: List[Dict], target_dt: datetime) -> Dict:
    candidates = []
    for f in files_meta:
        fr = f["parsed_range"]
        if file_covers_month(fr, target_dt):
            sy, sm, ey, em = fr
            span = month_index(ey, em) - month_index(sy, sm)
            candidates.append((span, f.get("modifiedTime", ""), f))
    if not candidates:
        raise ValueError("No hay archivo que cubra el mes objetivo (revisa nombres/rangos).")
    candidates.sort(key=lambda x: (x[0], x[1]))
    min_span = candidates[0][0]
    same_span = [c for c in candidates if c[0] == min_span]
    same_span.sort(key=lambda x: x[1], reverse=True)
    return same_span[0][2]

def download_file_to_bytes(file_id: str) -> bytes:
    buf = io.BytesIO()
    request = drive_service.files().get_media(fileId=file_id)
    downloader = MediaIoBaseDownload(buf, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    return buf.getvalue()

def upload_bytes_overwrite(file_id: str, content: bytes):
    media = MediaIoBaseUpload(
        io.BytesIO(content),
        mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        resumable=True
    )
    drive_service.files().update(fileId=file_id, media_body=media).execute()


# ======================================================
# 4) PATCH SIN MERGE ‚Äî con regla especial para Tipo de Liquidacion
# ======================================================
def patch_excel_inplace_by_id_deuda(df_excel: pd.DataFrame, df_actualizado: pd.DataFrame) -> pd.DataFrame:
    df_excel = df_excel.copy()
    df_actualizado = df_actualizado.copy()

    df_excel.columns = [c.strip() for c in df_excel.columns]
    df_actualizado.columns = [c.strip() for c in df_actualizado.columns]

    if "Id deuda" not in df_excel.columns:
        raise ValueError("El Excel no tiene columna 'Id deuda'")
    if "Id deuda" not in df_actualizado.columns:
        raise ValueError("Tu df (actualizado) no tiene columna 'Id deuda'")

    # Crear SOLO las 2 columnas nuevas permitidas si no existen
    for newcol in ["fecha mensaje", "Potencial Credito"]:
        if newcol not in df_excel.columns:
            df_excel[newcol] = pd.NA

    # mapeo df -> excel (NO actualizar ID_reparadora)
    col_map_df = {
        "Ahorro total": "Ahorro total",
        "Por cobrar": "Por cobrar",
        "Ahorro medio": "Ahorro medio",
        "fecha mensaje": "fecha mensaje",
        "ultimo contacto": "ultimo contacto",
        "Estado Deuda": "estado_deuda",
        "sub_estado_deuda": "sub_estado_deuda",
        "estado_reparadora": "estado_reparadora",
        "sub_estado_reparadora": "sub_estado_reparadora",
        "Tipo de Liquidacion": "tipo de Liquidacion",
        "Potencial Credito": "Potencial Credito",
    }

    missing_src = [src for src in col_map_df.values() if src not in df_actualizado.columns]
    if missing_src:
        raise ValueError(
            f"Tu df no tiene estas columnas fuente: {missing_src}\n"
            f"Columnas df: {list(df_actualizado.columns)}"
        )

    # Llaves
    df_excel["Id deuda"] = df_excel["Id deuda"].astype(str).str.strip()
    df_actualizado["Id deuda"] = df_actualizado["Id deuda"].astype(str).str.strip()

    df_excel_idx = df_excel.set_index("Id deuda", drop=False)

    # Fuente (1 fila por Id deuda)
    src_cols = ["Id deuda"] + list(set(col_map_df.values()))
    df_src = (
        df_actualizado[src_cols]
        .drop_duplicates(subset=["Id deuda"], keep="last")
        .set_index("Id deuda")
        .reindex(df_excel_idx.index)  # NO agrega filas
    )

    # 1) Overwrite normal para TODAS excepto Tipo de Liquidacion
    normal_update_cols = [
        "Ahorro total",
        "Por cobrar",
        "Ahorro medio",
        "fecha mensaje",
        "ultimo contacto",
        "Estado Deuda",
        "sub_estado_deuda",
        "estado_reparadora",
        "sub_estado_reparadora",
        "Potencial Credito",
    ]

    for dest in normal_update_cols:
        src = col_map_df[dest]
        df_excel_idx[dest] = df_src[src].values

    # 2) Regla especial para Tipo de Liquidacion:
    #    - Si df trae NaN / vac√≠o => NO actualizar (dejar el valor del archivo)
    dest = "Tipo de Liquidacion"
    src = col_map_df[dest]
    new_vals = df_src[src]

    # vac√≠o si NaN o string vac√≠o/espacios
    mask_keep_old = new_vals.isna() | (new_vals.astype(str).str.strip() == "")

    old_vals = df_excel_idx[dest] if dest in df_excel_idx.columns else pd.Series(index=df_excel_idx.index, dtype="object")
    combined = new_vals.copy()
    combined[mask_keep_old] = old_vals[mask_keep_old]

    df_excel_idx[dest] = combined.values

    out = df_excel_idx.reset_index(drop=True)

    # Reordenar columnas nuevas
    if "ultimo contacto" in out.columns and "fecha mensaje" in out.columns:
        cols = list(out.columns)
        cols.remove("fecha mensaje")
        idx = cols.index("ultimo contacto") + 1
        cols.insert(idx, "fecha mensaje")
        out = out[cols]

    if "MORA_CREDITO" in out.columns and "Potencial Credito" in out.columns:
        cols = list(out.columns)
        cols.remove("Potencial Credito")
        idx = cols.index("MORA_CREDITO") + 1
        cols.insert(idx, "Potencial Credito")
        out = out[cols]

    return out


# ======================================================
# 5) Reemplazar HOJA en el workbook y subir a Drive
# ======================================================
def replace_sheet_with_df(wb: openpyxl.Workbook, sheet_name: str, df_sheet: pd.DataFrame):
    if sheet_name not in wb.sheetnames:
        wb.create_sheet(sheet_name)
    ws = wb[sheet_name]
    ws.delete_rows(1, ws.max_row)

    for r in dataframe_to_rows(df_sheet, index=False, header=True):
        ws.append(r)

def pick_month_sheet_nonempty_from_workbook_bytes(xlsx_bytes: bytes, max_back_months: int = 6) -> Tuple[str, pd.DataFrame, openpyxl.Workbook]:
    wb = openpyxl.load_workbook(io.BytesIO(xlsx_bytes))
    today = datetime.today()

    for back in range(0, max_back_months + 1):
        sheet = sheet_name_from_date(shift_month(today, -back))
        if sheet not in wb.sheetnames:
            continue
        df_try = pd.read_excel(io.BytesIO(xlsx_bytes), sheet_name=sheet, engine="openpyxl")
        if is_df_empty_like(df_try):
            continue
        return sheet, df_try, wb

    raise RuntimeError("No encontr√© una hoja no vac√≠a en el workbook dentro del rango de meses.")

def update_drive_excel_file(folder_id: str, df_actualizado: pd.DataFrame, max_back_months: int = 6):
    files_meta = list_assignment_files_in_folder(folder_id)
    chosen = pick_file_for_month(files_meta, datetime.today())

    xlsx_bytes = download_file_to_bytes(chosen["id"])
    sheet_used, df_excel_mes, wb = pick_month_sheet_nonempty_from_workbook_bytes(xlsx_bytes, max_back_months=max_back_months)

    print(f"‚úÖ Base encontrada\n   Archivo: {chosen['name']}\n   Hoja:    {sheet_used}")

    df_excel_actualizado = patch_excel_inplace_by_id_deuda(df_excel_mes, df_actualizado)
    replace_sheet_with_df(wb, sheet_used, df_excel_actualizado)

    out_buf = io.BytesIO()
    wb.save(out_buf)
    upload_bytes_overwrite(chosen["id"], out_buf.getvalue())

    print("‚úÖ Archivo actualizado en Drive (sobrescrito).")


# ======================================================
# 6) EJECUCI√ìN FINAL
# ======================================================
FOLDER_ID = "1cf2p3R7iM0xowAt4muEruDwxZoZqD_jB"
update_drive_excel_file(folder_id=FOLDER_ID, df_actualizado=df, max_back_months=6)

update_drive_excel_file(folder_id=FOLDER_ID, df_actualizado=df, max_back_months=6)

Entorno detectado: GitHub / local


‚úÖ Base encontrada
   Archivo: Asignaciones de Cartera Ene26-Abr26.xlsx
   Hoja:    Enero 2026


‚úÖ Archivo actualizado en Drive (sobrescrito).


‚úÖ Base encontrada
   Archivo: Asignaciones de Cartera Ene26-Abr26.xlsx
   Hoja:    Enero 2026


‚úÖ Archivo actualizado en Drive (sobrescrito).
