In [24]:
import io
import json
import os
import re
from datetime import datetime
from typing import Dict, List, Optional, Tuple

import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload


# ======================================================
# 1) Credenciales (Colab usa MI_JSON desde userdata; fuera de Colab usa MI_JSON env)
# ======================================================
def get_credentials() -> Credentials:
    """
    - En Colab: usa MI_JSON desde google.colab.userdata (NO os.environ)
    - En GitHub/local: usa variable de entorno MI_JSON
    """
    info: Dict

    try:
        from google.colab import userdata  # type: ignore

        mi_json = userdata.get("MI_JSON")
        if not mi_json:
            raise ValueError("MI_JSON no encontrado en Colab userdata")
        info = json.loads(mi_json)
        print("Entorno detectado: Google Colab")
    except Exception:
        mi_json = os.environ.get("MI_JSON")
        if not mi_json:
            raise ValueError("MI_JSON no encontrado como variable de entorno (GitHub/local)")
        info = json.loads(mi_json)
        print("Entorno detectado: GitHub / local")

    return Credentials.from_service_account_info(
        info,
        scopes=[
            "https://www.googleapis.com/auth/drive.readonly",
            "https://www.googleapis.com/auth/spreadsheets.readonly",
        ],
    )


creds = get_credentials()
drive_service = build("drive", "v3", credentials=creds)


# ======================================================
# 2) Helpers: meses en espa√±ol + parsing del nombre del archivo
# ======================================================
MES_MAP = {
    "ene": 1,
    "feb": 2,
    "mar": 3,
    "abr": 4,
    "may": 5,
    "jun": 6,
    "jul": 7,
    "ago": 8,
    "sep": 9,
    "oct": 10,
    "nov": 11,
    "dic": 12,
}
MES_NOMBRE = {
    1: "Enero",
    2: "Febrero",
    3: "Marzo",
    4: "Abril",
    5: "Mayo",
    6: "Junio",
    7: "Julio",
    8: "Agosto",
    9: "Septiembre",
    10: "Octubre",
    11: "Noviembre",
    12: "Diciembre",
}


def sheet_name_from_date(dt: datetime) -> str:
    """Ej: datetime(2025,12,...) -> 'Diciembre 2025' """
    return f"{MES_NOMBRE[dt.month]} {dt.year}"


def parse_range_from_filename(name: str) -> Optional[Tuple[int, int, int, int]]:
    """
    Espera nombres tipo:
      'Asignaciones de Cartera Ene26-Abr26.xlsx'
      'Asignaciones de Cartera Sep25-Dic25.xlsx'
    Retorna (start_year, start_month, end_year, end_month) o None si no matchea.
    """
    m = re.search(r"([A-Za-z]{3})(\d{2})\s*-\s*([A-Za-z]{3})(\d{2})", name, flags=re.IGNORECASE)
    if not m:
        return None

    m1, y1, m2, y2 = m.group(1).lower(), m.group(2), m.group(3).lower(), m.group(4)
    if m1 not in MES_MAP or m2 not in MES_MAP:
        return None

    start_month = MES_MAP[m1]
    end_month = MES_MAP[m2]
    start_year = 2000 + int(y1)
    end_year = 2000 + int(y2)

    return (start_year, start_month, end_year, end_month)


def month_index(year: int, month: int) -> int:
    """Convierte (year, month) a √≠ndice comparable."""
    return year * 12 + month


def file_covers_month(file_range: Tuple[int, int, int, int], target_dt: datetime) -> bool:
    sy, sm, ey, em = file_range
    t = month_index(target_dt.year, target_dt.month)
    a = month_index(sy, sm)
    b = month_index(ey, em)
    return a <= t <= b


def is_df_empty_like(df: Optional[pd.DataFrame]) -> bool:
    """
    Considera 'vac√≠a' si:
    - df es None
    - df tiene 0 filas
    - o todas las filas est√°n completamente NaN
    """
    if df is None or df.shape[0] == 0:
        return True
    return df.dropna(how="all").shape[0] == 0


# ======================================================
# 3) Listar archivos en carpeta y elegir el correcto por mes
# ======================================================
def list_assignment_files_in_folder(folder_id: str) -> List[Dict]:
    """
    Lista archivos en la carpeta cuyo nombre contenga 'Asignaciones de Cartera'
    y devuelve una lista con metadatos: id, name, mimeType, modifiedTime, parsed_range
    """
    q = f"'{folder_id}' in parents and trashed=false and name contains 'Asignaciones de Cartera'"
    files: List[Dict] = []
    page_token = None

    while True:
        resp = (
            drive_service.files()
            .list(q=q, fields="nextPageToken, files(id,name,mimeType,modifiedTime)", pageToken=page_token)
            .execute()
        )

        for f in resp.get("files", []):
            fr = parse_range_from_filename(f.get("name", ""))
            if fr:
                f["parsed_range"] = fr
                files.append(f)

        page_token = resp.get("nextPageToken")
        if not page_token:
            break

    if not files:
        raise ValueError(
            "No encontr√© archivos 'Asignaciones de Cartera' con rango tipo Ene26-Abr26 dentro de la carpeta."
        )

    return files


def pick_file_for_month(files_meta: List[Dict], target_dt: datetime) -> Dict:
    """
    Escoge el archivo cuya ventana (en el nombre) cubra el mes target_dt.
    Si hay varios, elige el de rango m√°s corto (m√°s espec√≠fico) y si empatan, el m√°s reciente.
    """
    candidates = []
    for f in files_meta:
        fr = f["parsed_range"]
        if file_covers_month(fr, target_dt):
            sy, sm, ey, em = fr
            span = month_index(ey, em) - month_index(sy, sm)
            candidates.append((span, f.get("modifiedTime", ""), f))

    if candidates:
        candidates.sort(key=lambda x: (x[0], x[1]))  # menor span, luego por modifiedTime asc
        min_span = candidates[0][0]
        same_span = [c for c in candidates if c[0] == min_span]
        same_span.sort(key=lambda x: x[1], reverse=True)  # m√°s reciente primero
        return same_span[0][2]

    t = month_index(target_dt.year, target_dt.month)

    past = []
    for f in files_meta:
        sy, sm, ey, em = f["parsed_range"]
        end_i = month_index(ey, em)
        if end_i <= t:
            past.append((end_i, f.get("modifiedTime", ""), f))
    if past:
        past.sort(key=lambda x: (x[0], x[1]), reverse=True)
        return past[0][2]

    future = []
    for f in files_meta:
        sy, sm, ey, em = f["parsed_range"]
        start_i = month_index(sy, sm)
        if start_i >= t:
            future.append((start_i, f.get("modifiedTime", ""), f))
    if future:
        future.sort(key=lambda x: (x[0], x[1]))
        return future[0][2]

    raise ValueError("No se pudo escoger un archivo por fecha (revisa nombres/rangos).")


# ======================================================
# 4) Descargar archivo (Google Sheets o Excel) a memoria
# ======================================================
def download_file_to_buffer(file_id: str, mime_type: str) -> io.BytesIO:
    buffer = io.BytesIO()

    if mime_type == "application/vnd.google-apps.spreadsheet":
        request = drive_service.files().export_media(
            fileId=file_id,
            mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    else:
        request = drive_service.files().get_media(fileId=file_id)

    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()

    buffer.seek(0)
    return buffer


# ======================================================
# 5) Intentar leer hoja del mes target; si est√° vac√≠a -> retroceder mes a mes
# ======================================================
def shift_month(dt: datetime, n: int) -> datetime:
    """Mueve dt n meses (n puede ser negativo)."""
    y = dt.year + (dt.month - 1 + n) // 12
    m = (dt.month - 1 + n) % 12 + 1
    return datetime(y, m, 1)


def load_assignment_base_from_folder(
    folder_id: str, max_back_months: int = 24
) -> Tuple[pd.DataFrame, Dict, str]:
    """
    Busca base del mes actual; si no existe o est√° vac√≠a,
    busca mes anterior (incluyendo cambio de archivo si aplica).
    Retorna: (df, file_meta, sheet_name_usada)
    """
    files_meta = list_assignment_files_in_folder(folder_id)
    today = datetime.today()

    last_error: Optional[Exception] = None

    for back in range(0, max_back_months + 1):
        target_dt = shift_month(today, -back)
        target_sheet = sheet_name_from_date(target_dt)
        chosen = pick_file_for_month(files_meta, target_dt)

        try:
            buffer = download_file_to_buffer(chosen["id"], chosen["mimeType"])
            df = pd.read_excel(buffer, sheet_name=target_sheet, engine="openpyxl")

            if is_df_empty_like(df):
                print(f"üü° {target_sheet} encontrado pero vac√≠o en: {chosen['name']} -> probando mes anterior...")
                continue

            print("‚úÖ Base encontrada")
            print(f"   Archivo: {chosen['name']}")
            print(f"   Hoja:    {target_sheet}")
            return df, chosen, target_sheet

        except Exception as e:
            last_error = e
            print(
                f"üü† No se pudo usar {target_sheet} en {chosen['name']} ({type(e).__name__}) -> probando mes anterior..."
            )

    raise RuntimeError(
        f"No encontr√© una hoja v√°lida en los √∫ltimos {max_back_months} meses. "
        f"√öltimo error: {repr(last_error)}"
    )


# ======================================================
# 6) EJECUCI√ìN
# ======================================================
FOLDER_ID = "1cf2p3R7iM0xowAt4muEruDwxZoZqD_jB"

df, meta_file, sheet_used = load_assignment_base_from_folder(
    folder_id=FOLDER_ID,
    max_back_months=24,
)

df.head()

Entorno detectado: Google Colab
‚úÖ Base encontrada
   Archivo: Asignaciones de Cartera Ene26-Abr26.xlsx
   Hoja:    Enero 2026


Unnamed: 0,Referencia,Cedula,Nombre del cliente,Id deuda,correo,Deudas Activas,BANCO,N√∫mero de Cr√©dito,D_BRAVO,DBT,...,I_ESPERADO,Negociador,Tipo Elegible,P_Cierre_norm,D√≠as Atraso liquidacion,Estado PL,Ultima Liquidacion,dias_desde_ultima_liq,Priority_level,Bucket
0,3012650,52216213,CLAUDIA PATRICIA AVENDANO CALDERON,111266,claudiapa426@hotmail.com,2,Scotiabank Citibank,318121778,9256000.0,58255000.0,...,125415.711006,Negodito,,0.633814,,Al d√≠a,NaT,,Prioridad 2,
1,3143959063,194588,Gerardo Gonzalez Cantor,1027469,gerargoca@hotmail.com,2,CREDIVALORES,¬¥401093318,8830800.0,57676800.0,...,63448.507718,Jerry Arturo Hernandez Sanchez,,0.610339,837.0,Atrasado,2023-03-15,1029.0,Prioridad 1,10.0
2,3143959063,194588,Gerardo Gonzalez Cantor,1027470,gerargoca@hotmail.com,2,Serfinanza,22965756,24366300.0,57676800.0,...,52189.380903,Jerry Arturo Hernandez Sanchez,,0.43428,,Al d√≠a,2023-03-15,1029.0,Prioridad 1,10.0
3,3103134531,328330,Otoniel Tavera Rodriguez,1427049,otoniel3617@gmail.com,3,Banco Popular,513010282,13829550.0,29351556.0,...,54702.785025,Jerry Arturo Hernandez Sanchez,,0.274687,0.0,Al d√≠a,2024-07-03,553.0,Prioridad 1,10.0
4,3196892494,52902886,Diana Marcela Suarez,404243,marcelasuarezladino@outlook.com,5,Banco Popular,346383,1921650.0,10126350.0,...,9581.3469,Dayana Isabel Ojito Ortiz,,0.34625,1173.0,Atrasado,NaT,,Prioridad 1,


In [25]:
df.columns

Index(['Referencia', 'Cedula', 'Nombre del cliente', 'Id deuda', 'correo',
       'Deudas Activas', 'BANCO', 'N√∫mero de Cr√©dito', 'D_BRAVO', 'DBT',
       'MORA', 'Dias de Atraso', 'Apartado Mensual', 'Fecha inicio',
       'vehiculo', 'Meses en el Programa', 'tipo_cliente', 'Comisi√≥n Mensual',
       'Tipo de cobro', 'CE', 'Estado Deuda', 'estado_novacion', 'Fecha PL',
       'sub_estado_deuda', 'ID_reparadora', 'estado_reparadora',
       'sub_estado_reparadora', 'dias_mora_ingreso', 'ultima_actividad',
       'PB_PL', 'estado_estructuracion', 'estado_flujo_liquidacion',
       'Ahorro total', 'Ahorro medio', 'Por cobrar', 'BANCOS_ESTANDAR',
       'Descuento', 'ultimo contacto', 'Mora_estructurado', 'MORA_CREDITO',
       'ultimo Pab', 'ultima act', 'Tipo de Liquidacion',
       'Clasificaci√≥n Banco', 'Apartados 30%', 'Plazo', 'Estructurable',
       'Potencial', '# Pagos', 'P_Pab', 'S_PAb', 'Potencial_Estructurados',
       'Tipo_Liq_calc', 'P_cierre_tradicional', 'P_cierre_Cre

In [26]:
# ---------------------------------------
# PASO 1 ‚Äî Construir base de cartera asignada
# ---------------------------------------

# Trabajamos sobre copia por seguridad
df_cartera = df.copy()

# Columnas que queremos conservar
cols_cartera = [
    "Referencia",
    "Id deuda",
    "Negociador",
    "BANCOS_ESTANDAR",
    "Descuento",
    "D_BRAVO",
    "Estructurable",
    "Potencial",
    "Meses en el Programa",
    "Tipo de Liquidacion",
    "Bucket",
    "Ahorro total",
    "Ahorro medio"
]

# Validaci√≥n: columnas faltantes
faltantes = [c for c in cols_cartera if c not in df_cartera.columns]
if faltantes:
    raise ValueError(f"Faltan estas columnas en df: {faltantes}")

# Nos quedamos solo con esas columnas
df_cartera = df_cartera[cols_cartera].copy()

# Opcional: eliminar duplicados por deuda (clave natural)
df_cartera = df_cartera.drop_duplicates(subset=["Id deuda"])

# Reset index limpio
df_cartera = df_cartera.reset_index(drop=True)

df_cartera.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41258 entries, 0 to 41257
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Referencia            41258 non-null  int64  
 1   Id deuda              41258 non-null  int64  
 2   Negociador            41258 non-null  object 
 3   BANCOS_ESTANDAR       41258 non-null  object 
 4   Descuento             36308 non-null  float64
 5   D_BRAVO               41258 non-null  float64
 6   Estructurable         41258 non-null  int64  
 7   Potencial             36171 non-null  float64
 8   Meses en el Programa  41258 non-null  int64  
 9   Tipo de Liquidacion   41258 non-null  object 
 10  Bucket                23172 non-null  float64
 11  Ahorro total          41258 non-null  int64  
 12  Ahorro medio          41258 non-null  float64
dtypes: float64(5), int64(5), object(3)
memory usage: 4.1+ MB


In [27]:
df_cartera

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Descuento,D_BRAVO,Estructurable,Potencial,Meses en el Programa,Tipo de Liquidacion,Bucket,Ahorro total,Ahorro medio
0,3012650,111266,Negodito,Scotiabank Colpatria,0.7126,9256000.0,1,5.0,116,Cr. Parcial,,0,0.0
1,3143959063,1027469,Jerry Arturo Hernandez Sanchez,Credivalores,0.3924,8830800.0,1,7.0,40,Cr. Parcial,10.0,0,0.0
2,3143959063,1027470,Jerry Arturo Hernandez Sanchez,Serfinanza,0.1644,24366300.0,0,22.0,40,Cr. Parcial,10.0,0,0.0
3,3103134531,1427049,Jerry Arturo Hernandez Sanchez,Banco Popular,0.4800,13829550.0,0,64.0,24,Tradicional,10.0,0,0.0
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,0.4800,1921650.0,1,40.0,75,Cr. Parcial,,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
41253,3012454834,1984328,Gabriela Saavedra Latorre,Serfinanza,,2082650.0,0,,4,Tradicional,10.0,63746,15936.5
41254,3012454834,1984329,Gabriela Saavedra Latorre,Colsubsidio,0.0607,3319800.0,0,29.0,4,Tradicional,10.0,63746,15936.5
41255,3235356023,1479854,Norbey Alejandro Duarte Ramirez,Banco de Bogot√°,0.6034,1140300.0,1,3.0,22,Cr. Parcial,0.0,4027,2013.5
41256,3235356023,1479855,Norbey Alejandro Duarte Ramirez,Rappipay,0.4635,2045400.0,1,7.0,22,Cr. Parcial,0.0,4027,2013.5


In [28]:
# -*- coding: utf-8 -*-
import os
import json
import pandas as pd
import gspread
from datetime import datetime
from google.oauth2.service_account import Credentials

# =====================================
# 1. Cargar MI_JSON (Colab / GitHub)
# =====================================
def get_mi_json():
    try:
        # ---- Colab ----
        from google.colab import userdata
        mi_json = userdata.get("MI_JSON")
        if not mi_json:
            raise RuntimeError("MI_JSON no encontrado en Colab userdata")
        print("Entorno detectado: Google Colab")
        return mi_json
    except Exception:
        # ---- GitHub Actions / local ----
        mi_json = os.environ.get("MI_JSON")
        if not mi_json:
            raise RuntimeError("MI_JSON no encontrado como variable de entorno (GitHub/local)")
        if os.environ.get("GITHUB_ACTIONS") == "true":
            print("Entorno detectado: GitHub Actions")
        else:
            print("Entorno detectado: Local")
        return mi_json

mi_json = get_mi_json()
creds_dict = json.loads(mi_json)

SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]
creds = Credentials.from_service_account_info(creds_dict, scopes=SCOPES)
gc = gspread.authorize(creds)

# =====================================
# 2. Definir a√±o objetivo (regla 6 d√≠as)
# =====================================
today = datetime.today()

if today.month == 1 and today.day <= 6:
    target_year = today.year - 1
else:
    target_year = today.year

print(f"üìÖ A√±o objetivo detectado: {target_year}")

# =====================================
# 3. Abrir spreadsheet
# =====================================
SPREADSHEET_ID = "1O8OHuVhgwhLw8XYEBf1uBzLYrxQ45rPiZecHOnAa1Go"
sh = gc.open_by_key(SPREADSHEET_ID)

# =====================================
# 4. Buscar hoja por a√±o en el nombre
# =====================================
worksheet_found = None
for ws in sh.worksheets():
    if str(target_year) in ws.title:
        worksheet_found = ws
        break

if worksheet_found is None:
    raise ValueError(
        f"No se encontr√≥ ninguna hoja cuyo nombre contenga el a√±o {target_year}"
    )

print(f"‚úÖ Hoja seleccionada: '{worksheet_found.title}'")

# =====================================
# 5. Leer a DataFrame
# =====================================
records = worksheet_found.get_all_records()

df_act = pd.DataFrame(records)

print("‚úÖ df_act cargado correctamente")
print("Shape:", df_act.shape)
print(df_act.head())

Entorno detectado: Google Colab
üìÖ A√±o objetivo detectado: 2026
‚úÖ Hoja seleccionada: 'act 2026'
‚úÖ df_act cargado correctamente
Shape: (1350, 7)
                                        observations  \
0  8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...   
1  8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...   
2  8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...   
3  8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...   
4  8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...   

                               end  debt_id  payment_to_bank  \
0  vivian.rodriguez@gobravo.com.co  1898742  (314820000,COP)   
1  vivian.rodriguez@gobravo.com.co  1898737  (270710000,COP)   
2  vivian.rodriguez@gobravo.com.co  1898741  (313720000,COP)   
3  vivian.rodriguez@gobravo.com.co  1898738  (299530000,COP)   
4  vivian.rodriguez@gobravo.com.co  1898739  (304260000,COP)   

           inserted_at  bank_reference       Status_Act  
0  2026-01-02 13:10:20      3157363121  CONTRAPROPUESTA  
1  202

In [29]:
df_act

Unnamed: 0,observations,end,debt_id,payment_to_bank,inserted_at,bank_reference,Status_Act
0,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898742,"(314820000,COP)",2026-01-02 13:10:20,3157363121,CONTRAPROPUESTA
1,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898737,"(270710000,COP)",2026-01-02 13:10:53,3157363121,CONTRAPROPUESTA
2,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898741,"(313720000,COP)",2026-01-02 13:09:48,3157363121,CONTRAPROPUESTA
3,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898738,"(299530000,COP)",2026-01-02 13:11:34,3157363121,CONTRAPROPUESTA
4,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898739,"(304260000,COP)",2026-01-02 13:12:05,3157363121,CONTRAPROPUESTA
...,...,...,...,...,...,...,...
1345,2:30pm//Daniela Sanchez// Me comunico con la t...,maria.sarta@gobravo.com.co,1925961,"(2900000000,COP)",2026-01-07 19:40:56,3136619901,DESCUENTO
1346,NasAcevedo//07/01/2026//2:43//Se realiza valid...,ximena.acevedo@gobravo.com.co,1934269,"(946575000,COP)",2026-01-07 19:45:08,3006557635,CONTRAPROPUESTA
1347,2:30pm//Daniela Sanchez// Me comunico con la t...,maria.sarta@gobravo.com.co,1925961,"(2900000000,COP)",2026-01-07 19:45:47,3136619901,EN_CREDITO
1348,2:46 p.m| Niyiret8068 || Tt recibe dt. Falabel...,niyiret.santos@gobravo.com.co,1797266,"(537400000,COP)",2026-01-07 19:46:35,1026556683,CONTRAPROPUESTA


In [30]:
import pandas as pd
import numpy as np
import re

df_act = df_act.copy()

s = df_act["inserted_at"]

# Pasar todo a string limpio (sin romper NaN)
s_str = s.astype("string").str.strip()

# Serie destino
dt = pd.Series(pd.NaT, index=df_act.index, dtype="datetime64[ns]")

# -----------------------------
# 1) Formato ISO: 2025-12-18 02:09:59 (o 2025-12-18T02:09:59Z)
# -----------------------------
mask_iso = s_str.str.match(r"^\d{4}-\d{2}-\d{2}", na=False)
iso_clean = (
    s_str[mask_iso]
    .str.replace("T", " ", regex=False)
    .str.replace("Z", "", regex=False)
)
dt.loc[mask_iso] = pd.to_datetime(iso_clean, errors="coerce")  # conserva hora

# -----------------------------
# 2) Formato con slash: 2/1/2025 (puede ser d/m/y o m/d/y)
# -----------------------------
mask_slash = s_str.str.contains(r"/", na=False) & (~mask_iso)

# extraer d√≠a/mes/a√±o como n√∫meros
parts = s_str[mask_slash].str.extract(r"^\s*(\d{1,2})/(\d{1,2})/(\d{4})\s*$")
a = pd.to_numeric(parts[0], errors="coerce")  # primera parte
b = pd.to_numeric(parts[1], errors="coerce")  # segunda parte

# reglas para decidir dayfirst vs monthfirst
mask_dayfirst = (a > 12) & (b <= 12)
mask_monthfirst = (b > 12) & (a <= 12)
mask_ambigua = ~(mask_dayfirst | mask_monthfirst)

idx_slash = parts.index

# dayfirst seguro
idx_day = idx_slash[mask_dayfirst.fillna(False)]
dt.loc[idx_day] = pd.to_datetime(s_str.loc[idx_day], errors="coerce", dayfirst=True)

# monthfirst seguro
idx_mon = idx_slash[mask_monthfirst.fillna(False)]
dt.loc[idx_mon] = pd.to_datetime(s_str.loc[idx_mon], errors="coerce", dayfirst=False)

# ambigua -> por defecto dayfirst=True (tu est√°ndar)
idx_amb = idx_slash[mask_ambigua.fillna(True)]
tmp = pd.to_datetime(s_str.loc[idx_amb], errors="coerce", dayfirst=True)

# si alguna ambigua falla, reintenta monthfirst
mask_fail = tmp.isna()
if mask_fail.any():
    tmp.loc[mask_fail] = pd.to_datetime(s_str.loc[idx_amb[mask_fail]], errors="coerce", dayfirst=False)

dt.loc[idx_amb] = tmp

# -----------------------------
# 3) Guardar resultado final
# -----------------------------
df_act["inserted_at"] = dt

# (Opcional) Ver cu√°ntos quedaron NaT
print("NaT en inserted_at:", df_act["inserted_at"].isna().sum())
print("dtype:", df_act["inserted_at"].dtype)

NaT en inserted_at: 0
dtype: datetime64[ns]


In [31]:
import pandas as pd
import numpy as np
import re

df_act["payment_to_bank"] = (
    df_act["payment_to_bank"]
    .astype(str)
    .str.extract(r"\(?\s*([\d]+)\s*,?\s*COP?\s*\)?", expand=False)
    .astype(float)
)

In [32]:
df_act['payment_to_bank'] = df_act['payment_to_bank']/100

In [33]:
df_act = df_act.rename(columns={"Status_Act": "CATEGORIA_PRED"})

In [34]:
df_act

Unnamed: 0,observations,end,debt_id,payment_to_bank,inserted_at,bank_reference,CATEGORIA_PRED
0,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898742,3148200.0,2026-01-02 13:10:20,3157363121,CONTRAPROPUESTA
1,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898737,2707100.0,2026-01-02 13:10:53,3157363121,CONTRAPROPUESTA
2,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898741,3137200.0,2026-01-02 13:09:48,3157363121,CONTRAPROPUESTA
3,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898738,2995300.0,2026-01-02 13:11:34,3157363121,CONTRAPROPUESTA
4,8:09 a.¬†m. 2/01/2026 VivianR// Se valida con c...,vivian.rodriguez@gobravo.com.co,1898739,3042600.0,2026-01-02 13:12:05,3157363121,CONTRAPROPUESTA
...,...,...,...,...,...,...,...
1345,2:30pm//Daniela Sanchez// Me comunico con la t...,maria.sarta@gobravo.com.co,1925961,29000000.0,2026-01-07 19:40:56,3136619901,DESCUENTO
1346,NasAcevedo//07/01/2026//2:43//Se realiza valid...,ximena.acevedo@gobravo.com.co,1934269,9465750.0,2026-01-07 19:45:08,3006557635,CONTRAPROPUESTA
1347,2:30pm//Daniela Sanchez// Me comunico con la t...,maria.sarta@gobravo.com.co,1925961,29000000.0,2026-01-07 19:45:47,3136619901,EN_CREDITO
1348,2:46 p.m| Niyiret8068 || Tt recibe dt. Falabel...,niyiret.santos@gobravo.com.co,1797266,5374000.0,2026-01-07 19:46:35,1026556683,CONTRAPROPUESTA


In [35]:
import pandas as pd

# ‚úÖ Compatibilidad ZoneInfo: en Py<3.9 (algunos entornos) usa backports
try:
    from zoneinfo import ZoneInfo
except ImportError:  # pragma: no cover
    from backports.zoneinfo import ZoneInfo  # type: ignore


def construir_timeline_mes(
    df_cartera: pd.DataFrame,
    df_act: pd.DataFrame,
    ref_date=None,
    tz: str = "America/Bogota",
) -> pd.DataFrame:
    """
    Timeline por deuda:
    - √öltima observaci√≥n antes del mes
    - Todas las observaciones del mes actual
    Requiere que df_act ya tenga columna 'CATEGORIA_PRED' si quieres usarla.
    """

    # =========================
    # 0) Copias defensivas
    # =========================
    df_c = df_cartera.copy()
    df_a = df_act.copy()

    # =========================
    # 1) Fecha de referencia
    # =========================
    tzinfo = ZoneInfo(tz)

    if ref_date is None:
        ref_date = pd.Timestamp.now(tzinfo)
    else:
        ref_date = pd.Timestamp(ref_date)
        if ref_date.tzinfo is None:
            ref_date = ref_date.tz_localize(tzinfo)
        else:
            ref_date = ref_date.tz_convert(tzinfo)

    month_start = ref_date.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    next_month_start = month_start + pd.offsets.MonthBegin(1)

    # =========================
    # 2) Normalizar fechas
    # =========================
    df_a["inserted_at"] = pd.to_datetime(df_a["inserted_at"], errors="coerce")

    if getattr(df_a["inserted_at"].dt, "tz", None) is None:
        df_a["inserted_at"] = df_a["inserted_at"].dt.tz_localize(
            tzinfo,
            nonexistent="shift_forward",
            ambiguous="NaT",
        )
    else:
        df_a["inserted_at"] = df_a["inserted_at"].dt.tz_convert(tzinfo)

    # =========================
    # 3) Separar eventos
    # =========================
    mask_mes = (df_a["inserted_at"] >= month_start) & (df_a["inserted_at"] < next_month_start)

    df_mes = df_a.loc[mask_mes].copy()
    df_prev = df_a.loc[df_a["inserted_at"] < month_start].copy()

    # =========================
    # 4) √öltima antes del mes
    # =========================
    df_prev = df_prev.sort_values(["debt_id", "inserted_at"])
    ultima_prev = df_prev.groupby("debt_id", as_index=False).tail(1)
    ultima_prev["tipo_fila"] = "ultima_antes_mes"

    # =========================
    # 5) Todas las del mes
    # =========================
    df_mes = df_mes.sort_values(["debt_id", "inserted_at"])
    df_mes["tipo_fila"] = "mes_actual"

    # =========================
    # 6) Unir eventos
    # =========================
    eventos = pd.concat([ultima_prev, df_mes], ignore_index=True)

    # Asegura columnas m√≠nimas (evita KeyError si falta alguna)
    cols_eventos = [
        "bank_reference",
        "debt_id",
        "inserted_at",
        "end",
        "payment_to_bank",
        "CATEGORIA_PRED",
        "observations",
        "tipo_fila",
    ]
    for c in cols_eventos:
        if c not in eventos.columns:
            eventos[c] = pd.NA

    eventos = eventos[cols_eventos]

    # =========================
    # 7) Merge con cartera
    # =========================
    df_timeline = df_c.merge(
        eventos,
        left_on=["Referencia", "Id deuda"],
        right_on=["bank_reference", "debt_id"],
        how="left",
    )

    df_timeline = df_timeline.drop(columns=["bank_reference", "debt_id"])

    # =========================
    # 8) Orden final
    # =========================
    df_timeline = (
        df_timeline.sort_values(["Id deuda", "inserted_at"], na_position="first").reset_index(drop=True)
    )

    return df_timeline


# ===== EJECUCI√ìN =====
df_timeline = construir_timeline_mes(df_cartera, df_act)
df_timeline.head(20)

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Descuento,D_BRAVO,Estructurable,Potencial,Meses en el Programa,Tipo de Liquidacion,Bucket,Ahorro total,Ahorro medio,inserted_at,end,payment_to_bank,CATEGORIA_PRED,observations,tipo_fila
0,3012650,111266,Negodito,Scotiabank Colpatria,0.7126,9256000.0,1,5.0,116,Cr. Parcial,,0,0.0,NaT,,,,,
1,3122962933,225561,William Santiago Abril Esguerra,Banco de Occidente,0.5,2138165.0,1,6.0,80,Tradicional,10.0,324649,108216.3333,NaT,,,,,
2,3122962933,225562,William Santiago Abril Esguerra,Banco de Occidente,0.56,3299116.0,1,8.0,80,Tradicional,10.0,324649,108216.3333,NaT,,,,,
3,3122962933,225563,William Santiago Abril Esguerra,Tuya,0.6715,5959400.0,0,13.0,80,Tradicional,10.0,324649,108216.3333,NaT,,,,,
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,0.48,1921650.0,1,40.0,75,Cr. Parcial,,0,0.0,NaT,,,,,
5,3183092531,427629,Laura Yineth Torres Moncada,Tuya,0.7126,7481250.0,0,19.0,73,Tradicional,,241,241.0,NaT,,,,,
6,3003853673,434218,Cindy Viviana Barrera Buitrago,Scotiabank Colpatria,0.58,1279300.0,1,4.0,72,Cr. Parcial,,0,0.0,NaT,,,,,
7,3003853673,434219,Cindy Viviana Barrera Buitrago,Scotiabank Colpatria,0.58,1408000.0,1,5.0,72,Cr. Parcial,,0,0.0,NaT,,,,,
8,3003853673,434220,Cindy Viviana Barrera Buitrago,Serfinanza,0.6584,2440000.0,1,7.0,72,Cr. Parcial,,0,0.0,NaT,,,,,
9,3142828344,445585,Vivian Caterin Rodriguez Verano,Banco Popular,0.54,3886592.0,1,43.0,72,Cr. Parcial,,0,0.0,NaT,,,,,


In [36]:
df_timeline

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Descuento,D_BRAVO,Estructurable,Potencial,Meses en el Programa,Tipo de Liquidacion,Bucket,Ahorro total,Ahorro medio,inserted_at,end,payment_to_bank,CATEGORIA_PRED,observations,tipo_fila
0,3012650,111266,Negodito,Scotiabank Colpatria,0.7126,9256000.0,1,5.0,116,Cr. Parcial,,0,0.0000,NaT,,,,,
1,3122962933,225561,William Santiago Abril Esguerra,Banco de Occidente,0.5000,2138165.0,1,6.0,80,Tradicional,10.0,324649,108216.3333,NaT,,,,,
2,3122962933,225562,William Santiago Abril Esguerra,Banco de Occidente,0.5600,3299116.0,1,8.0,80,Tradicional,10.0,324649,108216.3333,NaT,,,,,
3,3122962933,225563,William Santiago Abril Esguerra,Tuya,0.6715,5959400.0,0,13.0,80,Tradicional,10.0,324649,108216.3333,NaT,,,,,
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,0.4800,1921650.0,1,40.0,75,Cr. Parcial,,0,0.0000,NaT,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41536,3225833196,2091518,Gabriela Saavedra Latorre,Banco Davivienda,0.3637,13695024.0,1,5.0,2,Tradicional,10.0,2507,626.7500,NaT,,,,,
41537,3225833196,2091519,Gabriela Saavedra Latorre,Scotiabank Colpatria,0.5000,50463309.0,0,15.0,2,Tradicional,10.0,2507,626.7500,NaT,,,,,
41538,3225833196,2091520,Gabriela Saavedra Latorre,Bancolombia,0.4700,82022139.0,0,24.0,2,Tradicional,10.0,2507,626.7500,NaT,,,,,
41539,3105507630,2091523,Julian Andres Ossa Bohorquez,Bancolombia,0.4700,130935011.0,0,30.0,2,Tradicional,10.0,1049515,209903.0000,NaT,,,,,


In [37]:
# -*- coding: utf-8 -*-
import os
import json
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials

# =========================
# CONFIG
# =========================
SPREADSHEET_ID = "1H3sYEtkeu47POnu8xZMaMtID1Vj53YIcWblWeZ8d0rc"
GID = "1033250632"

# =========================
# 1) Leer MI_JSON seg√∫n entorno (Colab vs GitHub/local)
# =========================
def get_mi_json():
    # --- Colab ---
    try:
        from google.colab import userdata  # solo existe en Colab
        mi_json = userdata.get("MI_JSON")
        if not mi_json:
            raise RuntimeError("MI_JSON no encontrado en Colab userdata.")
        print("Entorno detectado: Google Colab")
        return mi_json
    except Exception:
        # --- GitHub Actions / local ---
        mi_json = os.environ.get("MI_JSON")
        if not mi_json:
            raise RuntimeError("MI_JSON no encontrado como variable de entorno (GitHub/local).")
        if os.environ.get("GITHUB_ACTIONS") == "true":
            print("Entorno detectado: GitHub Actions")
        else:
            print("Entorno detectado: Local")
        return mi_json

mi_json = get_mi_json()
info = json.loads(mi_json)

SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
    "https://www.googleapis.com/auth/drive.readonly",
]
creds = Credentials.from_service_account_info(info, scopes=SCOPES)
gc = gspread.authorize(creds)

sh = gc.open_by_key(SPREADSHEET_ID)

# =========================
# 2) Resolver el nombre de la hoja a partir del gid
# =========================
meta = sh.fetch_sheet_metadata()
sheet_title = None
for s in meta.get("sheets", []):
    props = s.get("properties", {})
    if str(props.get("sheetId")) == str(GID):
        sheet_title = props.get("title")
        break

if not sheet_title:
    raise ValueError(f"No encontr√© ninguna hoja con gid={GID}. Revisa el link.")

ws = sh.worksheet(sheet_title)

# =========================
# 3) Leer a DataFrame
# =========================
values = ws.get_all_values()

if not values or len(values) < 2:
    df_liq = pd.DataFrame()
else:
    headers = values[0]
    rows = values[1:]
    df_liq = pd.DataFrame(rows, columns=headers)

print("‚úÖ Hoja le√≠da:", sheet_title)
print("Shape:", df_liq.shape)
print(df_liq.head(20))

Entorno detectado: Google Colab
‚úÖ Hoja le√≠da: BD del mes
Shape: (2658, 27)
   Pago a banco Deuda Resuelve  Fecha de Liquidaci√≥n Comisi√≥n  \
0        500000         5000100            6/01/2026  1222515   
1       1000000         7225050            6/01/2026            
2         78000           40000            5/01/2025   815466   
3        128000          196441            5/01/2025            
4        208000          200000            5/01/2025            
5         60000          600000            5/01/2025            
6         70000          720000            5/01/2025            
7         87000          900000            5/01/2025            
8        335000         3700000            5/01/2025            
9       3000000        17234700            5/01/2026  2135205   
10      2154000         9620100            6/01/2026  1493220   
11       180100          674100            6/01/2026    98800   
12       200000         1600200            6/01/2026   280040   
13      52

In [38]:
df_liq

Unnamed: 0,Pago a banco,Deuda Resuelve,Fecha de Liquidaci√≥n,Comisi√≥n,Negociador,Portafolio,Deuda Berex,Referencia,Nombre cliente,Banco,...,Tipo EST,Dia operativo,Check,Casa de cobro,Lider,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21
0,500000,5000100,6/01/2026,1222515,William Santiago Abril Esguerra,SI,1826700,3156617917,Diego Fernando Gaitan Quintero,Davivienda,...,comisi√≥n,2,,0,Juan Jose Hernandez Galvis,TRUE,"27,17%",,1681103,comisi√≥n
1,1000000,7225050,6/01/2026,,William Santiago Abril Esguerra,SI,1826701,3156617917,Diego Fernando Gaitan Quintero,Davivienda,...,,2,,0,Juan Jose Hernandez Galvis,FALSE,"0,00%",,1726199,comisi√≥n
2,78000,40000,5/01/2025,815466,Vivian Caterin Rodriguez Verano,SI,1892087,3104999127,Edison Fernando Hincapie Arcila,Bancolombia,...,comisi√≥n,1,,0,Natalia Valentina Castro Jimenez,TRUE,"-2145,96%",,1773378,comisi√≥n
3,128000,196441,5/01/2025,,Vivian Caterin Rodriguez Verano,SI,1892088,3104999127,Edison Fernando Hincapie Arcila,Bancolombia,...,,1,,0,Natalia Valentina Castro Jimenez,FALSE,"0,00%",,1203220,comisi√≥n
4,208000,200000,5/01/2025,,Vivian Caterin Rodriguez Verano,SI,1892089,3104999127,Edison Fernando Hincapie Arcila,Bancolombia,...,,1,,0,Natalia Valentina Castro Jimenez,FALSE,"0,00%",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2653,,,,,,,,,,,...,,,,0,,,,,,
2654,,,,,,,,,,,...,,,,0,,,,,,
2655,,,,,,,,,,,...,,,,0,,,,,,
2656,,,,,,,,,,,...,,,,0,,,,,,


In [39]:
pip install fuzzywuzzy[speedup]



In [40]:
from rapidfuzz import process, fuzz
import pandas as pd
import re

nombres_correctos = [
    'Bancolombia', 'Banco Falabella', 'Banco de Bogot√°', 'Banco Davivienda',
    'Scotiabank Colpatria', 'BBVA Colombia', 'SisteCredito', 'Banco AV Villas',
    'Banco de Occidente', 'Alkomprar', 'Tuya', 'Codensa', 'Ita√∫',
    'Serfinanza', 'Credivalores', 'Banco Popular', 'Rappipay',
    'Banco Finandina', 'Banco Caja Social', 'Covinoc', 'Bancoomeva',
    'Rapicredit', 'Credijamar', 'Flamingo', 'Zinobe', 'Bancamia',
    'Refinancia', 'Compensar', 'Pichincha', 'Colsubsidio', 'Mundo Mujer',
    'Agaval', 'Sistemcobro', 'Serlefin', 'Rappicard', 'Fincomercio',
    'GRUPO JURIDICO DEUDU', 'AECSA', 'Sufi', 'Comultrasan', 'Fundaci√≥n',
    'Cobrando', 'Aslegal', 'Coltefinanciera', 'Reestructura', 'Nu',
    'Juancho te Presta', 'Muebles', 'JOHN', 'GNB Sudameris', 'Confiar',
    'Baninca', 'Systemgroup', 'Efecty'
]

# Bancos que se usan con el prefijo "banco" completo como referencia
bancos_excepciones = ['Banco de Bogot√°', 'Bancolombia', 'Bancoomeva']

# ==============================
# 2. Sin√≥nimos / patrones por banco (texto original, sin limpiar)
# ==============================
PATRONES_POR_BANCO = {
    'Bancolombia': [
        'bancolombia', 'contento bancolombia', 'qnt bancolombia', 'sufi'
    ],
    'Banco Davivienda': [
        'davivienda', 'intercredito davivienda', 'gestiones profesionales davivienda',
        'inversionistas estrat√©gicos davivienda', 'inversionistas estrategicos davivienda',
        'management davivienda', 'davivienda cobrado sas', 'davivienda cobrando sas',
        'deudu davivienda', 'qnt davivienda'
    ],
    'BBVA Colombia': [
        'bbva', 'cobrando bbva', 'beta bbva', 'cobranzas beta origen: bbva',
        'aecsa bbva', 'grupo juridico bbva', 'grupo jur√≠dico bbva', 'qnt bbva'
    ],
    'Banco Falabella': [
        'falabella', 'bfalabella', 'bancofalab', 'bancofalab citisumma',
        'banco falabella casa de cobro', 'eyc falabella',
        'cobrando falabella', 'acr logros f ori falabella',
        'deudu falabella', 'deudo falabella', 'citisumma falabella',
        'logros factoring falabella'
    ],
    'Tuya': [
        ' tuya', 'viva-tuya', 'viva tuya', 'qnt tuya', 'aecsa tuya',
        'tuya contacto soluciones', 'tuya s.a contactosol', 'tuya s a contactosol',
        'tuya contacto soluciones', 'qnt tuya',
        '√©xito', 'exito', 'carulla', 'Alkosto', 'Corbeta'
    ],
    'Scotiabank Colpatria': [
        'scotiabank', 'skotiabank', 'colpatria', 'peruzzi skotiabank colpatria',
        'serlefin colpatria', 'adamantine scotiabank', 'gc andino colpatria',
        'scotiabank citibank', 'qnt colpatria', 'crc colpatria',
        'grupo consulto colpatria', 'grupo consultor andino colpatria',
        'gr.consulto colpatria', 'Codensa'
    ],
    'Banco de Bogot√°': [
        'banco de bogota', 'banco de bogot√°', 'qnt bogota', 'qnt bogot√°',
        'crear pa√≠s banco de bogot√°', 'crear pais banco de bogota'
    ],
    'Banco de Occidente': [
        'banco de occidente', 'qnt banco de occidente', 'deudu-banco de occidente',
        'deudu banco de occidente'
    ],
    'Banco Popular': [
        'banco popular', 'banco popular casa de cobro', 'banco popular contactosol',
        'banco popular contactosolsas', 'banco popular citisumma',
        'banco popular-adcore', 'deudu banco popular', 'peruzzicol bcopopular'
    ],
    'Banco AV Villas': [
        'av villas', 'banco av villas', 'grupo consultor andino av villas',
        'grupo juridico av villas', 'grupo jur√≠dico av villas',
        'ae csa av villas', 'aecsa av villas', 'crear pa√≠s banco av villas',
        'crear pais banco av villas', 'deudu av villas', 'qnt av villas'
    ],
    'Banco Caja Social': [
        'banco caja social', 'caja social', 'pic caja social'
    ],
    'Bancoomeva': [
        'banco coomeva', 'bancoomeva', 'coomeva'
    ],
    'Bancamia': [
        'bancamia', 'bancamia s.a', 'bancamia s a'
    ],
    'Mundo Mujer': [
        'banco mundo mujer', 'fundacion de la mujer', 'fundaci√≥n de la mujer'
    ],
    'SisteCredito': [
        'sistecredito', 'sistecr√©dito'
    ],
    'Covinoc': ['covinoc'],
    'Compensar': ['compensar'],
    'Pichincha': ['pichincha', 'pichincha educativo'],
    'Agaval': ['agaval'],
    'Banco Finandina': [
        'finandina', 'finandina incomercio', 'finandina incomercio'
    ],
    'Fincomercio': ['fincomercio'],
    'Serfinanza': [
        'serfinanza', 'serfinansa', 'serfinanza contactosol',
        'contacto solucion serfinanza', 'contacto soluci√≥n serfinanza'
    ],
    'Credijamar': [
        'credijamar', 'muebles jamar'
    ],
    'Juancho te Presta': ['juancho te presta'],
    'Rapicredit': ['rapicredit'],
    'Zinobe': ['zinobe'],
    'Coltefinanciera': ['coltefinanciera'],
    'Sistemcobro': ['sistemcobro'],
    'Systemgroup': ['systemgroup'],
    'Baninca': ['baninca'],
    'GNB Sudameris': ['gnb sudameris'],
    'Confiar': ['confiar'],
    'AECSA': ['aecsa'],
    'Comultrasan': ['comultrasan'],
    'Nu': ['nu bank', 'nubank', 'logros factoring nubank'],
    'Rappi':['Rappicard', 'Rappipay'],
    'Lulo Bank': ['LuloBank', 'Lulo Banck'],
    'Banco union': ['QNT GIROS&FINANZAS']

}

# ==============================
# 3. Alias manuales sobre texto LIMPIO
# ==============================
alias_manuales = {
    'bfalabella': 'Banco Falabella',
    'contactosol': 'Banco Falabella',
    'qnt itau': 'Ita√∫',
    'itau helm': 'Ita√∫',
    'itau corpbanca': 'Ita√∫',
    'viva tuya': 'Tuya',
    'carulla': 'Tuya',
    'sufi': 'Bancolombia',
    'adamantine scotiabank': 'Scotiabank Colpatria',
    'gc andino colpatria': 'Scotiabank Colpatria',
    'beta bbva': 'BBVA Colombia',
    'banco popular': 'Banco Popular',
    'qnt bogota': 'Banco de Bogot√°',
    'banco caja social': 'Banco Caja Social',
    'banco av villas': 'Banco AV Villas',
    'banco davivienda': 'Banco Davivienda',
    'bancofalab citisumma': 'Banco Falabella',
    'exito': 'Tuya',
    '√©xito': 'Tuya',
    'Alkosto': 'Tuya',
    'Corbeta': 'Tuya',
    'Rappipay': 'Rappi',
    'Rappicard': 'Rappi',
    'Lulo Banck': 'Lulo Bank',
    'LuloBank': 'Lulo Bank',
    'QNT GIROS&FINANZAS': 'Banco Union',
    'Codensa': 'Scotiabank Colpatria'


}

# ==============================
# 4. Funci√≥n de limpieza
# ==============================
def limpiar_texto(texto):
    """
    Limpia y normaliza el texto: min√∫sculas, elimina caracteres no alfab√©ticos
    (excepto acentos y √±/√º), elimina palabras irrelevantes y quita espacios dobles.
    """
    texto = str(texto).lower()

    # Dejar solo letras, acentos, √±, √º y espacios
    texto = re.sub(r'[^a-z√°√©√≠√≥√∫√±√º\s]', ' ', texto)

    # Eliminar palabras "de relleno" frecuentes en reparadoras / BPO
    texto = re.sub(
        r'\b('
        r'grupo|juridico|jur√≠dico|sas|sa|s a|ltda|suma|financiera|'
        r'contactosol|contacto|solucion|soluciones|citisumma|'
        r'cobrando|cobranzas|adcore|logros|factoring|origen|origem|'
        r'gestiones|gestion|profesionales|bpo|inversionistas|'
        r'estrategicos|estrat√©gicos|casa|de|cobro|servicios|'
        r'creditos|credito|abogados|asociados|'
        r'outsourcing|risk|patrimonio|autonomo|aut√≥nomo|central|'
        r'inversiones|valora|punto|com|puntocom|activos|'
        r'recuperacion|recuperaci√≥n|financiera|financiero|'
        r'asesores|asociados|gest|prof|eyc|gca|summa'
        r')\b',
        '',
        texto
    )

    # Espacios m√∫ltiples -> uno solo
    texto = re.sub(r'\s+', ' ', texto).strip()
    return texto

# ==============================
# 5. Claves de referencia para fuzzy
# ==============================
claves_referencia = {
    (n.lower() if n in bancos_excepciones else re.sub(r'^banco\s*', '', n.lower())): n
    for n in nombres_correctos
}

# ==============================
# 6. Mapeo por patr√≥n (texto original)
# ==============================
def mapear_por_patron(nombre_incorrecto):
    """
    Intenta identificar el banco a partir del texto ORIGINAL (sin limpiar),
    buscando los patrones definidos en PATRONES_POR_BANCO.
    """
    if pd.isna(nombre_incorrecto):
        return None

    texto = str(nombre_incorrecto).lower()

    for banco_estandar, patrones in PATRONES_POR_BANCO.items():
        for patron in patrones:
            if patron in texto:
                return banco_estandar

    return None

# ==============================
# 7. Funci√≥n principal de correcci√≥n
# ==============================
def corregir_nombre(nombre_incorrecto):
    """
    Corrige un nombre de banco usando 3 capas:
    1. Mapeo por patr√≥n sobre el texto original.
    2. Limpieza de texto + alias manuales.
    3. Fuzzy matching contra claves_referencia.
    """
    # Si es NaN/None, lo dejamos igual
    if pd.isna(nombre_incorrecto):
        return nombre_incorrecto

    # 1. Intentar primero con patrones evidentes en el texto original
    banco_patron = mapear_por_patron(nombre_incorrecto)
    if banco_patron is not None:
        return banco_patron

    # 2. Limpiar texto
    limpio = limpiar_texto(nombre_incorrecto)

    # Si despu√©s de limpiar no queda nada, devolvemos el original
    if limpio == '':
        return nombre_incorrecto

    # 3. Alias manuales sobre texto limpio
    for alias, banco_estandar in alias_manuales.items():
        if alias in limpio:
            return banco_estandar

    # 4. Fuzzy matching con RapidFuzz
    mejor_match, score, _ = process.extractOne(
        limpio,
        claves_referencia.keys(),
        scorer=fuzz.token_set_ratio
    )

    # Si la similitud es alta, usamos el banco est√°ndar; si no, dejamos el texto original
    return claves_referencia[mejor_match] if score > 70 else nombre_incorrecto

# ==============================
# 8. Aplicar al DataFrame
# ==============================

# Crear nueva columna estandarizada en reparadoras_df
df_liq['BANCOS_ESTANDAR'] = df_liq['Banco'].apply(corregir_nombre)

In [41]:
import pandas as pd
import numpy as np

# ‚úÖ ZoneInfo compatible con Colab y GitHub (Py < 3.9)
try:
    from zoneinfo import ZoneInfo
except ImportError:  # pragma: no cover
    from backports.zoneinfo import ZoneInfo  # type: ignore


def _to_float_money(x):
    if pd.isna(x):
        return np.nan
    s = str(x).strip()
    if s == "":
        return np.nan

    s = s.replace("$", "").replace("COP", "").replace("cop", "").replace(" ", "")

    if "," in s and "." in s:
        if s.rfind(",") > s.rfind("."):
            s = s.replace(".", "")
            s = s.replace(",", ".")
        else:
            s = s.replace(",", "")
    else:
        if "," in s:
            if s.count(",") > 1:
                s = s.replace(",", "")
            else:
                tail = s.split(",")[-1]
                s = s.replace(",", ".") if len(tail) in (1, 2) else s.replace(",", "")
        if "." in s:
            if s.count(".") > 1:
                s = s.replace(".", "")
            else:
                tail = s.split(".")[-1]
                if len(tail) not in (1, 2):
                    s = s.replace(".", "")

    try:
        return float(s)
    except Exception:
        return np.nan


def _to_bogota_datetime_ddmmyyyy(series, tz="America/Bogota"):
    tzinfo = ZoneInfo(tz)
    dt = pd.to_datetime(series, errors="coerce", dayfirst=True)
    if getattr(dt.dt, "tz", None) is None:
        dt = dt.dt.tz_localize(tzinfo, nonexistent="shift_forward", ambiguous="NaT")
    else:
        dt = dt.dt.tz_convert(tzinfo)
    return dt


def _modo(s: pd.Series):
    s = s.dropna().astype(str)
    if s.empty:
        return np.nan
    return s.value_counts().index[0]


def agregar_liquidaciones_al_timeline_con_fallback(
    df_timeline: pd.DataFrame,
    df_liq: pd.DataFrame,
) -> pd.DataFrame:
    df_tl = df_timeline.copy()
    df_l = df_liq.copy()

    # asegurar columnas nuevas
    for c in ["Negociador liquidacion", "Por?"]:
        if c not in df_tl.columns:
            df_tl[c] = np.nan

    # normalizar llaves
    df_l["Id deuda"] = pd.to_numeric(df_l["Deuda Berex"], errors="coerce").astype("Int64")
    df_l["Referencia"] = pd.to_numeric(df_l["Referencia"], errors="coerce").astype("Int64")

    # fecha dd/mm/yyyy -> datetime Bogot√°
    df_l["inserted_at"] = _to_bogota_datetime_ddmmyyyy(df_l["Fecha de Liquidaci√≥n"])

    # pago a banco float
    df_l["payment_to_bank"] = df_l["Pago a banco"].apply(_to_float_money).astype(float)

    # mapas desde timeline para fallback
    base_by_debt = (
        df_tl[
            ["Id deuda", "Referencia", "BANCOS_ESTANDAR", "Descuento", "D_BRAVO", "Tipo de Liquidacion"]
        ]
        .dropna(subset=["Id deuda"])
        .drop_duplicates(subset=["Id deuda"])
        .set_index("Id deuda")
    )

    map_ref_by_debt = base_by_debt["Referencia"].to_dict()
    map_banco_by_debt = base_by_debt["BANCOS_ESTANDAR"].to_dict()
    map_desc_by_debt = base_by_debt["Descuento"].to_dict()
    map_deuda_by_debt = base_by_debt["D_BRAVO"].to_dict()
    map_tipol_by_debt = base_by_debt["Tipo de Liquidacion"].to_dict()

    # mapa negociador por referencia
    map_neg_ref = (
        df_tl[["Referencia", "Negociador"]]
        .dropna(subset=["Referencia", "Negociador"])
        .groupby("Referencia")["Negociador"]
        .apply(_modo)
        .to_dict()
    )

    banco_liq = df_l["BANCO_ESTANDAR"] if "BANCO_ESTANDAR" in df_l.columns else pd.Series(np.nan, index=df_l.index)
    deuda_res_liq = df_l["D_BRAVO"] if "D_BRAVO" in df_l.columns else pd.Series(np.nan, index=df_l.index)

    if "descuento" in df_l.columns:
        desc_liq = pd.to_numeric(df_l["descuento"], errors="coerce")
    else:
        desc_liq = pd.Series(np.nan, index=df_l.index)

    tipo_liq_liq = df_l["Tipo de liquidacion"].astype(object)

    filas_liq = pd.DataFrame({
        "Referencia": df_l["Referencia"],
        "Id deuda": df_l["Id deuda"],
        "Negociador": np.nan,
        "BANCOS_ESTANDAR": banco_liq.astype(object),
        "Descuento": desc_liq.astype(float),
        "D_BRAVO": deuda_res_liq,
        "Tipo de Liquidacion": tipo_liq_liq,
        "inserted_at": df_l["inserted_at"],
        "end": np.nan,
        "payment_to_bank": df_l["payment_to_bank"],
        "CATEGORIA_PRED": "Liquidado",
        "observations": np.nan,
        "tipo_fila": "Liquidaci√≥n",
        "Negociador liquidacion": df_l["Negociador"].astype(str),
        "Por?": df_l["Tipo de liquidacion"].astype(str),
    })

    filas_liq = (
        filas_liq.dropna(subset=["Id deuda"])
        .sort_values(["Id deuda", "inserted_at"])
        .groupby("Id deuda", as_index=False)
        .tail(1)
    )

    filas_liq["Referencia"] = filas_liq["Referencia"].fillna(filas_liq["Id deuda"].map(map_ref_by_debt))
    filas_liq["Negociador"] = filas_liq["Referencia"].map(map_neg_ref)

    filas_liq["BANCOS_ESTANDAR"] = filas_liq["BANCOS_ESTANDAR"].fillna(filas_liq["Id deuda"].map(map_banco_by_debt))
    filas_liq["Descuento"] = filas_liq["Descuento"].fillna(filas_liq["Id deuda"].map(map_desc_by_debt))

    filas_liq["D_BRAVO"] = pd.to_numeric(filas_liq["D_BRAVO"], errors="coerce")
    filas_liq["D_BRAVO"] = filas_liq["D_BRAVO"].fillna(filas_liq["Id deuda"].map(map_deuda_by_debt))

    filas_liq["Tipo de Liquidacion"] = filas_liq["Tipo de Liquidacion"].replace("", np.nan)
    filas_liq["Tipo de Liquidacion"] = filas_liq["Tipo de Liquidacion"].fillna(
        filas_liq["Id deuda"].map(map_tipol_by_debt)
    )

    filas_liq["Referencia"] = filas_liq["Referencia"].astype("Int64")
    filas_liq["Id deuda"] = filas_liq["Id deuda"].astype("Int64")
    filas_liq["Descuento"] = pd.to_numeric(filas_liq["Descuento"], errors="coerce").astype(float)
    filas_liq["D_BRAVO"] = pd.to_numeric(filas_liq["D_BRAVO"], errors="coerce").astype(float)
    filas_liq["payment_to_bank"] = pd.to_numeric(filas_liq["payment_to_bank"], errors="coerce").astype(float)
    filas_liq["inserted_at"] = _to_bogota_datetime_ddmmyyyy(filas_liq["inserted_at"])

    for c in df_tl.columns:
        if c not in filas_liq.columns:
            filas_liq[c] = np.nan
    for c in filas_liq.columns:
        if c not in df_tl.columns:
            df_tl[c] = np.nan

    filas_liq = filas_liq[df_tl.columns]

    out = (
        pd.concat([df_tl, filas_liq], ignore_index=True)
        .sort_values(["Id deuda", "inserted_at"], na_position="first")
        .reset_index(drop=True)
    )

    return out


# ===== USO =====
df_timeline_final = agregar_liquidaciones_al_timeline_con_fallback(df_timeline, df_liq)
df_timeline_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41559 entries, 0 to 41558
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype                         
---  ------                  --------------  -----                         
 0   Referencia              41559 non-null  Int64                         
 1   Id deuda                41559 non-null  Int64                         
 2   Negociador              41548 non-null  object                        
 3   BANCOS_ESTANDAR         41541 non-null  object                        
 4   Descuento               36590 non-null  float64                       
 5   D_BRAVO                 41541 non-null  float64                       
 6   Estructurable           41541 non-null  float64                       
 7   Potencial               36452 non-null  float64                       
 8   Meses en el Programa    41541 non-null  float64                       
 9   Tipo de Liquidacion     41559 non-null  object    

  filas_liq["BANCOS_ESTANDAR"] = filas_liq["BANCOS_ESTANDAR"].fillna(filas_liq["Id deuda"].map(map_banco_by_debt))


In [42]:
# -*- coding: utf-8 -*-
import os
import json
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials

# =========================
# 1) Leer MI_JSON seg√∫n entorno
# =========================
def get_mi_json():
    try:
        # --- Colab ---
        from google.colab import userdata
        mi_json = userdata.get("MI_JSON")
        if not mi_json:
            raise RuntimeError("MI_JSON no encontrado en Colab userdata.")
        print("Entorno detectado: Google Colab")
        return mi_json
    except Exception:
        # --- GitHub Actions / local ---
        mi_json = os.environ.get("MI_JSON")
        if not mi_json:
            raise RuntimeError("MI_JSON no encontrado como variable de entorno (GitHub/local).")
        if os.environ.get("GITHUB_ACTIONS") == "true":
            print("Entorno detectado: GitHub Actions")
        else:
            print("Entorno detectado: Local")
        return mi_json

mi_json = get_mi_json()
info = json.loads(mi_json)

SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets.readonly",
    "https://www.googleapis.com/auth/drive.readonly",
]
creds = Credentials.from_service_account_info(info, scopes=SCOPES)
gc = gspread.authorize(creds)

# =========================
# 2) Abrir spreadsheet
# =========================
SPREADSHEET_ID = "1sMGQCCDiEzZI3f2w5BwFO8iWtxY5AEDJC2dypZ0NrCY"
sh = gc.open_by_key(SPREADSHEET_ID)

# =========================
# 3) Leer df_priorizar
# =========================
SHEET_PRIORIZAR = "Priorizar"
ws_prio = sh.worksheet(SHEET_PRIORIZAR)
values_prio = ws_prio.get_all_values()

if not values_prio or len(values_prio) < 2:
    df_priorizar = pd.DataFrame()
else:
    df_priorizar = pd.DataFrame(values_prio[1:], columns=values_prio[0])

print("‚úÖ df_priorizar le√≠da correctamente")
print("Shape:", df_priorizar.shape)
print(df_priorizar.head())

# =========================
# 4) Leer df_asignar
# =========================
SHEET_ASIGNAR = "Asignar"
ws_asig = sh.worksheet(SHEET_ASIGNAR)
values_asig = ws_asig.get_all_values()

if not values_asig or len(values_asig) < 2:
    df_asignar = pd.DataFrame()
else:
    df_asignar = pd.DataFrame(values_asig[1:], columns=values_asig[0])

print("‚úÖ df_asignar le√≠da correctamente")
print("Shape:", df_asignar.shape)
print(df_asignar.head())

Entorno detectado: Google Colab
‚úÖ df_priorizar le√≠da correctamente
Shape: (1887, 35)
  Referencia    Cedula          Nombre del cliente Id deuda  \
0    3276111   3276111         FRAZIER MELO PRIETO  1903825   
1    3276111   3276111         FRAZIER MELO PRIETO  1903823   
2    5829693   5829693   Rudi Andres lasso Giraldo  1862219   
3    5829693   5829693   Rudi Andres lasso Giraldo  1862218   
4   10932694  10932694  ARMANDO ILICK HUMANEZ USTA  1922148   

                       correo                 Banco Deuda Resuelve        DBT  \
0     fraziermelo@hotmail.com             Falabella        5656200  112184200   
1     fraziermelo@hotmail.com                 √âxito        1678950  112184200   
2  rudyandreslasso@hotmail.es             Av Villas        4468142   49700682   
3  rudyandreslasso@hotmail.es  Scotiabank Colpatria        4361500   49700682   
4     armandousta@hotmail.com       Banco AV Villas        3383503   73687327   

  tipo_cliente Comisi√≥n Mensual  ... Ultima 

In [43]:
import pandas as pd
import numpy as np

# 1) Normalizar Id deuda a num√©rico en las 3 bases (sin romper)
tl_ids = pd.to_numeric(df_timeline_final["Id deuda"], errors="coerce").astype("Int64")

prio_ids = pd.to_numeric(df_priorizar["Id deuda"], errors="coerce").astype("Int64")
asig_ids = pd.to_numeric(df_asignar["Id deuda"], errors="coerce").astype("Int64")

# 2) Sets (quitamos NA)
set_prio = set(prio_ids.dropna().astype("int64"))
set_asig = set(asig_ids.dropna().astype("int64"))

# 3) Crear columnas 1/0
df_timeline_final = df_timeline_final.copy()

df_timeline_final["prioridad_tradicional"] = (
    tl_ids.dropna().astype("int64").isin(set_prio)
    .reindex(df_timeline_final.index, fill_value=False)
    .astype(int)
)

df_timeline_final["prioridad_asignada"] = (
    tl_ids.dropna().astype("int64").isin(set_asig)
    .reindex(df_timeline_final.index, fill_value=False)
    .astype(int)
)

# (Opcional) chequeo r√°pido
print("IDs en df_priorizar:", len(set_prio))
print("IDs en df_asignar:", len(set_asig))
print(df_timeline_final[["Id deuda", "prioridad_tradicional", "prioridad_asignada"]].head(10))

IDs en df_priorizar: 1887
IDs en df_asignar: 1152
   Id deuda  prioridad_tradicional  prioridad_asignada
0    111266                      0                   0
1    225561                      0                   0
2    225562                      0                   0
3    225563                      0                   0
4    404243                      0                   0
5    427629                      0                   0
6    434218                      0                   0
7    434219                      0                   0
8    434220                      0                   0
9    445585                      0                   0


In [44]:
import numpy as np

df_timeline = df_timeline_final.copy()

# -------------------------
# 1) Pago banco esperado
# -------------------------
df_timeline["Pago_banco_esperado"] = (
    df_timeline["D_BRAVO"]
    - (df_timeline["D_BRAVO"] * df_timeline["Descuento"])
)

# -------------------------
# 2) Ingreso esperado (CE = 0.15)
# -------------------------
CE = 0.15

df_timeline["Ingreso_esperado"] = np.maximum(
    0,
    (df_timeline["D_BRAVO"] - df_timeline["Pago_banco_esperado"]) * 1.19 * CE
)

# -------------------------
# 3) Mover columna al lado de D_BRAVO
# -------------------------
cols = df_timeline.columns.tolist()
idx = cols.index("D_BRAVO")

# quitamos y reinsertamos
cols.remove("Ingreso_esperado")
cols.insert(idx + 1, "Ingreso_esperado")

df_timeline = df_timeline[cols]

df_timeline[[
    "D_BRAVO",
    "Descuento",
    "Pago_banco_esperado",
    "Ingreso_esperado"
]].head()

Unnamed: 0,D_BRAVO,Descuento,Pago_banco_esperado,Ingreso_esperado
0,9256000.0,0.7126,2660174.4,1177355.0
1,2138165.0,0.5,1069082.5,190831.2
2,3299116.0,0.56,1451611.04,329779.6
3,5959400.0,0.6715,1957662.9,714310.1
4,1921650.0,0.48,999258.0,164647.0


In [45]:
df_timeline

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Descuento,D_BRAVO,Ingreso_esperado,Estructurable,Potencial,Meses en el Programa,...,end,payment_to_bank,CATEGORIA_PRED,observations,tipo_fila,Negociador liquidacion,Por?,prioridad_tradicional,prioridad_asignada,Pago_banco_esperado
0,3012650,111266,Negodito,Scotiabank Colpatria,0.7126,9256000.0,1.177355e+06,1.0,5.0,116.0,...,,,,,,,,0,0,2.660174e+06
1,3122962933,225561,William Santiago Abril Esguerra,Banco de Occidente,0.5000,2138165.0,1.908312e+05,1.0,6.0,80.0,...,,,,,,,,0,0,1.069082e+06
2,3122962933,225562,William Santiago Abril Esguerra,Banco de Occidente,0.5600,3299116.0,3.297796e+05,1.0,8.0,80.0,...,,,,,,,,0,0,1.451611e+06
3,3122962933,225563,William Santiago Abril Esguerra,Tuya,0.6715,5959400.0,7.143101e+05,0.0,13.0,80.0,...,,,,,,,,0,0,1.957663e+06
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,0.4800,1921650.0,1.646470e+05,1.0,40.0,75.0,...,,,,,,,,0,0,9.992580e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41554,3225833196,2091518,Gabriela Saavedra Latorre,Banco Davivienda,0.3637,13695024.0,8.890871e+05,1.0,5.0,2.0,...,,,,,,,,0,0,8.714144e+06
41555,3225833196,2091519,Gabriela Saavedra Latorre,Scotiabank Colpatria,0.5000,50463309.0,4.503850e+06,0.0,15.0,2.0,...,,,,,,,,0,0,2.523165e+07
41556,3225833196,2091520,Gabriela Saavedra Latorre,Bancolombia,0.4700,82022139.0,6.881247e+06,0.0,24.0,2.0,...,,,,,,,,0,0,4.347173e+07
41557,3105507630,2091523,Julian Andres Ossa Bohorquez,Bancolombia,0.4700,130935011.0,1.098479e+07,0.0,30.0,2.0,...,,,,,,,,0,0,6.939556e+07


In [46]:
import pandas as pd
import numpy as np
import re
import unicodedata

df_liq_aux = df_liq.copy()

def _norm_col(x: str) -> str:
    # normaliza: lower, quita tildes, colapsa espacios
    x = str(x).replace("\u00a0", " ")
    x = re.sub(r"\s+", " ", x).strip().lower()
    x = "".join(ch for ch in unicodedata.normalize("NFD", x) if unicodedata.category(ch) != "Mn")
    return x

# --- 1) Encontrar el nombre REAL de columnas (robusto)
cols_norm = {c: _norm_col(c) for c in df_liq_aux.columns}

# deuda (lo que t√∫ quieres para D_BRAVO) = "Deuda Resuelve"
col_deuda = next((c for c, cn in cols_norm.items() if cn in ("d_bravo", "deuda resuelve", "deuda_resuelve")), None)

# banco
col_banco = next((c for c, cn in cols_norm.items() if cn in ("bancos_estandar", "banco estandar", "banco_estandar")), None)

# id berex
col_berex = next((c for c, cn in cols_norm.items() if cn in ("deuda berex", "deuda_berex", "id deuda berex", "id_deuda_berex")), None)

print("Columna deuda detectada:", col_deuda)
print("Columna banco detectada:", col_banco)
print("Columna berex detectada:", col_berex)

if col_deuda is None:
    raise KeyError("No encontr√© la columna de deuda (esperaba 'Deuda Resuelve' o 'D_BRAVO').")
if col_banco is None:
    raise KeyError("No encontr√© la columna 'BANCOS_ESTANDAR' (ni variantes).")
if col_berex is None:
    raise KeyError("No encontr√© la columna 'Deuda Berex' (ni variantes).")

# --- 2) Construir Id deuda desde Deuda Berex
df_liq_aux["Id deuda"] = pd.to_numeric(df_liq_aux[col_berex], errors="coerce").astype("Int64")

# --- 3) Parse D_BRAVO a float (robusto)
s = df_liq_aux[col_deuda].astype(str).str.strip()

# limpiar s√≠mbolos y separar miles/decimales t√≠pico de COP
s = s.str.replace(r"[^\d,\.]", "", regex=True)

# caso t√≠pico: "45.488.000" -> quitar puntos miles
# y "45,488,000" -> quitar comas miles
# y si viene con decimal, lo intentamos preservar
# estrategia: si tiene ambos, asumimos decimal el √∫ltimo separador
def _parse_money_str(x):
    if x in ("", "nan", "None"):
        return np.nan
    if "," in x and "." in x:
        # el separador que aparezca m√°s a la derecha es decimal
        if x.rfind(",") > x.rfind("."):
            x = x.replace(".", "")
            x = x.replace(",", ".")
        else:
            x = x.replace(",", "")
    else:
        # si solo tiene comas: puede ser miles o decimal
        if "," in x:
            parts = x.split(",")
            # si √∫ltimo bloque tiene 1-2 d√≠gitos, decimal
            if len(parts[-1]) in (1,2):
                x = x.replace(".", "")
                x = x.replace(",", ".")
            else:
                x = x.replace(",", "")
        # si solo tiene puntos: miles o decimal
        if "." in x:
            parts = x.split(".")
            if len(parts[-1]) not in (1,2):  # no parece decimal
                x = x.replace(".", "")
    try:
        return float(x)
    except:
        return np.nan

df_liq_aux["Deuda_Resuelve_num"] = s.map(_parse_money_str)

# --- 4) Mapas por Id deuda
map_banco_liq = (
    df_liq_aux.dropna(subset=["Id deuda", col_banco])
             .drop_duplicates(subset=["Id deuda"])
             .set_index("Id deuda")[col_banco]
)

map_deuda_liq = (
    df_liq_aux.dropna(subset=["Id deuda", "Deuda_Resuelve_num"])
             .drop_duplicates(subset=["Id deuda"])
             .set_index("Id deuda")["Deuda_Resuelve_num"]
)

# --- 5) Rellenar SOLO filas Liquidaci√≥n y SOLO si est√° nulo
df = df_timeline.copy()
mask_liq = df["tipo_fila"].astype("string").eq("Liquidaci√≥n")

df.loc[mask_liq, "BANCOS_ESTANDAR"] = (
    df.loc[mask_liq, "BANCOS_ESTANDAR"]
      .fillna(df.loc[mask_liq, "Id deuda"].map(map_banco_liq))
)

df.loc[mask_liq, "D_BRAVO"] = (
    df.loc[mask_liq, "D_BRAVO"]
      .fillna(df.loc[mask_liq, "Id deuda"].map(map_deuda_liq))
)

df["D_BRAVO"] = pd.to_numeric(df["D_BRAVO"], errors="coerce").astype(float)

df_timeline_final = df

# --- 6) Chequeo
m = df_timeline_final["tipo_fila"].astype("string").eq("Liquidaci√≥n")
print("Liquidaci√≥n sin BANCOS_ESTANDAR:", df_timeline_final.loc[m, "BANCOS_ESTANDAR"].isna().sum())
print("Liquidaci√≥n sin D_BRAVO:", df_timeline_final.loc[m, "D_BRAVO"].isna().sum())

Columna deuda detectada: Deuda Resuelve 
Columna banco detectada: BANCOS_ESTANDAR
Columna berex detectada: Deuda Berex
Liquidaci√≥n sin BANCOS_ESTANDAR: 0
Liquidaci√≥n sin D_BRAVO: 0


In [47]:
df_timeline_final

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Descuento,D_BRAVO,Ingreso_esperado,Estructurable,Potencial,Meses en el Programa,...,end,payment_to_bank,CATEGORIA_PRED,observations,tipo_fila,Negociador liquidacion,Por?,prioridad_tradicional,prioridad_asignada,Pago_banco_esperado
0,3012650,111266,Negodito,Scotiabank Colpatria,0.7126,9256000.0,1.177355e+06,1.0,5.0,116.0,...,,,,,,,,0,0,2.660174e+06
1,3122962933,225561,William Santiago Abril Esguerra,Banco de Occidente,0.5000,2138165.0,1.908312e+05,1.0,6.0,80.0,...,,,,,,,,0,0,1.069082e+06
2,3122962933,225562,William Santiago Abril Esguerra,Banco de Occidente,0.5600,3299116.0,3.297796e+05,1.0,8.0,80.0,...,,,,,,,,0,0,1.451611e+06
3,3122962933,225563,William Santiago Abril Esguerra,Tuya,0.6715,5959400.0,7.143101e+05,0.0,13.0,80.0,...,,,,,,,,0,0,1.957663e+06
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,0.4800,1921650.0,1.646470e+05,1.0,40.0,75.0,...,,,,,,,,0,0,9.992580e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41554,3225833196,2091518,Gabriela Saavedra Latorre,Banco Davivienda,0.3637,13695024.0,8.890871e+05,1.0,5.0,2.0,...,,,,,,,,0,0,8.714144e+06
41555,3225833196,2091519,Gabriela Saavedra Latorre,Scotiabank Colpatria,0.5000,50463309.0,4.503850e+06,0.0,15.0,2.0,...,,,,,,,,0,0,2.523165e+07
41556,3225833196,2091520,Gabriela Saavedra Latorre,Bancolombia,0.4700,82022139.0,6.881247e+06,0.0,24.0,2.0,...,,,,,,,,0,0,4.347173e+07
41557,3105507630,2091523,Julian Andres Ossa Bohorquez,Bancolombia,0.4700,130935011.0,1.098479e+07,0.0,30.0,2.0,...,,,,,,,,0,0,6.939556e+07


In [48]:
# =========================================================
# BASE FUNNEL COMPLETA (1 fila por Id deuda) desde df_timeline_final
# Compatible con Google Colab y GitHub / ejecuci√≥n local
# =========================================================

import pandas as pd
import numpy as np

# -----------------------------
# 0) Copia segura
# -----------------------------
df = df_timeline_final.copy()

# -----------------------------
# 1) Tipos / limpieza base
# -----------------------------
df["Id deuda"] = pd.to_numeric(df.get("Id deuda"), errors="coerce").astype("Int64")
df["Referencia"] = pd.to_numeric(df.get("Referencia"), errors="coerce").astype("Int64")

df["inserted_at"] = pd.to_datetime(df.get("inserted_at"), errors="coerce")

df["D_BRAVO"] = pd.to_numeric(df.get("D_BRAVO"), errors="coerce")
df["Ingreso_esperado"] = pd.to_numeric(df.get("Ingreso_esperado"), errors="coerce")
df["payment_to_bank"] = pd.to_numeric(df.get("payment_to_bank"), errors="coerce")

if "CATEGORIA_PRED" not in df.columns:
    raise ValueError("df_timeline_final no tiene la columna 'CATEGORIA_PRED'.")

df["_cat_all_norm"] = (
    df["CATEGORIA_PRED"]
    .astype("string")
    .str.strip()
    .str.upper()
)

# -----------------------------
# 2) Hist√≥rico liquidado
# -----------------------------
liq_hist = (
    df.groupby("Id deuda")["_cat_all_norm"]
      .apply(lambda s: (s == "LIQUIDADO").any())
)

# -----------------------------
# 3) √öltimo registro por deuda
# -----------------------------
df = df.sort_values(["Id deuda", "inserted_at"], na_position="last")
df_ult = df.groupby("Id deuda", as_index=False).tail(1).copy()

df_ult = df_ult.rename(columns={
    "inserted_at": "inserted_at_ultima",
    "observations": "observations_ultima",
    "CATEGORIA_PRED": "CATEGORIA_PRED_ultima",
    "payment_to_bank": "payment_to_bank_ultima",
    "end": "end_ultima",
})

# -----------------------------
# 4) Flags observaci√≥n / mes actual (Bogot√°)
# -----------------------------
obs = (
    df_ult.get("observations_ultima", pd.Series(pd.NA, index=df_ult.index))
    .astype("string")
    .str.strip()
)
df_ult["tiene_obs"] = obs.notna() & (obs != "")

hoy = pd.Timestamp.now(tz="America/Bogota")
inicio_mes = hoy.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

col = "inserted_at_ultima"
df_ult[col] = pd.to_datetime(df_ult[col], errors="coerce")

if df_ult[col].dt.tz is None:
    df_ult[col] = df_ult[col].dt.tz_localize(
        "America/Bogota",
        nonexistent="shift_forward",
        ambiguous="NaT"
    )
else:
    df_ult[col] = df_ult[col].dt.tz_convert("America/Bogota")

df_ult["es_este_mes"] = df_ult[col].notna() & (df_ult[col] >= inicio_mes)

# -----------------------------
# 5) Normalizar categor√≠a √∫ltima
# -----------------------------
df_ult["cat_norm"] = (
    df_ult["CATEGORIA_PRED_ultima"]
    .astype("string")
    .str.strip()
    .str.upper()
)

# -----------------------------
# 6) Liquidado definitivo
# -----------------------------
df_ult["tiene_liquidado_historico"] = df_ult["Id deuda"].map(liq_hist).fillna(False)
cond_f4 = df_ult["tiene_liquidado_historico"]

# -----------------------------
# 7) Fase 3 ‚Äî Avance
# -----------------------------
es_avance_cat = df_ult["cat_norm"].isin(
    ["ACUERDO", "DESCUENTO", "CONTRAPROPUESTA"]
).fillna(False)

deuda = pd.to_numeric(df_ult["D_BRAVO"], errors="coerce")
pab = pd.to_numeric(df_ult["payment_to_bank_ultima"], errors="coerce")
cond_f3 = (es_avance_cat & ((deuda - pab) >= 10000)).fillna(False)

# -----------------------------
# 8) Fase 1 ‚Äî Sin actualizar
# -----------------------------
cond_f1 = ((~df_ult["tiene_obs"]) | (~df_ult["es_este_mes"])).fillna(False)

# -----------------------------
# 9) Fase 2 ‚Äî Actualizado
# -----------------------------
cond_f2 = ((~cond_f1) & (~cond_f3) & (~cond_f4)).fillna(False)

# -----------------------------
# 10) Asignar FASE
# -----------------------------
df_ult["FASE"] = np.select(
    [
        cond_f4.to_numpy(bool),
        cond_f3.to_numpy(bool),
        cond_f2.to_numpy(bool),
        cond_f1.to_numpy(bool),
    ],
    [
        "Fase 4 ‚Äî Liquidado",
        "Fase 3 ‚Äî Avance",
        "Fase 2 ‚Äî Actualizado",
        "Fase 1 ‚Äî Sin actualizar / antes de mes",
    ],
    default="Fase 2 ‚Äî Actualizado"
)

# -----------------------------
# 11) STATUS
# -----------------------------
status_cat = df_ult["cat_norm"].str.title()

df_ult["STATUS"] = np.select(
    [
        cond_f4.to_numpy(bool),
        cond_f3.to_numpy(bool),
        cond_f2.to_numpy(bool),
        (~df_ult["tiene_obs"]).to_numpy(bool),
        (df_ult["tiene_obs"] & ~df_ult["es_este_mes"]).to_numpy(bool),
    ],
    [
        "Liquidado",
        status_cat,
        status_cat,
        "Sin actualizaci√≥n",
        "Actualizado antes",
    ],
    default=status_cat
)

# -----------------------------
# 12) Ingreso funnel
# -----------------------------
df_ult["Ingreso_funnel"] = np.where(
    cond_f4.to_numpy(bool),
    0,
    df_ult["Ingreso_esperado"].fillna(0)
)

# -----------------------------
# 13) Base final
# -----------------------------
cols_final = [
    "Referencia",
    "Id deuda",
    "Negociador",
    "BANCOS_ESTANDAR",
    "Tipo de Liquidacion",
    "prioridad_tradicional",
    "prioridad_asignada",
    "D_BRAVO",
    "Ingreso_esperado",
    "Ingreso_funnel",
    "inserted_at_ultima",
    "end_ultima",
    "CATEGORIA_PRED_ultima",
    "payment_to_bank_ultima",
    "FASE",
    "STATUS",
    "tiene_obs",
    "es_este_mes",
    "tiene_liquidado_historico",
    "Bucket",
]
cols_final = [c for c in cols_final if c in df_ult.columns]

df_base_funnel = df_ult[cols_final].copy()

print("‚úÖ df_base_funnel listo")
print("shape:", df_base_funnel.shape)
print("\nDistribuci√≥n por FASE:")
print(df_base_funnel["FASE"].value_counts(dropna=False))

print("\nLiquidado (Fase 4) - filas:",
      (df_base_funnel["FASE"] == "Fase 4 ‚Äî Liquidado").sum())

# En GitHub / local usamos print; en Colab se ver√° igual
print(df_base_funnel.head(20))

‚úÖ df_base_funnel listo
shape: (41276, 20)

Distribuci√≥n por FASE:
FASE
Fase 1 ‚Äî Sin actualizar / antes de mes    40325
Fase 2 ‚Äî Actualizado                        758
Fase 3 ‚Äî Avance                             175
Fase 4 ‚Äî Liquidado                           18
Name: count, dtype: int64

Liquidado (Fase 4) - filas: 18
    Referencia  Id deuda                       Negociador  \
0      3012650    111266                         Negodito   
1   3122962933    225561  William Santiago Abril Esguerra   
2   3122962933    225562  William Santiago Abril Esguerra   
3   3122962933    225563  William Santiago Abril Esguerra   
4   3196892494    404243        Dayana Isabel Ojito Ortiz   
5   3183092531    427629      Laura Yineth Torres Moncada   
6   3003853673    434218   Cindy Viviana Barrera Buitrago   
7   3003853673    434219   Cindy Viviana Barrera Buitrago   
8   3003853673    434220   Cindy Viviana Barrera Buitrago   
9   3142828344    445585  Vivian Caterin Rodriguez Verano 

In [49]:
cols_eliminar_base = [
    'prioridad_tradicional',
    'prioridad_asignada'
]

df_base_funnel = df_base_funnel.drop(
    columns=[c for c in cols_eliminar_base if c in df_base_funnel.columns],
    errors="ignore"
)

df_timeline_final = df_timeline_final.drop(
    columns=[c for c in cols_eliminar_base if c in df_timeline_final.columns],
    errors="ignore"
)

In [50]:
df_base_funnel

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Tipo de Liquidacion,D_BRAVO,Ingreso_esperado,Ingreso_funnel,inserted_at_ultima,end_ultima,CATEGORIA_PRED_ultima,payment_to_bank_ultima,FASE,STATUS,tiene_obs,es_este_mes,tiene_liquidado_historico,Bucket
0,3012650,111266,Negodito,Scotiabank Colpatria,Cr. Parcial,9256000.0,1.177355e+06,1.177355e+06,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,
1,3122962933,225561,William Santiago Abril Esguerra,Banco de Occidente,Tradicional,2138165.0,1.908312e+05,1.908312e+05,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0
2,3122962933,225562,William Santiago Abril Esguerra,Banco de Occidente,Tradicional,3299116.0,3.297796e+05,3.297796e+05,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0
3,3122962933,225563,William Santiago Abril Esguerra,Tuya,Tradicional,5959400.0,7.143101e+05,7.143101e+05,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,Cr. Parcial,1921650.0,1.646470e+05,1.646470e+05,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41554,3225833196,2091518,Gabriela Saavedra Latorre,Banco Davivienda,Tradicional,13695024.0,8.890871e+05,8.890871e+05,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0
41555,3225833196,2091519,Gabriela Saavedra Latorre,Scotiabank Colpatria,Tradicional,50463309.0,4.503850e+06,4.503850e+06,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0
41556,3225833196,2091520,Gabriela Saavedra Latorre,Bancolombia,Tradicional,82022139.0,6.881247e+06,6.881247e+06,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0
41557,3105507630,2091523,Julian Andres Ossa Bohorquez,Bancolombia,Tradicional,130935011.0,1.098479e+07,1.098479e+07,NaT,,,,Fase 1 ‚Äî Sin actualizar / antes de mes,Sin actualizaci√≥n,False,False,False,10.0


In [51]:
df_timeline_final

Unnamed: 0,Referencia,Id deuda,Negociador,BANCOS_ESTANDAR,Descuento,D_BRAVO,Ingreso_esperado,Estructurable,Potencial,Meses en el Programa,...,Ahorro medio,inserted_at,end,payment_to_bank,CATEGORIA_PRED,observations,tipo_fila,Negociador liquidacion,Por?,Pago_banco_esperado
0,3012650,111266,Negodito,Scotiabank Colpatria,0.7126,9256000.0,1.177355e+06,1.0,5.0,116.0,...,0.0000,NaT,,,,,,,,2.660174e+06
1,3122962933,225561,William Santiago Abril Esguerra,Banco de Occidente,0.5000,2138165.0,1.908312e+05,1.0,6.0,80.0,...,108216.3333,NaT,,,,,,,,1.069082e+06
2,3122962933,225562,William Santiago Abril Esguerra,Banco de Occidente,0.5600,3299116.0,3.297796e+05,1.0,8.0,80.0,...,108216.3333,NaT,,,,,,,,1.451611e+06
3,3122962933,225563,William Santiago Abril Esguerra,Tuya,0.6715,5959400.0,7.143101e+05,0.0,13.0,80.0,...,108216.3333,NaT,,,,,,,,1.957663e+06
4,3196892494,404243,Dayana Isabel Ojito Ortiz,Banco Popular,0.4800,1921650.0,1.646470e+05,1.0,40.0,75.0,...,0.0000,NaT,,,,,,,,9.992580e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41554,3225833196,2091518,Gabriela Saavedra Latorre,Banco Davivienda,0.3637,13695024.0,8.890871e+05,1.0,5.0,2.0,...,626.7500,NaT,,,,,,,,8.714144e+06
41555,3225833196,2091519,Gabriela Saavedra Latorre,Scotiabank Colpatria,0.5000,50463309.0,4.503850e+06,0.0,15.0,2.0,...,626.7500,NaT,,,,,,,,2.523165e+07
41556,3225833196,2091520,Gabriela Saavedra Latorre,Bancolombia,0.4700,82022139.0,6.881247e+06,0.0,24.0,2.0,...,626.7500,NaT,,,,,,,,4.347173e+07
41557,3105507630,2091523,Julian Andres Ossa Bohorquez,Bancolombia,0.4700,130935011.0,1.098479e+07,0.0,30.0,2.0,...,209903.0000,NaT,,,,,,,,6.939556e+07


In [52]:
df_base_funnel.info()

<class 'pandas.core.frame.DataFrame'>
Index: 41276 entries, 0 to 41558
Data columns (total 18 columns):
 #   Column                     Non-Null Count  Dtype                         
---  ------                     --------------  -----                         
 0   Referencia                 41276 non-null  Int64                         
 1   Id deuda                   41276 non-null  Int64                         
 2   Negociador                 41265 non-null  object                        
 3   BANCOS_ESTANDAR            41276 non-null  object                        
 4   Tipo de Liquidacion        41276 non-null  object                        
 5   D_BRAVO                    41276 non-null  float64                       
 6   Ingreso_esperado           36308 non-null  float64                       
 7   Ingreso_funnel             41276 non-null  float64                       
 8   inserted_at_ultima         951 non-null    datetime64[ns, America/Bogota]
 9   end_ultima            

In [53]:
df_timeline_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41559 entries, 0 to 41558
Data columns (total 23 columns):
 #   Column                  Non-Null Count  Dtype                         
---  ------                  --------------  -----                         
 0   Referencia              41559 non-null  Int64                         
 1   Id deuda                41559 non-null  Int64                         
 2   Negociador              41548 non-null  object                        
 3   BANCOS_ESTANDAR         41559 non-null  object                        
 4   Descuento               36590 non-null  float64                       
 5   D_BRAVO                 41559 non-null  float64                       
 6   Ingreso_esperado        36590 non-null  float64                       
 7   Estructurable           41541 non-null  float64                       
 8   Potencial               36452 non-null  float64                       
 9   Meses en el Programa    41541 non-null  float64   

In [54]:
# =========================================================
# SUBIR DATAFRAMES A GOOGLE SHEETS
# Compatible con: Google Colab + GitHub
# Hojas: Timeline y Funnel
# =========================================================

import os
import json
import re
import numpy as np
import pandas as pd

# -----------------------------
# 1) Instalar dependencias
# -----------------------------
try:
    import gspread
    from gspread_dataframe import set_with_dataframe
    from google.oauth2.service_account import Credentials
except ImportError:
    import subprocess, sys
    subprocess.check_call([
        sys.executable, "-m", "pip", "install", "-q",
        "gspread", "gspread-dataframe", "google-auth"
    ])
    import gspread
    from gspread_dataframe import set_with_dataframe
    from google.oauth2.service_account import Credentials


# -----------------------------
# 2) Parse robusto del secret
# -----------------------------
def _robust_json_loads(s: str) -> dict:
    """
    Intenta convertir un string a dict JSON manejando:
    - comillas extra
    - \n escapados vs saltos reales
    - caracteres de control
    """
    if not isinstance(s, str):
        raise TypeError("Se esperaba string para parsear JSON")

    s0 = s.strip()

    # quitar comillas envolventes si vienen
    if (s0.startswith('"') and s0.endswith('"')) or (s0.startswith("'") and s0.endswith("'")):
        s0 = s0[1:-1].strip()

    # 1) intento directo
    try:
        return json.loads(s0)
    except Exception:
        pass

    # 2) reemplazar \\n por saltos reales (t√≠pico en secrets)
    s1 = s0.replace("\\n", "\n")
    try:
        return json.loads(s1)
    except Exception:
        pass

    # 3) al rev√©s: si viene con saltos reales y rompe el JSON,
    # escapamos saltos DENTRO de private_key a \\n
    # (esto suele arreglar el "Invalid control character")
    def fix_private_key(text: str) -> str:
        # captura el valor de private_key incluyendo saltos
        m = re.search(r'"private_key"\s*:\s*"([\s\S]*?)"\s*,\s*"client_email"', text)
        if not m:
            return text
        pk = m.group(1)
        pk_fixed = pk.replace("\n", "\\n")
        return text.replace(pk, pk_fixed)

    s2 = fix_private_key(s0)
    try:
        return json.loads(s2)
    except Exception as e:
        raise ValueError(f"No pude parsear MI_JSON como JSON v√°lido. Error final: {e}")


def load_service_account_info():
    """
    Devuelve dict con credenciales:
    - Colab: secret MI_JSON
    - GitHub: env GOOGLE_SERVICE_ACCOUNT_JSON
    """

    # ---- 1) Colab: MI_JSON
    try:
        from google.colab import userdata  # existe solo en Colab
        mi_json = userdata.get("MI_JSON")
        if mi_json is None:
            raise ValueError("MI_JSON est√° vac√≠o o no existe en Colab Secrets.")

        if isinstance(mi_json, dict):
            return mi_json

        if isinstance(mi_json, str):
            return _robust_json_loads(mi_json)

        raise TypeError(f"MI_JSON tiene tipo inesperado: {type(mi_json)}")

    except Exception as e:
        print("‚ÑπÔ∏è No usando MI_JSON (Colab):", str(e))

    # ---- 2) GitHub: env
    sa = os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
    if not sa:
        raise ValueError(
            "‚ùå No se encontraron credenciales.\n"
            "‚úî En Colab: arregla el secret MI_JSON (debe ser JSON v√°lido)\n"
            "‚úî En GitHub: define GOOGLE_SERVICE_ACCOUNT_JSON"
        )

    return _robust_json_loads(sa)


# -----------------------------
# 3) Subida a Sheets
# -----------------------------
def upload_df_to_sheet(spreadsheet, df: pd.DataFrame, sheet_name: str):
    df_out = df.copy()
    df_out = df_out.replace([np.inf, -np.inf], np.nan)

    for c in df_out.columns:
        if pd.api.types.is_datetime64_any_dtype(df_out[c]):
            df_out[c] = df_out[c].dt.strftime("%Y-%m-%d %H:%M:%S")

    try:
        ws = spreadsheet.worksheet(sheet_name)
    except gspread.WorksheetNotFound:
        ws = spreadsheet.add_worksheet(title=sheet_name, rows="100", cols="20")

    ws.clear()
    ws.resize(rows=max(len(df_out) + 1, 2), cols=max(len(df_out.columns), 1))

    set_with_dataframe(ws, df_out, include_index=False, include_column_header=True, resize=False)


# -----------------------------
# 4) Autenticaci√≥n y ejecuci√≥n
# -----------------------------
SERVICE_ACCOUNT_INFO = load_service_account_info()

SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive",
]

creds = Credentials.from_service_account_info(SERVICE_ACCOUNT_INFO, scopes=SCOPES)
gc = gspread.authorize(creds)

SPREADSHEET_ID = "1-shiHJuvJXdzen4s1SkZ4WeXl9N-7XzpRp3qN0vl1lE"
spreadsheet = gc.open_by_key(SPREADSHEET_ID)

upload_df_to_sheet(spreadsheet, df_timeline_final, "Timeline")
upload_df_to_sheet(spreadsheet, df_base_funnel, "Funnel")

print("‚úÖ √âxito: Timeline y Funnel cargadas correctamente")

‚úÖ √âxito: Timeline y Funnel cargadas correctamente
