# Exploración inicial de datos de pricing

Notebook preparado para entender la base disponible y definir próximos pasos del proyecto de pricing.

In [28]:
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [29]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

pd.options.display.float_format = "{:,.2f}".format

BASE_PATH = Path("base_con_sin_trx_cleaned.csv")
DATA_DIR = Path("data")
RAW_TERMINAL_FILE = DATA_DIR / "terminales_con_sin_transacciones_mensual.csv"
PRICING_FILE = DATA_DIR / "precios_actuales_klap.xlsx"
COMPETITOR_FILE = DATA_DIR / "precios_Competidores.xlsx"
BRAND_COST_FILE = DATA_DIR / "costos_marca_25_1.xlsx"
INTERCHANGE_FILE = DATA_DIR / "Tasa_Intercambio_Chile_Visa_y_Mastercard.csv"

## 1. Carga y validación de la base transaccional mensual

In [30]:
# Cargar dataset principal (terminales con/sin transacciones) limpio

df = pd.read_csv(BASE_PATH, low_memory=False)
print(f"Filas: {len(df):,}")
print(f"Columnas: {len(df.columns)}")
print(df.dtypes.head(10))

df.head()


Filas: 1,398,340
Columnas: 36
periodo                 object
rut_comercio            object
codigo_local             int64
numero_terminal          int64
estado_terminal         object
tecnologia_instalar     object
fecha_instalacion      float64
fecha_baja             float64
pos_con_trx              int64
modelo_equipo           object
dtype: object


Unnamed: 0,periodo,rut_comercio,codigo_local,numero_terminal,estado_terminal,tecnologia_instalar,fecha_instalacion,fecha_baja,pos_con_trx,modelo_equipo,...,monto_vale_electronico,qtrx_ripley,monto_ripley,qtrx_hites,monto_hites,qtrx_adquriencia_general,monto_adquriencia_general,margen_bruto_adquirencia,qtrx_total,monto_clp
0,2024-01,8123393-4,639,631,BAJA_POR_PERDIDA,POS GPRS,20080729.0,,0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2024-01,13453225-4,372,364,HABILITADO,POS GPRS,20080623.0,,1,,...,0.0,0.0,0.0,0.0,0.0,3326.0,14914534.0,0.0,6652.0,29829068.0
2,2024-01,7101190-9,560,552,HABILITADO,POS GPRS,20080708.0,,1,,...,0.0,0.0,0.0,0.0,0.0,502.0,2421660.0,0.0,1004.0,4843320.0
3,2024-01,12665285-2,593,585,BAJA_POR_PERDIDA,POS GPRS,20080717.0,,0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2024-01,6402350-0,711,703,HABILITADO,POS GPRS,20080822.0,,1,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# Conversión de tipos esenciales


def parse_numeric_date(series):
    numeric = pd.to_numeric(series, errors="coerce")
    result = pd.Series(pd.NaT, index=series.index, dtype="datetime64[ns]")
    valid = numeric.notna()
    if valid.any():
        numeric_int = numeric[valid].round().astype("Int64")
        formatted = numeric_int.astype(str).str.zfill(8)
        parsed = pd.to_datetime(formatted, format="%Y%m%d", errors="coerce")
        result.loc[valid] = parsed.values
    return result


df["periodo"] = pd.to_datetime(
    df["periodo"] + "-01", format="%Y-%m-%d", errors="coerce"
).dt.to_period("M")
df["fecha_instalacion"] = parse_numeric_date(df["fecha_instalacion"])
df["fecha_baja"] = parse_numeric_date(df["fecha_baja"])

value_cols = [c for c in df.columns if c.startswith("qtrx_") or c.startswith("monto_")]
for col in value_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

qtrx_cols = [c for c in df.columns if c.startswith("qtrx_") and c != "qtrx_total"]
monto_cols = [c for c in df.columns if c.startswith("monto_") and c != "monto_clp"]

df["qtrx_total"] = pd.to_numeric(df["qtrx_total"], errors="coerce").fillna(0)
df["monto_clp"] = pd.to_numeric(df["monto_clp"], errors="coerce").fillna(0)
if df["qtrx_total"].eq(0).all():
    df["qtrx_total"] = df[qtrx_cols].sum(axis=1)
if df["monto_clp"].eq(0).all():
    df["monto_clp"] = df[monto_cols].sum(axis=1)

df[["periodo", "fecha_instalacion", "fecha_baja", "qtrx_total", "monto_clp"]].head()


Unnamed: 0,periodo,fecha_instalacion,fecha_baja,qtrx_total,monto_clp
0,2024-01,2008-07-29,NaT,0.0,0.0
1,2024-01,2008-06-23,NaT,6652.0,29829068.0
2,2024-01,2008-07-08,NaT,1004.0,4843320.0
3,2024-01,2008-07-17,NaT,0.0,0.0
4,2024-01,2008-08-22,NaT,0.0,0.0


### 1.1 Estadísticas generales del dataset

In [32]:
summary = {
    "filas": len(df),
    "periodo_min": df["periodo"].min(),
    "periodo_max": df["periodo"].max(),
    "meses_distintos": df["periodo"].nunique(),
    "comercios_distintos": df["rut_comercio"].nunique(),
    "locales_distintos": df["codigo_local"].nunique(),
    "terminales_distintos": df["numero_terminal"].nunique(),
    "total_transacciones": int(df["qtrx_total"].sum()),
    "total_monto_clp": df["monto_clp"].sum(),
    "share_filas_monto_cero": df["monto_clp"].eq(0).mean(),
}
summary


{'filas': 1398340,
 'periodo_min': Period('2024-01', 'M'),
 'periodo_max': Period('2024-12', 'M'),
 'meses_distintos': 12,
 'comercios_distintos': 75282,
 'locales_distintos': 98872,
 'terminales_distintos': 141756,
 'total_transacciones': 1779746258,
 'total_monto_clp': np.float64(17160132763231.0),
 'share_filas_monto_cero': np.float64(0.8397228141939729)}

In [33]:
estado_counts = df["estado_terminal"].value_counts(dropna=False).to_frame("filas")
tecnologia_counts = (
    df["tecnologia_instalar"].value_counts(dropna=False).to_frame("filas")
)
vertical_counts = df["vertical"].value_counts(dropna=False).to_frame("filas")
adquirencia_counts = df["adquirencia"].value_counts(dropna=False).to_frame("filas")

estado_counts.head()


Unnamed: 0_level_0,filas
estado_terminal,Unnamed: 1_level_1
INGRESADO,564926
HABILITADO,531861
BAJA,123795
CON TRANSACCIONES,76321
BAJA_POR_PERDIDA,47673


In [34]:
tecnologia_counts.head()

Unnamed: 0_level_0,filas
tecnologia_instalar,Unnamed: 1_level_1
HOST TO HOST,230769
POS MOVIL,228563
APP,183518
POS ANDROID MOVIL,133562
POS GPRS,130571


In [35]:
vertical_counts.head()

Unnamed: 0_level_0,filas
vertical,Unnamed: 1_level_1
ALIMENTACION,495258
C. BARRIO,491507
RETAIL,212387
SALUD,162055
COMBUSTIBLE,26390


In [36]:
adquirencia_counts

Unnamed: 0_level_0,filas
adquirencia,Unnamed: 1_level_1
No,849900
Si,547543
,897


In [37]:
brand_monto_cols = [
    c for c in df.columns if c.startswith("monto_") and c != "monto_clp"
]
brand_totals = (
    df[brand_monto_cols].sum().sort_values(ascending=False).to_frame("monto_total_clp")
)
brand_totals


Unnamed: 0,monto_total_clp
monto_adquriencia_general,8576206940319.0
monto_visa,6408079155108.0
monto_mastercard,2167070829369.0
monto_casas_comerciales,3860336623.0
monto_hites,3860336623.0
monto_amex,1055165189.0
monto_vale_electronico,0.0
monto_ripley,0.0


### 1.2 Agregación comercio × mes

In [38]:
merchant_month = (
    df.groupby(["periodo", "rut_comercio"], as_index=False)
    .agg(
        {
            "monto_clp": "sum",
            "qtrx_total": "sum",
            "codigo_local": "nunique",
            "numero_terminal": "nunique",
            "estado_terminal": lambda s: s.mode().iat[0]
            if not s.mode().empty
            else None,
            "tecnologia_instalar": lambda s: "|".join(
                sorted(set(filter(None, s.astype(str))))
            ),
        }
    )
    .rename(columns={"codigo_local": "n_locales", "numero_terminal": "n_terminales"})
)
merchant_month["ticket_promedio"] = np.where(
    merchant_month["qtrx_total"] > 0,
    merchant_month["monto_clp"] / merchant_month["qtrx_total"],
    np.nan,
)
merchant_month.head()


Unnamed: 0,periodo,rut_comercio,monto_clp,qtrx_total,n_locales,n_terminales,estado_terminal,tecnologia_instalar,ticket_promedio
0,2024-01,07289970-9,0.0,0.0,1,1,INGRESADO,TAP TO PHONE,
1,2024-01,10000001-6,10207680.0,2296.0,1,1,HABILITADO,POS MOVIL,4445.85
2,2024-01,10000058-K,0.0,0.0,1,1,HABILITADO,APP,
3,2024-01,10000217-5,0.0,0.0,2,2,INGRESADO,APP|WEB,
4,2024-01,10000373-2,0.0,0.0,1,1,INGRESADO,WEB,


In [39]:
agg_summary = {
    "filas": len(merchant_month),
    "meses_activos_share": merchant_month["monto_clp"].gt(0).mean(),
    "ticket_promedio_describe": merchant_month["ticket_promedio"].describe(
        percentiles=[0.25, 0.5, 0.75, 0.9]
    ),
}
agg_summary


{'filas': 754473,
 'meses_activos_share': np.float64(0.20151814577857657),
 'ticket_promedio_describe': count      152,040.00
 mean        16,442.06
 std         75,493.12
 min             50.00
 25%          4,015.60
 50%          6,323.79
 75%         13,501.06
 90%         31,312.34
 max     16,551,758.00
 Name: ticket_promedio, dtype: float64}

In [40]:
region_totals = df.groupby("region_suc")["monto_clp"].sum().sort_values(ascending=False)
region_totals.head(10)


region_suc
METROPOLITANA   13,623,427,230,590.00
VIII REGION        625,641,338,158.00
V REGION           620,620,304,889.00
IX REGION          392,501,944,776.00
VI REGION          385,654,472,809.00
VII REGION         325,265,994,685.00
X REGION           296,794,341,428.00
IV REGION          278,148,513,098.00
II REGION          146,665,476,919.00
III REGION         142,975,399,399.00
Name: monto_clp, dtype: float64

### 1.3 Validaciones y aclaraciones clave
- `monto_adquriencia_general` representa el total procesado de pagos con tarjeta por registro, por lo que se utiliza como referencia del volumen global.
- En la base limpia `monto_clp` coincide con `monto_adquriencia_general`; los campos por marca (`monto_visa`, `monto_mastercard`, etc.) son desgloses del mismo total.
- `estado_terminal` refleja el estado vigente al momento de la extracción y no necesariamente el estado histórico del periodo, por lo que conviene complementarlo con fechas de instalación/baja y métricas de actividad.

In [41]:
# Validación de consistencia entre monto_clp y monto_adquriencia_general
monto_diff = df["monto_clp"] - df["monto_adquriencia_general"]
validacion_montos = pd.DataFrame(
    {
        "suma_absoluta_diferencias": [monto_diff.abs().sum()],
        "maxima_diferencia_absoluta": [monto_diff.abs().max()],
        "porcentaje_coincidencia": [monto_diff.eq(0).mean()],
    }
)
validacion_montos


Unnamed: 0,suma_absoluta_diferencias,maxima_diferencia_absoluta,porcentaje_coincidencia
0,8583925822912.0,180210893820.0,0.84


## 2. Información complementaria de pricing y mercado

In [42]:
pricing_grid = pd.read_excel(PRICING_FILE)
competitor_prices = pd.read_excel(COMPETITOR_FILE)
brand_costs = pd.read_excel(BRAND_COST_FILE)
interchange_caps = pd.read_csv(INTERCHANGE_FILE)

pricing_grid.head()


Unnamed: 0,Segmento,Rango de ventas (MM CLP),Medio,Variable %,Fijo UF,Fijo CLP (aprox)
0,Estándar,0 a 8,Crédito,1.29,0.0,95
1,Estándar,0 a 8,Débito,0.57,0.0,95
2,Estándar,0 a 8,Prepago,0.99,0.0,95
3,PRO,8 a 30,Crédito,1.24,0.0,93
4,PRO,8 a 30,Débito,0.52,0.0,77


In [43]:
competitor_prices.head()

Unnamed: 0,nombre_tarifa,marca,categoria_producto,merchant_discount,merchant_discount_fijo
0,TRANSBANK,MASTERCARD,CREDITO,0.01,0.0
1,TRANSBANK,MASTERCARD,DEBITO,0.01,0.0
2,TRANSBANK,MASTERCARD,PREPAGO,0.01,0.0
3,TRANSBANK,MASTERCARD,INTERNACIONAL,0.03,0.0
4,TRANSBANK,VISA,CREDITO,0.01,0.0


In [44]:
brand_costs.head()

Unnamed: 0,Marca,Mes-año,Total costos de marca %
0,Mastercard,2025-01-01,0.0
1,Mastercard,2025-02-01,0.0
2,Mastercard,2025-03-01,0.0
3,Mastercard,2025-04-01,0.0
4,Mastercard,2025-05-01,0.0


In [45]:
interchange_caps.head()

Unnamed: 0,Marca,Canal,Segmento,Tipo de tarjeta,Subtipo,TI %
0,Mastercard,CNP,Airlines Large,Crédito,Black,1.14
1,Mastercard,CNP,Airlines Large,Crédito,Commercial,1.18
2,Mastercard,CNP,Airlines Large,Crédito,Platinum,1.14
3,Mastercard,CNP,Airlines Large,Crédito,Standard & Gold,1.14
4,Mastercard,CP,Airlines Large,Crédito,Black,1.1


### 2.1 Estadísticas resumidas de costos e interchange

In [46]:
brand_cost_summary = (
    brand_costs.groupby("Marca")["Total costos de marca %"]
    .agg(["mean", "min", "max"])
    .rename(columns={"mean": "promedio", "min": "minimo", "max": "maximo"})
)
brand_cost_summary


Unnamed: 0_level_0,promedio,minimo,maximo
Marca,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mastercard,0.0,0.0,0.0
Visa,0.0,0.0,0.0


In [47]:
cp_interchange = interchange_caps[interchange_caps["Canal"] == "CP"]
interchange_summary = (
    cp_interchange.groupby(["Marca", "Tipo de tarjeta"])["TI %"]
    .median()
    .unstack(level="Tipo de tarjeta")
)
interchange_summary


Tipo de tarjeta,Crédito,Débito,Prepago
Marca,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mastercard,0.85,,
Visa,1.14,0.5,0.94


## 3. Ingeniería de variables para pricing

Construimos una tabla base a nivel comercio con indicadores de volumen, actividad, mezcla de marcas y estimaciones de costo para alimentar escenarios de pricing.

In [48]:
brand_detail_cols = [
    "monto_visa",
    "monto_mastercard",
    "monto_amex",
    "monto_casas_comerciales",
    "monto_vale_electronico",
    "monto_ripley",
    "monto_hites",
    "monto_adquriencia_general",
]
merchant_brand = (
    df.groupby("rut_comercio")[brand_detail_cols]
    .sum()
    .rename(columns={"monto_adquriencia_general": "monto_total_tarjetas"})
    .reset_index()
)
brand_value_cols = [
    c
    for c in merchant_brand.columns
    if c.startswith("monto_") and c != "monto_total_tarjetas"
]
for col in brand_value_cols:
    share_col = col.replace("monto_", "share_")
    merchant_brand[share_col] = np.where(
        merchant_brand["monto_total_tarjetas"] > 0,
        merchant_brand[col] / merchant_brand["monto_total_tarjetas"],
        np.nan,
    )
merchant_brand.head()


Unnamed: 0,rut_comercio,monto_visa,monto_mastercard,monto_amex,monto_casas_comerciales,monto_vale_electronico,monto_ripley,monto_hites,monto_total_tarjetas,share_visa,share_mastercard,share_amex,share_casas_comerciales,share_vale_electronico,share_ripley,share_hites
0,07289970-9,0,0,0,0,0.0,0.0,0.0,0.0,,,,,,,
1,10000001-6,71414624,6616365,0,0,0.0,0.0,0.0,78030989.0,0.92,0.08,0.0,0.0,0.0,0.0,0.0
2,10000058-K,0,0,0,0,0.0,0.0,0.0,0.0,,,,,,,
3,10000217-5,0,0,0,0,0.0,0.0,0.0,0.0,,,,,,,
4,10000372-4,2439691,58420,0,0,0.0,0.0,0.0,2498111.0,0.98,0.02,0.0,0.0,0.0,0.0,0.0


In [49]:
merchant_features = (
    merchant_month.groupby("rut_comercio")
    .agg(
        monto_total_anual=("monto_clp", "sum"),
        qtrx_total_anual=("qtrx_total", "sum"),
        meses_reportados=("periodo", "nunique"),
        meses_con_ventas=("monto_clp", lambda s: int((s > 0).sum())),
        monto_promedio_mensual=("monto_clp", "mean"),
        monto_max_mensual=("monto_clp", "max"),
        qtrx_promedio_mensual=("qtrx_total", "mean"),
        ticket_promedio_mensual=("ticket_promedio", "mean"),
        n_locales_max=("n_locales", "max"),
        n_terminales_max=("n_terminales", "max"),
    )
    .reset_index()
)
merchant_features["share_meses_activos"] = np.where(
    merchant_features["meses_reportados"] > 0,
    merchant_features["meses_con_ventas"] / merchant_features["meses_reportados"],
    np.nan,
)
tech_counts = (
    df.groupby("rut_comercio")["tecnologia_instalar"]
    .nunique()
    .reset_index(name="n_tecnologias_unicas")
)
estado_actual = (
    df.sort_values("periodo")
    .groupby("rut_comercio")["estado_terminal"]
    .last()
    .reset_index(name="estado_terminal_actual")
)
merchant_features = merchant_features.merge(
    tech_counts, on="rut_comercio", how="left"
).merge(estado_actual, on="rut_comercio", how="left")
merchant_features.head()


Unnamed: 0,rut_comercio,monto_total_anual,qtrx_total_anual,meses_reportados,meses_con_ventas,monto_promedio_mensual,monto_max_mensual,qtrx_promedio_mensual,ticket_promedio_mensual,n_locales_max,n_terminales_max,share_meses_activos,n_tecnologias_unicas,estado_terminal_actual
0,07289970-9,0.0,0.0,11,0,0.0,0.0,0.0,,1,1,0.0,1,INGRESADO
1,10000001-6,156061978.0,28802.0,11,11,14187452.55,19680080.0,2618.36,5420.65,1,3,1.0,1,HABILITADO
2,10000058-K,0.0,0.0,12,0,0.0,0.0,0.0,,1,1,0.0,1,HABILITADO
3,10000217-5,0.0,0.0,12,0,0.0,0.0,0.0,,2,2,0.0,2,PROCESO_BAJA
4,10000372-4,4996222.0,1998.0,6,6,832703.67,995680.0,333.0,2515.63,1,1,1.0,1,HABILITADO


In [50]:
brand_cost_promedio = (
    brand_costs.groupby("Marca")["Total costos de marca %"].mean().to_dict()
)
brand_cost_promedio = {k.lower(): v for k, v in brand_cost_promedio.items()}

assumed_mix = {"Crédito": 0.6, "Débito": 0.35, "Prepago": 0.05}
cp_interchange = interchange_caps[interchange_caps["Canal"] == "CP"]
interchange_median_rates = (
    cp_interchange.groupby("Tipo de tarjeta")["TI %"].median().div(100).to_dict()
)
interchange_floor_rate = sum(
    assumed_mix.get(tipo, 0) * interchange_median_rates.get(tipo, 0)
    for tipo in assumed_mix
)

for col in [
    c
    for c in merchant_brand.columns
    if c.startswith("monto_") and c != "monto_total_tarjetas"
]:
    brand_key = col.replace("monto_", "")
    merchant_brand[f"costo_marca_{brand_key}"] = merchant_brand[
        col
    ] * brand_cost_promedio.get(brand_key.lower(), 0)
merchant_brand["costo_marca_estimado"] = merchant_brand[
    [c for c in merchant_brand.columns if c.startswith("costo_marca_")]
].sum(axis=1)

merchant_pricing_base = merchant_features.merge(
    merchant_brand, on="rut_comercio", how="left"
)

merchant_pricing_base["interchange_floor_estimado"] = (
    merchant_pricing_base["monto_total_anual"] * interchange_floor_rate
)
merchant_pricing_base["costo_min_estimado"] = (
    merchant_pricing_base["interchange_floor_estimado"]
    + merchant_pricing_base["costo_marca_estimado"]
)

merchant_pricing_base["monto_promedio_mensual"] = merchant_pricing_base[
    "monto_promedio_mensual"
].fillna(0)
segment_bins = [0, 8_000_000, 30_000_000, 75_000_000, float("inf")]
segment_labels = ["Estándar", "PRO", "PRO Max", "Enterprise"]
segment_assignment = pd.cut(
    merchant_pricing_base["monto_promedio_mensual"],
    bins=segment_bins,
    labels=segment_labels,
    right=False,
    include_lowest=True,
)
merchant_pricing_base["segmento_promedio_volumen"] = segment_assignment.astype("string")
merchant_pricing_base.loc[
    merchant_pricing_base["monto_promedio_mensual"] == 0, "segmento_promedio_volumen"
] = "Sin ventas"

feature_cols_preview = [
    "rut_comercio",
    "monto_total_anual",
    "monto_promedio_mensual",
    "meses_reportados",
    "meses_con_ventas",
    "share_meses_activos",
    "n_locales_max",
    "n_terminales_max",
    "n_tecnologias_unicas",
    "estado_terminal_actual",
    "segmento_promedio_volumen",
    "monto_total_tarjetas",
    "share_visa",
    "share_mastercard",
    "interchange_floor_estimado",
    "costo_marca_estimado",
    "costo_min_estimado",
]
merchant_pricing_base[feature_cols_preview].head()


Unnamed: 0,rut_comercio,monto_total_anual,monto_promedio_mensual,meses_reportados,meses_con_ventas,share_meses_activos,n_locales_max,n_terminales_max,n_tecnologias_unicas,estado_terminal_actual,segmento_promedio_volumen,monto_total_tarjetas,share_visa,share_mastercard,interchange_floor_estimado,costo_marca_estimado,costo_min_estimado
0,07289970-9,0.0,0.0,11,0,0.0,1,1,1,INGRESADO,Sin ventas,0.0,,,0.0,0.0,0.0
1,10000001-6,156061978.0,14187452.55,11,11,1.0,1,3,1,HABILITADO,PRO,78030989.0,0.92,0.08,1376466.65,163626.55,1540093.19
2,10000058-K,0.0,0.0,12,0,0.0,1,1,1,HABILITADO,Sin ventas,0.0,,,0.0,0.0,0.0
3,10000217-5,0.0,0.0,12,0,0.0,2,2,2,PROCESO_BAJA,Sin ventas,0.0,,,0.0,0.0,0.0
4,10000372-4,4996222.0,832703.67,6,6,1.0,1,1,1,HABILITADO,Estándar,2498111.0,0.98,0.02,44066.68,4933.79,49000.47


#### Supuestos aplicados
- Se utiliza una mezcla estándar de tarjetas (`Crédito` 60%, `Débito` 35%, `Prepago` 5%) para estimar el piso de interchange, dado que la base no distingue el tipo de plástico.
- Los costos de marca se aproximan con el promedio histórico 2025 provisto (`costos_marca_25_1.xlsx`) y sólo están disponibles para Visa y Mastercard; el resto de las redes se modelan con costo 0 hasta contar con información adicional.
- Las cuotas de mezcla por marca (`share_*`) se calculan dividiendo los montos por red sobre `monto_total_tarjetas`, equivalente a `monto_adquriencia_general`.

In [51]:
feature_output = DATA_DIR / "processed" / "merchant_pricing_feature_base.parquet"
feature_output.parent.mkdir(parents=True, exist_ok=True)
merchant_pricing_base.to_parquet(feature_output, index=False)
feature_output


PosixPath('data/processed/merchant_pricing_feature_base.parquet')

### 3.1 Modelo de pricing y margen
Construimos el modelo que estima ingresos, costos y brecha competitiva por comercio para priorizar acciones de pricing.

In [52]:
# Preparar tabla de tarifas vigentes de Klap por segmento y medio
pricing_grid["Variable_pct"] = pricing_grid["Variable %"] / 100
pricing_matrix = pricing_grid.pivot_table(
    index="Segmento", columns="Medio", values=["Variable_pct", "Fijo CLP (aprox)"]
)
pricing_matrix


Unnamed: 0_level_0,Fijo CLP (aprox),Fijo CLP (aprox),Fijo CLP (aprox),Variable_pct,Variable_pct,Variable_pct
Medio,Crédito,Débito,Prepago,Crédito,Débito,Prepago
Segmento,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Estándar,95.0,95.0,95.0,0.01,0.01,0.01
PRO,93.0,77.0,77.0,0.01,0.01,0.01
PRO Max,89.0,73.0,73.0,0.01,0.01,0.01


In [53]:
# Mezcla asumida de medios de pago (ante la ausencia de detalle por tipo de tarjeta)
assumed_mix = {"Crédito": 0.6, "Débito": 0.35, "Prepago": 0.05}
pd.Series(assumed_mix, name="mix")


Crédito   0.60
Débito    0.35
Prepago   0.05
Name: mix, dtype: float64

In [54]:
# Cálculo del MDR y fijo efectivos de Klap por segmento con el mix asumido
var_matrix = pricing_matrix.xs("Variable_pct", level=0, axis=1)
fijo_matrix = pricing_matrix.xs("Fijo CLP (aprox)", level=0, axis=1)

segment_effective = []
for segment in pricing_matrix.index:
    var_cols = var_matrix.loc[segment]
    fijo_cols = fijo_matrix.loc[segment]
    var_effective = 0.0
    fijo_effective = 0.0
    for medio, share in assumed_mix.items():
        var_valor = var_cols.get(medio, np.nan)
        if pd.notna(var_valor):
            var_effective += share * float(var_valor)
        fijo_valor = fijo_cols.get(medio, np.nan)
        if pd.notna(fijo_valor):
            fijo_effective += share * float(fijo_valor)
    segment_effective.append(
        {
            "Segmento": segment,
            "mdr_effectivo": var_effective,
            "fijo_effectivo": fijo_effective,
        }
    )

segment_effective = pd.DataFrame(segment_effective).set_index("Segmento").sort_index()
if "PRO Max" not in segment_effective.index:
    raise KeyError("No se encontró la tarifa PRO Max en la grilla oficial.")
if "Enterprise" not in segment_effective.index:
    segment_effective.loc["Enterprise"] = segment_effective.loc["PRO Max"]
segment_effective.loc["Sin ventas"] = {"mdr_effectivo": 0.0, "fijo_effectivo": 0.0}
segment_effective = segment_effective.reset_index()
segment_effective

Unnamed: 0,Segmento,mdr_effectivo,fijo_effectivo
0,Estándar,0.01,95.0
1,PRO,0.01,86.6
2,PRO Max,0.01,82.6
3,Enterprise,0.01,82.6
4,Sin ventas,0.0,0.0


In [55]:
# Enriquecer la base de comercios con tarifas vigentes
pricing_lookup = segment_effective.set_index("Segmento")[
    ["mdr_effectivo", "fijo_effectivo"]
]

merchant_pricing_base["segmento_norm"] = (
    merchant_pricing_base["segmento_promedio_volumen"]
    .fillna("Sin ventas")
    .replace({"": "Sin ventas"})
)

merchant_pricing_base["klap_mdr"] = (
    merchant_pricing_base["segmento_norm"]
    .map(pricing_lookup["mdr_effectivo"])
    .fillna(0.0)
)
merchant_pricing_base["klap_fijo_clp"] = (
    merchant_pricing_base["segmento_norm"]
    .map(pricing_lookup["fijo_effectivo"])
    .fillna(0.0)
)
merchant_pricing_base[["segmento_promedio_volumen", "klap_mdr", "klap_fijo_clp"]].head()

Unnamed: 0,segmento_promedio_volumen,klap_mdr,klap_fijo_clp
0,Sin ventas,0.0,0.0
1,PRO,0.01,86.6
2,Sin ventas,0.0,0.0
3,Sin ventas,0.0,0.0
4,Estándar,0.01,95.0


In [56]:
# Ingresos y márgenes estimados con las tarifas actuales
merchant_pricing_base["ingreso_variable"] = (
    merchant_pricing_base["monto_total_anual"] * merchant_pricing_base["klap_mdr"]
)
merchant_pricing_base["ingreso_fijo"] = (
    merchant_pricing_base["qtrx_total_anual"] * merchant_pricing_base["klap_fijo_clp"]
)
merchant_pricing_base["ingreso_total_klap"] = (
    merchant_pricing_base["ingreso_variable"] + merchant_pricing_base["ingreso_fijo"]
)
merchant_pricing_base["margen_estimado"] = (
    merchant_pricing_base["ingreso_total_klap"]
    - merchant_pricing_base["costo_min_estimado"]
)
merchant_pricing_base["margen_pct_volumen"] = np.where(
    merchant_pricing_base["monto_total_anual"] > 0,
    merchant_pricing_base["margen_estimado"]
    / merchant_pricing_base["monto_total_anual"],
    np.nan,
)
merchant_pricing_base[
    [
        "ingreso_total_klap",
        "costo_min_estimado",
        "margen_estimado",
        "margen_pct_volumen",
    ]
].describe()


Unnamed: 0,ingreso_total_klap,costo_min_estimado,margen_estimado,margen_pct_volumen
count,75282.0,75282.0,75282.0,24225.0
mean,4186797.5,2287310.96,1899486.54,0.04
std,432139287.52,211092374.71,221818910.76,0.12
min,0.0,0.0,-13053904.73,-0.0
25%,0.0,0.0,0.0,0.01
50%,0.0,0.0,0.0,0.02
75%,3008.63,1364.21,1014.77,0.03
max,103225440513.9,49822314788.13,53403125725.77,1.9


In [57]:
# Exploración de categorías de productos en el benchmark de competidores
competitor_prices["categoria_producto"].value_counts()


categoria_producto
INTERNACIONAL    12
CREDITO           9
DEBITO            9
PREPAGO           9
Name: count, dtype: int64

In [58]:
# Seleccionar Transbank como benchmark principal y calcular tasas promedio
comp_primary = competitor_prices[
    competitor_prices["nombre_tarifa"] == "TRANSBANK"
].copy()
comp_primary["merchant_discount_pct"] = comp_primary["merchant_discount"]
comp_primary["merchant_discount_fijo_clp"] = comp_primary["merchant_discount_fijo"]
comp_summary = comp_primary.groupby("categoria_producto")[
    ["merchant_discount_pct", "merchant_discount_fijo_clp"]
].median()
comp_summary


Unnamed: 0_level_0,merchant_discount_pct,merchant_discount_fijo_clp
categoria_producto,Unnamed: 1_level_1,Unnamed: 2_level_1
CREDITO,0.01,0.0
DEBITO,0.01,0.0
INTERNACIONAL,0.03,0.0
PREPAGO,0.01,0.0


In [59]:
# Mapeo de categoría -> medio y cálculo del MDR/fijo efectivos del benchmark
categoria_to_medio = {"Crédito": "Crédito", "Débito": "Débito", "Prepago": "Prepago"}
competitor_mix = []
for medio, share in assumed_mix.items():
    categoria = categoria_to_medio.get(medio)
    if categoria in comp_summary.index:
        row = comp_summary.loc[categoria]
    else:
        row = comp_summary.median()
    competitor_mix.append(
        {
            "medio": medio,
            "share": share,
            "mdr": row["merchant_discount_pct"],
            "fijo": row["merchant_discount_fijo_clp"],
        }
    )
competitor_mix = pd.DataFrame(competitor_mix)
competitor_mix


Unnamed: 0,medio,share,mdr,fijo
0,Crédito,0.6,0.01,0.0
1,Débito,0.35,0.01,0.0
2,Prepago,0.05,0.01,0.0


In [60]:
competitor_effective_mdr = (competitor_mix["share"] * competitor_mix["mdr"]).sum()
competitor_effective_fijo = (competitor_mix["share"] * competitor_mix["fijo"]).sum()
competitor_effective_mdr, competitor_effective_fijo


(np.float64(0.012150000000000001), np.float64(0.0018494999999999998))

In [61]:
merchant_pricing_base["competidor_mdr"] = competitor_effective_mdr
merchant_pricing_base["competidor_fijo_clp"] = competitor_effective_fijo
merchant_pricing_base["ingreso_comp_variable"] = (
    merchant_pricing_base["monto_total_anual"] * merchant_pricing_base["competidor_mdr"]
)
merchant_pricing_base["ingreso_comp_fijo"] = (
    merchant_pricing_base["qtrx_total_anual"]
    * merchant_pricing_base["competidor_fijo_clp"]
)
merchant_pricing_base["ingreso_total_comp"] = (
    merchant_pricing_base["ingreso_comp_variable"]
    + merchant_pricing_base["ingreso_comp_fijo"]
)
merchant_pricing_base["gap_pricing_mdr"] = (
    merchant_pricing_base["klap_mdr"] - merchant_pricing_base["competidor_mdr"]
)
merchant_pricing_base[["klap_mdr", "competidor_mdr", "gap_pricing_mdr"]].describe()


Unnamed: 0,klap_mdr,competidor_mdr,gap_pricing_mdr
count,75282.0,75282.0,75282.0
mean,0.0,0.01,-0.01
std,0.0,0.0,0.0
min,0.0,0.01,-0.01
25%,0.0,0.01,-0.01
50%,0.0,0.01,-0.01
75%,0.01,0.01,-0.0
max,0.01,0.01,-0.0


In [62]:
# Reglas de decisión para recomendaciones
THRESHOLD_MARGEN = 0.0
THRESHOLD_COMPETENCIA = 0.0015  # 0.15 p.p.
THRESHOLD_INACTIVIDAD = 0.2

share_activos = merchant_pricing_base["share_meses_activos"].fillna(0)
condiciones = [
    merchant_pricing_base["monto_total_anual"].eq(0),
    merchant_pricing_base["margen_estimado"] <= THRESHOLD_MARGEN,
    merchant_pricing_base["gap_pricing_mdr"] > THRESHOLD_COMPETENCIA,
    share_activos < THRESHOLD_INACTIVIDAD,
]
opciones = [
    "Reactivación comercial",
    "Ajustar MDR urgente",
    "Revisar competitividad",
    "Monitorear baja actividad",
]
merchant_pricing_base["accion_sugerida"] = np.select(
    condiciones, opciones, default="Mantener / Upsell servicios"
)
merchant_pricing_base["accion_sugerida"].value_counts()


accion_sugerida
Reactivación comercial         51057
Mantener / Upsell servicios    20569
Monitorear baja actividad       3529
Ajustar MDR urgente              127
Name: count, dtype: int64

In [63]:
# Impacto agregado por segmento y acción sugerida
impact_summary = (
    merchant_pricing_base.groupby(["segmento_promedio_volumen", "accion_sugerida"])
    .agg(
        n_comercios=("rut_comercio", "count"),
        monto_total_anual=("monto_total_anual", "sum"),
        margen_estimado=("margen_estimado", "sum"),
    )
    .sort_values("monto_total_anual", ascending=False)
)
impact_summary.head(10)


Unnamed: 0_level_0,Unnamed: 1_level_0,n_comercios,monto_total_anual,margen_estimado
segmento_promedio_volumen,accion_sugerida,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Enterprise,Mantener / Upsell servicios,781,15953224915445.0,128570229907.76
PRO,Mantener / Upsell servicios,3488,531432135885.0,6651131937.44
PRO Max,Mantener / Upsell servicios,783,348602667268.0,3145364065.11
Estándar,Mantener / Upsell servicios,15517,279462478050.0,4565306280.8
Enterprise,Ajustar MDR urgente,4,30217508916.0,-14147699.25
Estándar,Monitorear baja actividad,3518,4302571432.0,53205616.18
Enterprise,Monitorear baja actividad,2,3355301208.0,13039738.51
PRO,Ajustar MDR urgente,22,3295295168.0,-481998.47
PRO Max,Monitorear baja actividad,5,2653851883.0,12387306.66
PRO Max,Ajustar MDR urgente,7,2564326578.0,-556380.59


### 3.2 Interpretación inicial
- `klap_mdr` y `klap_fijo_clp` representan la tarifa efectiva pagada con la mezcla asumida.
- `margen_estimado` es el excedente sobre el piso de costos (interchange + marca); si es ≤ 0, hay riesgo de rentabilidad.
- `gap_pricing_mdr` cuantifica la brecha frente al benchmark Transbank; valores superiores a 0.15 p.p. motivan ajustes.
- `accion_sugerida` prioriza acciones: reactivación (sin ventas), ajuste urgente (margen nulo/negativo), revisión competitiva (brecha alta), monitoreo (baja actividad) y mantener/up-sell cuando la situación es saludable.
- `segmento_cluster_label` clasifica a los comercios en arquetipos (Alta contribución, Brecha competitiva, Margen en riesgo, Baja actividad, Optimización gradual, Sin ventas) para orientar estrategias específicas.

### 3.3 Nueva segmentación de comercios
Aplicamos clustering sobre las métricas de volumen, actividad y margen para identificar arquetipos accionables.

In [64]:
# Selección de variables para clustering (solo comercios con ventas)
feature_columns = [
    "monto_promedio_mensual",
    "share_meses_activos",
    "n_terminales_max",
    "n_tecnologias_unicas",
    "margen_pct_volumen",
    "gap_pricing_mdr",
    "klap_mdr",
    "competidor_mdr",
    "share_visa",
    "share_mastercard",
]
mask_activos = merchant_pricing_base["monto_total_anual"] > 0
seg_dataset = merchant_pricing_base.loc[mask_activos, feature_columns].fillna(0)
scaler = StandardScaler()
seg_scaled = scaler.fit_transform(seg_dataset)


In [65]:
# Revisión rápida de inercia para distintos k
wcss = []
for k in range(3, 7):
    km = KMeans(n_clusters=k, random_state=42, n_init=20)
    km.fit(seg_scaled)
    wcss.append({"k": k, "inercia": km.inertia_})
wc_ss_df = pd.DataFrame(wcss)
wc_ss_df


Unnamed: 0,k,inercia
0,3,128873.78
1,4,109643.51
2,5,92475.65
3,6,76152.02


In [66]:
# Elegimos k=4 como equilibrio entre granularidad y estabilidad
k_optimo = 4
kmeans = KMeans(n_clusters=k_optimo, random_state=42, n_init=20)
clusters = kmeans.fit_predict(seg_scaled)
merchant_pricing_base.loc[mask_activos, "segmento_cluster"] = clusters
merchant_pricing_base["segmento_cluster"] = (
    merchant_pricing_base["segmento_cluster"].fillna(-1).astype(int)
)


In [67]:
# Resumen por cluster
cluster_summary = (
    merchant_pricing_base.loc[mask_activos]
    .groupby("segmento_cluster")
    .agg(
        n_comercios=("rut_comercio", "count"),
        monto_prom_mensual=("monto_promedio_mensual", "median"),
        margen_pct_medio=("margen_pct_volumen", "median"),
        margen_estimado_millones=("margen_estimado", lambda s: s.sum() / 1e6),
        share_activos_medio=("share_meses_activos", "median"),
        gap_mdr_medio=("gap_pricing_mdr", "median"),
        n_terminales_medio=("n_terminales_max", "median"),
    )
    .sort_values("monto_prom_mensual", ascending=False)
)
cluster_summary


Unnamed: 0_level_0,n_comercios,monto_prom_mensual,margen_pct_medio,margen_estimado_millones,share_activos_medio,gap_mdr_medio,n_terminales_medio
segmento_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,18,2360490297.42,0.01,94667.0,1.0,-0.0,387.5
1,5080,18058166.3,0.01,43724.83,1.0,-0.0,2.0
0,17391,550485.45,0.02,4611.72,0.75,-0.0,1.0
3,1736,2390.91,0.04,-6.4,0.25,-0.0,1.0


In [68]:
# Asignación de etiquetas descriptivas según patrones observados
vol_high = merchant_pricing_base.loc[mask_activos, "monto_promedio_mensual"].quantile(
    0.75
)
vol_low = merchant_pricing_base.loc[mask_activos, "monto_promedio_mensual"].quantile(
    0.25
)
margin_low = merchant_pricing_base.loc[mask_activos, "margen_pct_volumen"].quantile(
    0.25
)
gap_high = merchant_pricing_base.loc[mask_activos, "gap_pricing_mdr"].quantile(0.75)

labels = {}
for cluster_id, row in cluster_summary.iterrows():
    if row["margen_estimado_millones"] <= 0:
        labels[cluster_id] = "Margen en riesgo"
    elif row["gap_mdr_medio"] >= gap_high:
        labels[cluster_id] = "Brecha competitiva"
    elif row["monto_prom_mensual"] >= vol_high and row["margen_pct_medio"] > margin_low:
        labels[cluster_id] = "Alta contribución"
    elif row["share_activos_medio"] < 0.4 or row["monto_prom_mensual"] <= vol_low:
        labels[cluster_id] = "Baja actividad"
    else:
        labels[cluster_id] = "Optimización gradual"
cluster_summary["etiqueta_cluster"] = cluster_summary.index.map(labels)
cluster_summary


Unnamed: 0_level_0,n_comercios,monto_prom_mensual,margen_pct_medio,margen_estimado_millones,share_activos_medio,gap_mdr_medio,n_terminales_medio,etiqueta_cluster
segmento_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,18,2360490297.42,0.01,94667.0,1.0,-0.0,387.5,Alta contribución
1,5080,18058166.3,0.01,43724.83,1.0,-0.0,2.0,Alta contribución
0,17391,550485.45,0.02,4611.72,0.75,-0.0,1.0,Brecha competitiva
3,1736,2390.91,0.04,-6.4,0.25,-0.0,1.0,Margen en riesgo


In [69]:
merchant_pricing_base["segmento_cluster_label"] = (
    merchant_pricing_base["segmento_cluster"].map(labels).fillna("Sin ventas")
)

segmento_counts = merchant_pricing_base["segmento_cluster_label"].value_counts()
segmento_counts


segmento_cluster_label
Sin ventas            51057
Brecha competitiva    17391
Alta contribución      5098
Margen en riesgo       1736
Name: count, dtype: int64

In [70]:
# Guardamos un resumen legible de la segmentación
segmentation_summary = cluster_summary.reset_index().rename(
    columns={"segmento_cluster": "cluster_id"}
)
segmentation_summary


Unnamed: 0,cluster_id,n_comercios,monto_prom_mensual,margen_pct_medio,margen_estimado_millones,share_activos_medio,gap_mdr_medio,n_terminales_medio,etiqueta_cluster
0,2,18,2360490297.42,0.01,94667.0,1.0,-0.0,387.5,Alta contribución
1,1,5080,18058166.3,0.01,43724.83,1.0,-0.0,2.0,Alta contribución
2,0,17391,550485.45,0.02,4611.72,0.75,-0.0,1.0,Brecha competitiva
3,3,1736,2390.91,0.04,-6.4,0.25,-0.0,1.0,Margen en riesgo


In [71]:
# Cruce entre clusters y acción sugerida para priorizar iniciativas
cluster_action_summary = (
    merchant_pricing_base.groupby(["segmento_cluster_label", "accion_sugerida"])
    .agg(
        n_comercios=("rut_comercio", "count"),
        monto_total_anual=("monto_total_anual", "sum"),
        margen_estimado=("margen_estimado", "sum"),
    )
    .sort_values(
        ["segmento_cluster_label", "monto_total_anual"], ascending=[True, False]
    )
)
cluster_action_summary


Unnamed: 0_level_0,Unnamed: 1_level_0,n_comercios,monto_total_anual,margen_estimado
segmento_cluster_label,accion_sugerida,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alta contribución,Mantener / Upsell servicios,5054,16833263056398.0,138366791950.15
Alta contribución,Ajustar MDR urgente,32,15094354608.0,-2132173.58
Alta contribución,Monitorear baja actividad,12,6686539129.0,27167158.95
Brecha competitiva,Mantener / Upsell servicios,14501,278121530278.0,4558864873.3
Brecha competitiva,Monitorear baja actividad,2890,4155652584.0,52857960.41
Margen en riesgo,Ajustar MDR urgente,95,21327111414.0,-13127181.22
Margen en riesgo,Mantener / Upsell servicios,1014,1337609972.0,6375367.66
Margen en riesgo,Monitorear baja actividad,627,146908848.0,347451.66
Sin ventas,Reactivación comercial,51057,0.0,0.0


### 3.4 Estrategia y racional de pricing
El pipeline combina tres pilares para optimizar precios por segmento de comercio:
1. **Modelo financiero**: el cálculo de `margen_estimado` contrasta los ingresos actuales (MDR + fijo) contra el piso de costos (interchange y marca), identificando relaciones potencialmente deficitarias antes de que impacten resultados.
2. **Referente competitivo**: `gap_pricing_mdr` utiliza la tarifa efectiva de Transbank como benchmark, lo que permite detectar casos donde la propuesta de Klap queda sobre el mercado sin justificación en valor agregado.
3. **Segmentación basada en datos**: el clustering `segmento_cluster_label` agrupa comercios según volumen, actividad, tecnología y salud del margen, de modo que las decisiones (ajuste, reactivación, upsell) se tomen a nivel de arquetipo y no comercio por comercio.

El cruce `cluster_action_summary` muestra cómo se combinan los clusters con las acciones sugeridas, facilitando definir iniciativas específicas (por ejemplo, planes de retención para "Brecha competitiva" o programas de reactivación para "Baja actividad"). Esta metodología es robusta porque integra costos reales, competencia y comportamiento transaccional, permitiendo proponer ajustes de pricing sustentados en datos y accionables a escala.

### 3.5 Recomendación de planes y add-ons
Definimos un catálogo de planes fijos/variables y add-ons de alto valor (omnicanal, fidelización, analytics) para asignar propuestas por comercio.

In [72]:
# Catálogo de planes y add-ons basado en la grilla oficial
segment_mix = segment_effective.set_index("Segmento")

planes = [
    {
        "nombre": "Plan Estándar",
        "segmento_origen": "Estándar",
        "descripcion": "Tarifa oficial para comercios con ventas hasta 8 MM CLP mensuales.",
        "segmentos_objetivo_volumen": ["Estándar", "Sin ventas"],
        "segmentos_objetivo_cluster": [
            "Baja actividad",
            "Margen en riesgo",
            "Brecha competitiva",
        ],
    },
    {
        "nombre": "Plan PRO",
        "segmento_origen": "PRO",
        "descripcion": "Tarifa oficial PRO para comercios con 8-30 MM CLP mensuales.",
        "segmentos_objetivo_volumen": ["PRO", "Optimización gradual"],
        "segmentos_objetivo_cluster": ["Optimización gradual", "Brecha competitiva"],
    },
    {
        "nombre": "Plan PRO Max",
        "segmento_origen": "PRO Max",
        "descripcion": "Tarifa oficial PRO Max para comercios de alto volumen (>30 MM CLP).",
        "segmentos_objetivo_volumen": ["PRO Max", "Enterprise"],
        "segmentos_objetivo_cluster": ["Alta contribución"],
    },
]

for plan in planes:
    seg = plan["segmento_origen"]
    if seg not in segment_mix.index:
        raise KeyError(f"Segmento {seg} no encontrado en la grilla de precios oficial")
    plan["mdr"] = float(segment_mix.loc[seg, "mdr_effectivo"])
    plan["fijo"] = float(segment_mix.loc[seg, "fijo_effectivo"])

addons = [
    {
        "nombre": "Omnicanal Plus",
        "descripcion": "Incluye billeteras, QR, web checkout y soporte para marketplaces.",
        "fee_mensual": 35000,
        "criterio": lambda row: row.get("n_tecnologias_unicas", 0) < 2
        and row.get("monto_total_anual", 0) > 60_000_000,
    },
    {
        "nombre": "Insights & Fidelización",
        "descripcion": "Reportes avanzados, campañas de puntos y marketing SMS/Email.",
        "fee_mensual": 25000,
        "criterio": lambda row: row.get("share_meses_activos", 0) > 0.6
        and row.get("margen_estimado", 0) > 0,
    },
    {
        "nombre": "Pagos Internacionales",
        "descripcion": "Aceptación de tarjetas internacionales y pagos cross-border.",
        "fee_mensual": 45000,
        "criterio": lambda row: row.get("share_visa", 0) > 0.5
        and row.get("monto_total_anual", 0) > 120_000_000,
    },
]


In [73]:
def recomendar_plan(row):
    score_plan = []
    for plan in planes:
        score = 0
        if row.get("segmento_promedio_volumen") in plan["segmentos_objetivo_volumen"]:
            score += 2
        if row.get("segmento_cluster_label") in plan["segmentos_objetivo_cluster"]:
            score += 2
        if (
            row.get("monto_total_anual", 0) > 120_000_000
            and plan["nombre"] == "Plan PRO Max"
        ):
            score += 1
        if (
            row.get("monto_total_anual", 0) < 30_000_000
            and plan["nombre"] == "Plan Estándar"
        ):
            score += 1
        if row.get("margen_estimado", 0) <= 0:
            score -= 1
        score_plan.append((plan["nombre"], plan["mdr"], plan["fijo"], score))
    score_plan.sort(key=lambda x: x[3], reverse=True)
    best = score_plan[0]
    return {
        "plan_recomendado": best[0],
        "plan_mdr_propuesto": best[1],
        "plan_fijo_propuesto": best[2],
    }


plan_df = merchant_pricing_base.apply(recomendar_plan, axis=1, result_type="expand")
merchant_pricing_base = pd.concat([merchant_pricing_base, plan_df], axis=1)


### 3.6 Insights de priorización
- **Plan vs. margen**: `plan_summary` muestra qué planes concentran mayor volumen y margen; prioriza esos segmentos para renegociaciones o campañas.
- **Add-ons**: `addon_summary` evidencia la demanda potencial de servicios complementarios (Omnicanal, Fidelización, Pagos internacionales).
- **Cluster + acción**: el cruce `cluster_action_summary` permite diseñar scripts comerciales específicos para cada arquetipo (ej. ajustar MDR en "Brecha competitiva", activar add-ons en "Alta contribución").

In [74]:
# Resumen de propuestas por plan recomendado
plan_summary = (
    merchant_pricing_base.groupby("plan_recomendado")
    .agg(
        comercios=("rut_comercio", "count"),
        volumen=("monto_total_anual", "sum"),
        margen=("margen_estimado", "sum"),
    )
    .sort_values("volumen", ascending=False)
)
plan_summary


Unnamed: 0_level_0,comercios,volumen,margen
plan_recomendado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Plan PRO Max,3727,16757914005546.0,136974433088.21
Plan Estándar,70186,284109384842.0,4618438620.49
Plan PRO,1369,118109372843.0,1404273698.64


In [76]:
def recomendar_addons(row):
    sugeridos = []
    for addon in addons:
        try:
            aplica = addon["criterio"](row)
        except Exception:
            aplica = False
        if aplica:
            sugeridos.append(f"{addon['nombre']} (${addon['fee_mensual']:,})")
    return ", ".join(sugeridos) if sugeridos else "Sin add-ons sugeridos"


merchant_pricing_base["addons_recomendados"] = merchant_pricing_base.apply(
    recomendar_addons, axis=1
)


In [77]:
# Resumen de add-ons sugeridos
addon_summary = (
    merchant_pricing_base["addons_recomendados"]
    .value_counts()
    .rename_axis("addons_recomendados")
    .to_frame("comercios")
)
addon_summary.head(10)


Unnamed: 0_level_0,comercios
addons_recomendados,Unnamed: 1_level_1
Sin add-ons sugeridos,60008
"Insights & Fidelización ($25,000)",10053
"Omnicanal Plus ($35,000), Insights & Fidelización ($25,000), Pagos Internacionales ($45,000)",2000
"Insights & Fidelización ($25,000), Pagos Internacionales ($45,000)",1491
"Omnicanal Plus ($35,000), Insights & Fidelización ($25,000)",1469
"Pagos Internacionales ($45,000)",100
"Omnicanal Plus ($35,000), Pagos Internacionales ($45,000)",83
"Omnicanal Plus ($35,000)",78


In [78]:
# Guardamos tabla de propuestas comerciales
proposal_output = DATA_DIR / "processed" / "merchant_pricing_proposals.parquet"
merchant_pricing_base.to_parquet(proposal_output, index=False)
proposal_output


PosixPath('data/processed/merchant_pricing_proposals.parquet')

In [79]:
# Guardar resultados del modelo para usos posteriores (dashboards, app, etc.)
pricing_model_output = DATA_DIR / "processed" / "merchant_pricing_model_results.parquet"
merchant_pricing_base.to_parquet(pricing_model_output, index=False)
pricing_model_output


PosixPath('data/processed/merchant_pricing_model_results.parquet')

In [80]:
merchant_pricing_base

Unnamed: 0,rut_comercio,monto_total_anual,qtrx_total_anual,meses_reportados,meses_con_ventas,monto_promedio_mensual,monto_max_mensual,qtrx_promedio_mensual,ticket_promedio_mensual,n_locales_max,...,ingreso_comp_fijo,ingreso_total_comp,gap_pricing_mdr,accion_sugerida,segmento_cluster,segmento_cluster_label,plan_recomendado,plan_mdr_propuesto,plan_fijo_propuesto,addons_recomendados
0,07289970-9,0.00,0.00,11,0,0.00,0.00,0.00,,1,...,0.00,0.00,-0.01,Reactivación comercial,-1,Sin ventas,Plan Estándar,0.01,95.00,Sin add-ons sugeridos
1,10000001-6,156061978.00,28802.00,11,11,14187452.55,19680080.00,2618.36,5420.65,1,...,53.27,1896206.30,-0.00,Mantener / Upsell servicios,1,Alta contribución,Plan PRO Max,0.01,82.60,"Omnicanal Plus ($35,000), Insights & Fidelizac..."
2,10000058-K,0.00,0.00,12,0,0.00,0.00,0.00,,1,...,0.00,0.00,-0.01,Reactivación comercial,-1,Sin ventas,Plan Estándar,0.01,95.00,Sin add-ons sugeridos
3,10000217-5,0.00,0.00,12,0,0.00,0.00,0.00,,2,...,0.00,0.00,-0.01,Reactivación comercial,-1,Sin ventas,Plan Estándar,0.01,95.00,Sin add-ons sugeridos
4,10000372-4,4996222.00,1998.00,6,6,832703.67,995680.00,333.00,2515.63,1,...,3.70,60707.79,-0.00,Mantener / Upsell servicios,0,Brecha competitiva,Plan Estándar,0.01,95.00,"Insights & Fidelización ($25,000)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75277,9998724-3,0.00,0.00,12,0,0.00,0.00,0.00,,1,...,0.00,0.00,-0.01,Reactivación comercial,-1,Sin ventas,Plan Estándar,0.01,95.00,Sin add-ons sugeridos
75278,9999021-K,0.00,0.00,11,0,0.00,0.00,0.00,,1,...,0.00,0.00,-0.01,Reactivación comercial,-1,Sin ventas,Plan Estándar,0.01,95.00,Sin add-ons sugeridos
75279,9999409-6,25658656.00,1824.00,11,11,2332605.09,3803256.00,165.82,14297.33,1,...,3.37,311756.04,-0.00,Mantener / Upsell servicios,0,Brecha competitiva,Plan Estándar,0.01,95.00,"Insights & Fidelización ($25,000)"
75280,9999543-2,71303480.00,16054.00,12,12,5941956.67,7978280.00,1337.83,4474.02,2,...,29.69,866366.97,-0.00,Mantener / Upsell servicios,0,Brecha competitiva,Plan Estándar,0.01,95.00,"Insights & Fidelización ($25,000)"


## 4. Resumen narrativo

### Data snapshot y hallazgos iniciales
- **Cobertura temporal**: 12 meses (2024-01 a 2024-12) con 1.4 millones de registros de terminal-mes.
- **Base de clientes**: 75k comercios, 99k locales, 142k terminales; solo ~20% de los merchant-mes presentan ventas (>0 CLP), lo que evidencia espacio para reactivación.
- **Volumen procesado**: CLP 17.16 billones en 1.78 mil millones de transacciones, altamente concentrados en la Región Metropolitana y en tecnologías HOST TO HOST / POS móviles.
- **Mix transaccional**: `monto_adquriencia_general` equivale al total de pagos con tarjeta (`monto_clp`); los montos por marca son desgloses del mismo total, con Visa y Mastercard como redes dominantes.
- **Estado operativo**: `estado_terminal` captura la condición vigente de la terminal; para análisis históricos debe combinarse con actividad real y fechas de instalación/baja.
- **Referencias de pricing**: se cuenta con grillas actuales de Klap, tarifas de competidores, costos de marca e interchange oficiales para modelar márgenes sin alterar esos parámetros externos.
- **Datasets derivados**: `merchant_pricing_feature_base.parquet` concentra métricas de comportamiento y costo, mientras que `merchant_pricing_model_results.parquet` añade ingresos, márgenes, brecha competitiva, acción sugerida y la nueva `segmento_cluster_label`.
- **Nueva segmentación**: el clustering identifica arquetipos (Alta contribución, Brecha competitiva, Margen en riesgo, Baja actividad, Optimización gradual) para priorizar tácticas de pricing y fidelización, y el cruce `cluster_action_summary` cuantifica la magnitud de cada acción dentro de esos arquetipos.

### Próximos pasos sugeridos
1. Evaluar márgenes actuales versus costos mínimos por segmento (`segmento_promedio_volumen`) y detectar clientes con holgura o déficit.
2. Incorporar precios vigentes por comercio (si están disponibles) para estimar margen real vs. piso de costo y validar oportunidades de reajuste.
3. Desarrollar estrategias de reactivación para terminales inactivos considerando tecnología instalada y potencial de volumen.
4. Comparar la propuesta actual con tarifas de competidores para cuantificar riesgos de fuga y oportunidades de upsell.
