In [1]:
import pandas as pd
import numpy as np

# Corregir la ruta del archivo
df = pd.read_parquet("/home/donsson/proyectos/API/ventashistoricas56semanas.parquet") #movimiento  facturas
df_p = pd.read_parquet("/home/donsson/proyectos/API/costo_productos.parquet") #Costos unitarios
df_vp = pd.read_parquet("/home/donsson/proyectos/API/ventas_perdidas_2025.parquet") #ventas perdidas
vp_reales = pd.read_excel("/home/donsson/proyectos/INDICADOR NS/vp_agosto.xlsx") #vp reales


# EDA

## Facturas

In [2]:
import re
import unicodedata

# Diccionario de códigos a sucursales
mapa_codigos = {
    "FCAL": "SUCURSAL CALI",
    "FMED": "SUCURSAL MEDELLIN",
    "FMDE":"SUCURSAL MEDELLIN",
    "FCTG": "SUCURSAL CARTAGENA",
    "FBAQ": "SUCURSAL BARRANQUILLA",
    "FVAL": "SUCURSAL VALLADOLID",
    "FCOT":"PRINCIPAL COTA",
    "FBUC":"SUCURSAL BUCARAMANGA",
    "FNOR":"SUCURSAL NORTE",
    "FCL6":"SUCURSAL CALLE 6",
    "PV2E":"SUCURSAL CALLE 6",
    "PV3E":"SUCURSAL VALLADOLID",
    "CLL6":"SUCURSAL CALLE 6",
    "PV1E":"SUCURSAL COTA" ,#Las que comienzan por p son los mostradores
    "PV4E":"SUCURSAL NORTE",
    "PV9E":"SUCURSAL CALI"

}


# Equivalencias para normalizar nombres truncados o mal escritos
mapa_equivalencias = {
    "MEDELLIN": "SUCURSAL MEDELLIN",
    "MEDELLI": "SUCURSAL MEDELLIN",
    "MEDELL": "SUCURSAL MEDELLIN",
    "MEDELI": "SUCURSAL MEDELLIN",
    "CALI": "SUCURSAL CALI",
    "CLL6":"SUCURSAL CALLE 6",
    "BUCARAMANGA":"SUCURSAL BUCARAMANGA",
    "BARRANQUILLA": "SUCURSAL BARRANQUILLA",
    "VALLADOLID": "SUCURSAL VALLADOLID",
    "CALLE 6":"SUCURSAL CALLE 6",
    "COTA":"PRINCIPAL COTA",
    "NORTE":"SUCURSAL NORTE"
}

def normalizar(texto):
    """Quita tildes y pasa a mayúsculas"""
    texto = unicodedata.normalize("NFKD", texto)
    texto = "".join([c for c in texto if not unicodedata.combining(c)])
    return texto.upper()

def extraer_sucursal(nombre):
    if not isinstance(nombre, str):
        return "VENDEDOR EXTERNO"
    
    sucursal = None
    
    # 1) Buscar "Mostrador ..."
    match = re.search(r"Mostrador\s+([A-Za-z0-9\s]+)", nombre, re.IGNORECASE)
    if match:
        sucursal = match.group(1).strip()
    else:
        # 2) Buscar "Calle" o "Cota"
        match2 = re.search(r"(Calle\s+\d+|Cota)", nombre, re.IGNORECASE)
        if match2:
            sucursal = match2.group(1).strip()
        else:
            # 3) Buscar prefijo de código
            for prefijo, ciudad in mapa_codigos.items():
                if nombre.upper().startswith(prefijo):
                    return ciudad
            return "VENDEDOR EXTERNO"
    
    # Normalizar texto
    sucursal = normalizar(sucursal)
    
    # Limpiar T1, T2, T3 al final
    sucursal = re.sub(r"\s*T\d+$", "", sucursal).strip()
    
    # Aplicar equivalencias
    sucursal = mapa_equivalencias.get(sucursal, sucursal)
    
    return sucursal

# Aplicar al dataframe
df["Sucursal"] = df["invoice_name"].apply(extraer_sucursal)

## Ventas perdidas

In [3]:
import pandas as pd
import numpy as np

# ===============================
# Filtrar almacenamiento agotado
# ===============================
df_vp = df_vp[df_vp["almacenamiento_tipo"].str.lower() == "agotado"]

# ===============================
# Asegurar tipos correctos
# ===============================
df_vp = df_vp.copy()
df_vp["fecha"] = pd.to_datetime(df_vp["fecha"], errors="coerce")

# Numéricos
for col in ["cantidad", "cantidad_existencia", "cantidad_reservada"]:
    df_vp[col] = pd.to_numeric(df_vp[col], errors="coerce").fillna(0).clip(lower=0)

# ===============================
# Reglas Odoo vectorizadas
# ===============================
is_cot = df_vp["origen"].fillna("").str.lower() == "cotizacion"
ignore_mask = df_vp["cantidad"] >= 100

ajuste = np.where(
    is_cot,
    df_vp["cantidad"] - df_vp["cantidad_existencia"] - df_vp["cantidad_reservada"],
    df_vp["cantidad"] - df_vp["cantidad_reservada"]
)

# Aplicar reglas de descarte y piso en cero
ajuste = np.where(ignore_mask, 0, ajuste)
ajuste = np.where(ajuste > 0, ajuste, 0)

df_vp["ventas_perdidas"] = ajuste.astype(float)

# ===============================
# Columnas temporales
# ===============================
df_vp["Semana"] = df_vp["fecha"].dt.to_period("W").dt.start_time
df_vp["ano"]   = df_vp["Semana"].dt.year
df_vp["mes"]   = df_vp["Semana"].dt.month
df_vp["dia"]   = df_vp["Semana"].dt.day

# ===============================
# Filtro adicional: excluir SERV y CARCASA
# ===============================
mask_excluir = ~df_vp["product_ref"].str.contains("SERV|CARCASA", case=False, na=False)
df_vp = df_vp[mask_excluir]

# ===============================
# Agrupación por tienda + producto + semana
# ===============================
lost_by_week = (
    df_vp.groupby(["store_name", "product_ref", "Semana", "ano", "mes", "dia"])
    .agg(
        lost_sales=("ventas_perdidas", "sum"),   # suma total de ventas perdidas
        veces_vp=("ventas_perdidas", "count")    # número de veces que hubo pérdida
    )
    .reset_index()
)

# Mostrar resultado agrupado
vp_week = lost_by_week


In [4]:
vp_reales["product_ref"] = vp_reales["Descripcion"].str.extract(r"\[([A-Z0-9]+)\]")
vp_reales.head()

# Asegurar que ambos son strings para evitar problemas
vp_week["product_ref"] = vp_week["product_ref"].astype(str)
vp_reales["product_ref"] = vp_reales["product_ref"].astype(str)

# 1. Obtener listas únicas
refs_week = set(vp_week["product_ref"].unique())
refs_real = set(vp_reales["product_ref"].unique())

# 2. Diferencia: los que están en vp_week pero no en vp_real
refs_extra = refs_week - refs_real

# 3. Filtrar el dataframe para verlos completos
df_discrepantes = vp_week[vp_week["product_ref"].isin(refs_extra)]


df_discrepantes = df_discrepantes[(df_discrepantes["mes"]==8) & (df_discrepantes["lost_sales"]>0) ]
df_discrepantes = df_discrepantes.groupby("product_ref").agg({"lost_sales":"sum"})
print("Cantidad de vp que no deberia tomar:", df_discrepantes["lost_sales"].sum())
df_discrepantes #Los productos que no se movieron hace mucho tiempo no salen en el analisis de ns

Cantidad de vp que no deberia tomar: 47.0


Unnamed: 0_level_0,lost_sales
product_ref,Unnamed: 1_level_1
DAB28118025,27.0
DAR12123UHE,2.0
DCS00342118,2.0
DCS00342186,1.0
DLS00105011,1.0
DLX00393020,14.0


In [5]:
vp_agosot_2025 = vp_week[(vp_week["ano"]==2025) & (vp_week["mes"]==8) ]
#vp_agosot_2025.to_excel("vp_revisar.xlsx")

vp_agosot_2025.query("product_ref == 'DAB28118025' and store_name == 'PRINCIPAL COTA'")
#vp_agosot_2025.groupby("store_name")["lost_sales"].sum()

Unnamed: 0,store_name,product_ref,Semana,ano,mes,dia,lost_sales,veces_vp
4821,PRINCIPAL COTA,DAB28118025,2025-08-25,2025,8,25,5.0,5


# UNION

## EMA SEMANAL CON VP SEMANALES (SOLO 2025)

In [6]:
df.head()

Unnamed: 0,price_subtotal,id,quantity,product_id_num,product_name,invoice_id_num,invoice_name,date_invoice,Sucursal
0,210000.0,1614225,1.0,19708,[BHS00309125] GS309 FILTRO HIDRAULICO JOY (125...,437814,PV2E43352 Mostrador Calle 6 T2/21186,2025-09-15 19:50:53,SUCURSAL CALLE 6
1,127200.0,1614224,2.0,17284,[DAB02968025] DA2968 FILTRO AIRE- HINO (025 D...,437813,PV2E43351 Mostrador Calle 6 T1/25374,2025-09-15 19:48:23,SUCURSAL CALLE 6
2,205200.0,1614223,3.0,19578,"[BHS00260125] GS260 FILTRO HIDRAULICO GRESEN, ...",437812,PVE54073 Mostrador Medellín T2/6097,2025-09-15 19:47:54,SUCURSAL MEDELLIN
3,45000.0,1614221,2.0,17112,"[DAB02671025] DA2671 FILTROAIRE KUBOTA, NISSAN...",437811,FCOT26223,2025-09-15 19:46:20,PRINCIPAL COTA
4,343200.0,1614222,12.0,19329,[BCS00162125] GS162 FILTRO COMBUSTIBLE A.COPCO...,437811,FCOT26223,2025-09-15 19:46:20,PRINCIPAL COTA


In [7]:
# ===============================
# Procesar ventas normales
# ===============================
df_sales = df.copy()
df_sales["date_invoice"] = pd.to_datetime(df_sales["date_invoice"], errors="coerce")


# Referncia de producto
df_sales["product_ref"] = df_sales["product_name"].str.extract(r"\[([A-Z0-9]+)\]")


# Columnas temporales igual que en df_vp
df_sales["Semana"] = df_sales["date_invoice"].dt.to_period("W").dt.start_time
df_sales["ano"]    = df_sales["Semana"].dt.year
df_sales["mes"]    = df_sales["Semana"].dt.month
df_sales["dia"]    = df_sales["Semana"].dt.day

# ===============================
# Agrupación por tienda + producto + semana
# ===============================
sales_by_week = (
    df_sales.groupby(["Sucursal", "product_ref", "Semana", "ano", "mes", "dia"], as_index=False)["quantity"]
    .sum()
    .rename(columns={"quantity": "sales",
                     "Sucursal":"store_name"})
)

# Resultado
sales_by_week.sample(10)


Unnamed: 0,store_name,product_ref,Semana,ano,mes,dia,sales
25295,PRINCIPAL COTA,DVJ00179198,2025-03-24,2025,3,24,1.0
95824,SUCURSAL MEDELLIN,DAB02666025,2024-10-21,2024,10,21,7.0
45673,SUCURSAL BUCARAMANGA,BAC00124125,2025-01-27,2025,1,27,2.0
14233,PRINCIPAL COTA,DAB04872025,2025-01-13,2025,1,13,2.0
39196,SUCURSAL BARRANQUILLA,DAB09196025,2025-03-03,2025,3,3,1.0
80743,SUCURSAL CALLE 6,DAB04881025,2024-09-02,2024,9,2,5.0
68231,SUCURSAL CALI,DAS07801025,2024-10-07,2024,10,7,24.0
26449,SUCURSAL BARRANQUILLA,BCE00919125,2025-02-17,2025,2,17,4.0
102253,SUCURSAL MEDELLIN,DAR08242025,2025-01-06,2025,1,6,1.0
111253,SUCURSAL NORTE,DAE09001025,2024-10-07,2024,10,7,1.0


In [8]:
df_merged = pd.merge(
    sales_by_week[["store_name", "product_ref", "Semana", "sales"]],
    vp_week[["store_name", "product_ref", "Semana", "lost_sales","veces_vp"]],
    on=["store_name", "product_ref", "Semana"],
    how="outer"
).fillna(0)


In [9]:
df_merged["año"]    = df_merged["Semana"].dt.year
df_merged["mes"]    = df_merged["Semana"].dt.month
df_merged["dia"]    = df_merged["Semana"].dt.day

df_merged.head(10)

Unnamed: 0,store_name,product_ref,Semana,sales,lost_sales,veces_vp,año,mes,dia
0,PRINCIPAL COTA,AC10388020,2025-02-24,10.0,0.0,0.0,2025,2,24
1,PRINCIPAL COTA,AC10388020,2025-07-07,2.0,0.0,0.0,2025,7,7
2,PRINCIPAL COTA,AC10388020,2025-07-21,2.0,0.0,0.0,2025,7,21
3,PRINCIPAL COTA,AC10388020,2025-09-01,4.0,0.0,0.0,2025,9,1
4,PRINCIPAL COTA,AGB0GRAS030,2025-07-28,0.0,1.0,1.0,2025,7,28
5,PRINCIPAL COTA,AHB0TO30132,2025-01-13,0.0,1.0,1.0,2025,1,13
6,PRINCIPAL COTA,AHB80W90030,2025-07-07,0.0,1.0,1.0,2025,7,7
7,PRINCIPAL COTA,AHB80W90050,2025-08-11,0.0,1.0,1.0,2025,8,11
8,PRINCIPAL COTA,AHBNTO68030,2024-10-21,1.0,0.0,0.0,2024,10,21
9,PRINCIPAL COTA,AHBNTO68030,2025-01-27,1.0,1.0,1.0,2025,1,27


## NORMALIZAR DF DE COSTOS

In [10]:
df_p["product_ref"] = df_p["product_name"].str.extract(r"\[([A-Z0-9]+)\]")

df_p_unique = (
    df_p[["product_ref", "producto_costo_unitario"]]
    .drop_duplicates(subset=["product_ref"])
)



df_p.head()

Unnamed: 0,product_name,producto_costo_unitario,product_ref
0,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,13507.71,DAB02570025
1,"[BLS00037125] GS037 FILTRO ACEITE MACK, CATERP...",32797.97,BLS00037125
2,"[DAB02772025] DA2772 FILTRO AIRE BOBCAT, HITAC...",10680.97,DAB02772025
3,"[BCS00035125] GS035 FILTRO COMBUSTIBLE VOLVO,D...",20638.72,BCS00035125
4,[DAB14570025] DA4570A FILTRO AIRE 2_ DONSSON -...,9774.98,DAB14570025


### UNIR COSTO

In [11]:
df_merge_def = pd.merge(
    df_merged,
    df_p_unique,
    on="product_ref",
    how="left"
).fillna(0)


merge_def = df_merge_def[df_merge_def["producto_costo_unitario"] !=0].copy() #Eliminar productos sin costos unitarios

merge_def.head(10)

Unnamed: 0,store_name,product_ref,Semana,sales,lost_sales,veces_vp,año,mes,dia,producto_costo_unitario
0,PRINCIPAL COTA,AC10388020,2025-02-24,10.0,0.0,0.0,2025,2,24,20046.0
1,PRINCIPAL COTA,AC10388020,2025-07-07,2.0,0.0,0.0,2025,7,7,20046.0
2,PRINCIPAL COTA,AC10388020,2025-07-21,2.0,0.0,0.0,2025,7,21,20046.0
3,PRINCIPAL COTA,AC10388020,2025-09-01,4.0,0.0,0.0,2025,9,1,20046.0
4,PRINCIPAL COTA,AGB0GRAS030,2025-07-28,0.0,1.0,1.0,2025,7,28,267417.44
5,PRINCIPAL COTA,AHB0TO30132,2025-01-13,0.0,1.0,1.0,2025,1,13,222947.69
6,PRINCIPAL COTA,AHB80W90030,2025-07-07,0.0,1.0,1.0,2025,7,7,313811.44
7,PRINCIPAL COTA,AHB80W90050,2025-08-11,0.0,1.0,1.0,2025,8,11,460139.21
8,PRINCIPAL COTA,AHBNTO68030,2024-10-21,1.0,0.0,0.0,2024,10,21,342304.08
9,PRINCIPAL COTA,AHBNTO68030,2025-01-27,1.0,1.0,1.0,2025,1,27,342304.08


In [12]:
import pandas as pd
import numpy as np

def compute_demand_and_ema(df,
                           alpha=0.20,        # 20% como en tu config
                           n_init_weeks=12,   # semanas que usa el proceso (rango de evaluacion)
                           week_col="Semana",
                           sales_col="sales",
                           lost_col="lost_sales"):
    df = df.copy()

    # ---------- Asegurar tipos y semana iniciando lunes ----------
    # Si Semana no es datetime, intentamos convertir
    df[week_col] = pd.to_datetime(df[week_col], errors="coerce")

    # Normalizar semanas al lunes inicio (start of week, lunes)
    # Esto genera el timestamp del lunes de la semana ISO correspondiente
    # (equivalente al comportamiento del código original)
    df[week_col] = df[week_col].dt.to_period('W-MON').dt.start_time

    # Asegurar numéricos
    df[sales_col] = pd.to_numeric(df[sales_col], errors="coerce").fillna(0)
    df[lost_col]  = pd.to_numeric(df[lost_col], errors="coerce").fillna(0)

    # Orden y agrupación
    df = df.sort_values(["store_name", "product_ref", week_col])

    out_groups = []

    # Recorremos por tienda+producto
    for (store, prod), g in df.groupby(["store_name", "product_ref"], sort=False):
        g = g.sort_values(week_col).reset_index(drop=True)
        sales = g[sales_col].to_numpy(dtype=float)
        lost  = g[lost_col].to_numpy(dtype=float)

        L = len(g)
        demanda = np.zeros(L, dtype=float)
        ema_arr = np.zeros(L, dtype=float)

        # Inicial EMA (EMA_0) -> si hay suficientes semanas, usamos el promedio de las primeras n_init_weeks sales
        # Si hay menos semanas, usamos media de las sales disponibles.
        if L == 0:
            out_groups.append(g)
            continue

        # inicialización: usar promedio de 'sales' de las primeras n_init_weeks (o de lo que haya)
        init_n = min(n_init_weeks, L)
        # si no hay sales (todos ceros), ema_prev será 0
        ema_prev = float(np.nanmean(sales[:init_n])) if init_n > 0 else 0.0
        if np.isnan(ema_prev):
            ema_prev = 0.0

        # Iterar semanas y aplicar reglas del documento
        for i in range(L):
            s = sales[i]
            l = lost[i]

            # Regla 1: si ventas >= 2 * ventas_perdidas
            if s >= 2.0 * l:
                demand_candidate = s + l
                # aplicar tope MAX = 1.5 * ventas
                # (si s == 0 ese caso no entra porque s >= 2*l sería falso cuando l>0)
                demand = min(demand_candidate, 1.5 * s) if s > 0 else demand_candidate
            else:
                # Regla 2: ventas < 2 * ventas_perdidas
                # demanda = ventas + 0.5 * EMA(t-1)
                demand = s + 0.5 * ema_prev

            # Guardar demanda
            demanda[i] = demand

            # Calcular EMA (iterativo) con alpha
            ema = alpha * demand + (1.0 - alpha) * ema_prev
            ema =round(ema,2)
            ema_arr[i] = ema

            # actualizar para la próxima semana
            ema_prev = ema

        # Añadir columnas al grupo
        g = g.copy()
        g["demanda_ajustada"] = demanda
        g["EMA"] = ema_arr

        out_groups.append(g)

    # Concat y devolver
    result = pd.concat(out_groups, ignore_index=True, sort=False)

    # Mantener mismo orden original
    result = result.sort_values(["store_name", "product_ref", week_col]).reset_index(drop=True)
    return result

# ------------------ USO ------------------
# suponiendo merge_def es tu df final
# ajusta alpha y n_init_weeks si quieres (alpha=0.2, n_init_weeks=12 por defecto)
df_with_demand1 = compute_demand_and_ema(merge_def, alpha=0.2, n_init_weeks=12)

# ver primeras filas
df_with_demand1[["store_name","product_ref","Semana","sales","lost_sales","demanda_ajustada","EMA"]].head(20)


Unnamed: 0,store_name,product_ref,Semana,sales,lost_sales,demanda_ajustada,EMA
0,PRINCIPAL COTA,AC10388020,2025-02-18,10.0,0.0,10.0,5.6
1,PRINCIPAL COTA,AC10388020,2025-07-01,2.0,0.0,2.0,4.88
2,PRINCIPAL COTA,AC10388020,2025-07-15,2.0,0.0,2.0,4.3
3,PRINCIPAL COTA,AC10388020,2025-08-26,4.0,0.0,4.0,4.24
4,PRINCIPAL COTA,AGB0GRAS030,2025-07-22,0.0,1.0,0.0,0.0
5,PRINCIPAL COTA,AHB0TO30132,2025-01-07,0.0,1.0,0.0,0.0
6,PRINCIPAL COTA,AHB80W90030,2025-07-01,0.0,1.0,0.0,0.0
7,PRINCIPAL COTA,AHB80W90050,2025-08-05,0.0,1.0,0.0,0.0
8,PRINCIPAL COTA,AHBNTO68030,2024-10-15,1.0,0.0,1.0,1.07
9,PRINCIPAL COTA,AHBNTO68030,2025-01-21,1.0,1.0,1.535,1.16


In [13]:
df_with_demand1["semana_num"] = df_with_demand1["Semana"].dt.isocalendar().week

In [14]:
df_demand_2025 = df_with_demand1[df_with_demand1["año"]==2025]

demand_2025 = df_demand_2025[["store_name","product_ref","año","semana_num","EMA","producto_costo_unitario","demanda_ajustada"]]

filtro_bq = demand_2025[(demand_2025["store_name"]=="SUCURSAL BARRANQUILLA") & (demand_2025["product_ref"].str.contains("DAB02570025"))]

filtro_bq.head(36)

Unnamed: 0,store_name,product_ref,año,semana_num,EMA,producto_costo_unitario,demanda_ajustada
45065,SUCURSAL BARRANQUILLA,DAB02570025,2025,1,28.94,13507.71,15.0
45066,SUCURSAL BARRANQUILLA,DAB02570025,2025,2,34.55,13507.71,57.0
45067,SUCURSAL BARRANQUILLA,DAB02570025,2025,3,30.64,13507.71,15.0
45068,SUCURSAL BARRANQUILLA,DAB02570025,2025,4,36.51,13507.71,60.0
45069,SUCURSAL BARRANQUILLA,DAB02570025,2025,5,36.41,13507.71,36.0
45070,SUCURSAL BARRANQUILLA,DAB02570025,2025,6,44.33,13507.71,76.0
45071,SUCURSAL BARRANQUILLA,DAB02570025,2025,7,46.26,13507.71,54.0
45072,SUCURSAL BARRANQUILLA,DAB02570025,2025,8,40.81,13507.71,19.0
45073,SUCURSAL BARRANQUILLA,DAB02570025,2025,9,49.05,13507.71,82.0
45074,SUCURSAL BARRANQUILLA,DAB02570025,2025,10,69.44,13507.71,151.0


In [15]:
import pandas as pd
import numpy as np

def compute_demand_and_ema2(df,
                           alpha=0.20,
                           n_init_weeks=12,
                           week_col="Semana",
                           sales_col="sales",
                           lost_col="lost_sales"):
    df = df.copy()

    # ---------- Asegurar tipos y semana iniciando lunes ----------
    df[week_col] = pd.to_datetime(df[week_col], errors="coerce")
    df[week_col] = df[week_col].dt.to_period('W-MON').dt.start_time

    df[sales_col] = pd.to_numeric(df[sales_col], errors="coerce").fillna(0)
    df[lost_col]  = pd.to_numeric(df[lost_col], errors="coerce").fillna(0)

    # ---------- Agrupación con veces_vp corregido ----------
    agg = (
        df.groupby(["store_name", "product_ref", week_col], as_index=False)
        .agg({
            sales_col: "sum",
            lost_col: "sum",
            # contar SOLO filas con ventas perdidas > 0
            lost_col: [("sum", "sum"), ("veces_vp", lambda x: (x > 0).sum())]
        })
    )

    # el agg crea columnas multinivel → aplanamos
    agg.columns = ["store_name", "product_ref", week_col, sales_col, lost_col, "veces_vp"]

    # Orden
    agg = agg.sort_values(["store_name", "product_ref", week_col])

    out_groups = []

    for (store, prod), g in agg.groupby(["store_name", "product_ref"], sort=False):
        g = g.sort_values(week_col).reset_index(drop=True)
        sales = g[sales_col].to_numpy(dtype=float)
        lost  = g[lost_col].to_numpy(dtype=float)
        veces = g["veces_vp"].to_numpy(dtype=int)

        L = len(g)
        demanda = np.zeros(L, dtype=float)
        ema_arr = np.zeros(L, dtype=float)

        if L == 0:
            out_groups.append(g)
            continue

        init_n = min(n_init_weeks, L)
        ema_prev = float(np.nanmean(sales[:init_n])) if init_n > 0 else 0.0
        if np.isnan(ema_prev):
            ema_prev = 0.0

        for i in range(L):
            s = sales[i]
            l = lost[i]
            v = veces[i]  # número de veces con ventas perdidas reales

            # --- Ajuste de la demanda ---
            if s >= 2.0 * l:
                demand_candidate = s + l + v
                demand = min(demand_candidate, 1.5 * s) if s > 0 else demand_candidate
            else:
                demand = s + v + 0.5 * ema_prev

            demanda[i] = demand
            ema = alpha * demand + (1.0 - alpha) * ema_prev
            ema = round(ema, 2)
            ema_arr[i] = ema
            ema_prev = ema

        g = g.copy()
        g["demanda_ajustada"] = demanda
        g["EMA"] = ema_arr

        out_groups.append(g)

    result = pd.concat(out_groups, ignore_index=True, sort=False)
    result = result.sort_values(["store_name", "product_ref", week_col]).reset_index(drop=True)
    return result


# ------------------ USO ------------------
df_with_demand2 = compute_demand_and_ema2(merge_def, alpha=0.2, n_init_weeks=12)

# Para revisar
df_demanda2 = df_with_demand2[["store_name","product_ref","Semana","sales","lost_sales","veces_vp","demanda_ajustada","EMA"]]



In [16]:
df_demanda2["semana_num"] = df_demanda2["Semana"].dt.isocalendar().week
df_demanda2["año"] = df_demanda2["Semana"].dt.year

In [17]:
df_demand_2025 = df_demanda2[df_demanda2["año"]==2025]

demand_20252 = df_demand_2025[["store_name","product_ref","año","semana_num","EMA","demanda_ajustada"]]

filtro_bq2 = demand_20252[(demand_20252["store_name"]=="SUCURSAL BARRANQUILLA") & (demand_20252["product_ref"].str.contains("DAB02570025"))]

filtro_bq2.head(37)

Unnamed: 0,store_name,product_ref,año,semana_num,EMA,demanda_ajustada
45066,SUCURSAL BARRANQUILLA,DAB02570025,2025,2,34.55,57.0
45067,SUCURSAL BARRANQUILLA,DAB02570025,2025,3,30.64,15.0
45068,SUCURSAL BARRANQUILLA,DAB02570025,2025,4,36.51,60.0
45069,SUCURSAL BARRANQUILLA,DAB02570025,2025,5,36.41,36.0
45070,SUCURSAL BARRANQUILLA,DAB02570025,2025,6,44.33,76.0
45071,SUCURSAL BARRANQUILLA,DAB02570025,2025,7,46.26,54.0
45072,SUCURSAL BARRANQUILLA,DAB02570025,2025,8,40.81,19.0
45073,SUCURSAL BARRANQUILLA,DAB02570025,2025,9,49.05,82.0
45074,SUCURSAL BARRANQUILLA,DAB02570025,2025,10,69.44,151.0
45075,SUCURSAL BARRANQUILLA,DAB02570025,2025,11,56.55,5.0
