In [1]:
import pandas as pd
import numpy as np

# Corregir la ruta del archivo
df = pd.read_parquet("/home/donsson/proyectos/API/ventashistoricas56semanas.parquet") #movimiento  facturas
df_p = pd.read_parquet("/home/donsson/proyectos/API/costo_productos.parquet") #Costos unitarios
df_vp = pd.read_parquet("/home/donsson/proyectos/API/ventas_perdidas_2025.parquet") #ventas perdidas
vp_reales = pd.read_excel("/home/donsson/proyectos/INDICADOR NS/vp_agosto.xlsx") #vp reales


# EDA

## Facturas

In [2]:
import re
import unicodedata

# Diccionario de códigos a sucursales
mapa_codigos = {
    "FCAL": "SUCURSAL CALI",
    "FMED": "SUCURSAL MEDELLIN",
    "FMDE":"SUCURSAL MEDELLIN",
    "FCTG": "SUCURSAL CARTAGENA",
    "FBAQ": "SUCURSAL BARRANQUILLA",
    "FVAL": "SUCURSAL VALLADOLID",
    "FCOT":"PRINCIPAL COTA",
    "FBUC":"SUCURSAL BUCARAMANGA",
    "FNOR":"SUCURSAL NORTE",
    "FCL6":"SUCURSAL CALLE 6",
    "PV2E":"SUCURSAL CALLE 6",
    "PV3E":"SUCURSAL VALLADOLID",
    "CLL6":"SUCURSAL CALLE 6",
    "PV1E":"SUCURSAL COTA" ,#Las que comienzan por p son los mostradores
    "PV4E":"SUCURSAL NORTE",
    "PV9E":"SUCURSAL CALI"

}


# Equivalencias para normalizar nombres truncados o mal escritos
mapa_equivalencias = {
    "MEDELLIN": "SUCURSAL MEDELLIN",
    "MEDELLI": "SUCURSAL MEDELLIN",
    "MEDELL": "SUCURSAL MEDELLIN",
    "MEDELI": "SUCURSAL MEDELLIN",
    "CALI": "SUCURSAL CALI",
    "CLL6":"SUCURSAL CALLE 6",
    "BUCARAMANGA":"SUCURSAL BUCARAMANGA",
    "BARRANQUILLA": "SUCURSAL BARRANQUILLA",
    "VALLADOLID": "SUCURSAL VALLADOLID",
    "CALLE 6":"SUCURSAL CALLE 6",
    "COTA":"PRINCIPAL COTA",
    "NORTE":"SUCURSAL NORTE"
}

def normalizar(texto):
    """Quita tildes y pasa a mayúsculas"""
    texto = unicodedata.normalize("NFKD", texto)
    texto = "".join([c for c in texto if not unicodedata.combining(c)])
    return texto.upper()

def extraer_sucursal(nombre):
    if not isinstance(nombre, str):
        return "VENDEDOR EXTERNO"
    
    sucursal = None
    
    # 1) Buscar "Mostrador ..."
    match = re.search(r"Mostrador\s+([A-Za-z0-9\s]+)", nombre, re.IGNORECASE)
    if match:
        sucursal = match.group(1).strip()
    else:
        # 2) Buscar "Calle" o "Cota"
        match2 = re.search(r"(Calle\s+\d+|Cota)", nombre, re.IGNORECASE)
        if match2:
            sucursal = match2.group(1).strip()
        else:
            # 3) Buscar prefijo de código
            for prefijo, ciudad in mapa_codigos.items():
                if nombre.upper().startswith(prefijo):
                    return ciudad
            return "VENDEDOR EXTERNO"
    
    # Normalizar texto
    sucursal = normalizar(sucursal)
    
    # Limpiar T1, T2, T3 al final
    sucursal = re.sub(r"\s*T\d+$", "", sucursal).strip()
    
    # Aplicar equivalencias
    sucursal = mapa_equivalencias.get(sucursal, sucursal)
    
    return sucursal

# Aplicar al dataframe
df["Sucursal"] = df["invoice_name"].apply(extraer_sucursal)

In [3]:
df["product_name"].nunique()

2645

## Ventas perdidas

In [4]:
import pandas as pd
import numpy as np

# ===============================
# Filtrar almacenamiento agotado
# ===============================
df_vp = df_vp[df_vp["almacenamiento_tipo"].str.lower() == "agotado"]

# ===============================
# Asegurar tipos correctos
# ===============================
df_vp = df_vp.copy()
df_vp["fecha"] = pd.to_datetime(df_vp["fecha"], errors="coerce")

# Numéricos
for col in ["cantidad", "cantidad_existencia", "cantidad_reservada"]:
    df_vp[col] = pd.to_numeric(df_vp[col], errors="coerce").fillna(0).clip(lower=0)

# ===============================
# Reglas Odoo vectorizadas
# ===============================
is_cot = df_vp["origen"].fillna("").str.lower() == "cotizacion"
ignore_mask = df_vp["cantidad"] >= 100

ajuste = np.where(
    is_cot,
    df_vp["cantidad"] - df_vp["cantidad_existencia"] - df_vp["cantidad_reservada"],
    df_vp["cantidad"] - df_vp["cantidad_reservada"]
)

# Aplicar reglas de descarte y piso en cero
ajuste = np.where(ignore_mask, 0, ajuste)
ajuste = np.where(ajuste > 0, ajuste, 0)

df_vp["ventas_perdidas"] = ajuste.astype(float)

# ===============================
# Columnas temporales
# ===============================
df_vp["Semana"] = df_vp["fecha"].dt.to_period("W").dt.start_time
df_vp["ano"]   = df_vp["Semana"].dt.year
df_vp["mes"]   = df_vp["Semana"].dt.month
df_vp["dia"]   = df_vp["Semana"].dt.day

# ===============================
# Filtro adicional: excluir SERV y CARCASA
# ===============================
mask_excluir = ~df_vp["product_ref"].str.contains("SERV|CARCASA", case=False, na=False)
df_vp = df_vp[mask_excluir]

# ===============================
# Agrupación por tienda + producto + semana
# ===============================
lost_by_week = (
    df_vp.groupby(["store_name", "product_ref", "Semana", "ano", "mes", "dia"])
    .agg(
        lost_sales=("ventas_perdidas", "sum"),   # suma total de ventas perdidas
        veces_vp=("ventas_perdidas", "count")    # número de veces que hubo pérdida
    )
    .reset_index()
)

# Mostrar resultado agrupado
vp_week = lost_by_week


vp_week["product_ref"].nunique()

4229

In [5]:
vp_reales["product_ref"] = vp_reales["Descripcion"].str.extract(r"\[([A-Z0-9]+)\]")
vp_reales.head()

# Asegurar que ambos son strings para evitar problemas
vp_week["product_ref"] = vp_week["product_ref"].astype(str)
vp_reales["product_ref"] = vp_reales["product_ref"].astype(str)

# 1. Obtener listas únicas
refs_week = set(vp_week["product_ref"].unique())
refs_real = set(vp_reales["product_ref"].unique())

# 2. Diferencia: los que están en vp_week pero no en vp_real
refs_extra = refs_week - refs_real

# 3. Filtrar el dataframe para verlos completos
df_discrepantes = vp_week[vp_week["product_ref"].isin(refs_extra)]


df_discrepantes = df_discrepantes[(df_discrepantes["mes"]==8) & (df_discrepantes["lost_sales"]>0) ]
df_discrepantes = df_discrepantes.groupby("product_ref").agg({"lost_sales":"sum"})
print("Cantidad de vp que no deberia tomar:", df_discrepantes["lost_sales"].sum())
df_discrepantes #Los productos que no se movieron hace mucho tiempo no salen en el analisis de ns

Cantidad de vp que no deberia tomar: 47.0


Unnamed: 0_level_0,lost_sales
product_ref,Unnamed: 1_level_1
DAB28118025,27.0
DAR12123UHE,2.0
DCS00342118,2.0
DCS00342186,1.0
DLS00105011,1.0
DLX00393020,14.0


In [6]:
mes = 8
vp_agosot_2025 = vp_week[(vp_week["ano"]==2025)& (vp_week["mes"]==mes)]
vp_agosot_2025.to_excel(f"/home/donsson/proyectos/MODELO ABASTECIMIENTO/exceles/vp_definitivasparaanalisis{mes}.xlsx")

vp_agosot_2025.groupby("store_name")["lost_sales"].sum()

store_name
PRINCIPAL COTA           5814.0
SUCURSAL BARRANQUILLA    1590.0
SUCURSAL BUCARAMANGA      700.0
SUCURSAL CALI            1465.0
SUCURSAL CALLE 6         1255.0
SUCURSAL MEDELLIN         984.0
SUCURSAL NORTE            506.0
SUCURSAL VALLADOLID       772.0
Name: lost_sales, dtype: float64

# UNION

## EMA SEMANAL CON VP SEMANALES (SOLO 2025)

In [7]:
# ===============================
# Procesar ventas normales
# ===============================
df_sales = df.copy()
df_sales["date_invoice"] = pd.to_datetime(df_sales["date_invoice"], errors="coerce")


# Referncia de producto
df_sales["product_ref"] = df_sales["product_name"].str.extract(r"\[([A-Z0-9]+)\]")


# Columnas temporales igual que en df_vp
df_sales["Semana"] = df_sales["date_invoice"].dt.to_period("W").dt.start_time
df_sales["ano"]    = df_sales["Semana"].dt.year
df_sales["mes"]    = df_sales["Semana"].dt.month
df_sales["dia"]    = df_sales["Semana"].dt.day

# ===============================
# Agrupación por tienda + producto + semana
# ===============================
sales_by_week = (
    df_sales.groupby(["Sucursal", "product_ref", "Semana", "ano", "mes", "dia"], as_index=False)["quantity"]
    .sum()
    .rename(columns={"quantity": "sales",
                     "Sucursal":"store_name"})
)

# Resultado
sales_by_week.sample(10)



Unnamed: 0,store_name,product_ref,Semana,ano,mes,dia,sales
104806,SUCURSAL NORTE,BCE00609125,2025-02-03,2025,2,3,1.0
63999,SUCURSAL CALI,DAB02663025,2025-08-25,2025,8,25,1.0
120798,SUCURSAL VALLADOLID,DAB08195025,2025-01-20,2025,1,20,1.0
105779,SUCURSAL NORTE,BCS00236125,2025-03-10,2025,3,10,4.0
83044,SUCURSAL CALLE 6,DAE02672025,2024-11-11,2024,11,11,3.0
49772,SUCURSAL BUCARAMANGA,BLS00018125,2024-12-16,2024,12,16,1.0
60607,SUCURSAL CALI,BCS00367125,2024-12-23,2024,12,23,1.0
120845,SUCURSAL VALLADOLID,DAB08213025,2025-08-11,2025,8,11,1.0
63551,SUCURSAL CALI,BLS10286125,2024-12-09,2024,12,9,1.0
26013,SUCURSAL BARRANQUILLA,BCE00606125,2025-05-05,2025,5,5,6.0


In [8]:
df_merged = pd.merge(
    sales_by_week[["store_name", "product_ref", "Semana", "sales"]],
    vp_week[["store_name", "product_ref", "Semana", "lost_sales","veces_vp"]],
    on=["store_name", "product_ref", "Semana"],
    how="outer"
).fillna(0)


In [9]:
df_merged["año"]    = df_merged["Semana"].dt.year
df_merged["mes"]    = df_merged["Semana"].dt.month
df_merged["dia"]    = df_merged["Semana"].dt.day


df_merged.head(10)

Unnamed: 0,store_name,product_ref,Semana,sales,lost_sales,veces_vp,año,mes,dia
0,PRINCIPAL COTA,AC10388020,2025-02-24,10.0,0.0,0.0,2025,2,24
1,PRINCIPAL COTA,AC10388020,2025-07-07,2.0,0.0,0.0,2025,7,7
2,PRINCIPAL COTA,AC10388020,2025-07-21,2.0,0.0,0.0,2025,7,21
3,PRINCIPAL COTA,AC10388020,2025-09-01,4.0,0.0,0.0,2025,9,1
4,PRINCIPAL COTA,AGB0GRAS030,2025-07-28,0.0,1.0,1.0,2025,7,28
5,PRINCIPAL COTA,AHB0TO30132,2025-01-13,0.0,1.0,1.0,2025,1,13
6,PRINCIPAL COTA,AHB80W90030,2025-07-07,0.0,1.0,1.0,2025,7,7
7,PRINCIPAL COTA,AHB80W90050,2025-08-11,0.0,1.0,1.0,2025,8,11
8,PRINCIPAL COTA,AHBNTO68030,2024-10-21,1.0,0.0,0.0,2024,10,21
9,PRINCIPAL COTA,AHBNTO68030,2025-01-27,1.0,1.0,1.0,2025,1,27


## NORMALIZAR DF DE COSTOS

In [10]:
df_p["product_ref"] = df_p["product_name"].str.extract(r"\[([A-Z0-9]+)\]")

df_p_unique = (
    df_p[["product_ref", "producto_costo_unitario"]]
    .drop_duplicates(subset=["product_ref"])
)



df_p["product_ref"].nunique()

4835

### UNIR COSTO

In [11]:
df_merge_def = pd.merge(
    df_merged,
    df_p_unique,
    on="product_ref",
    how="left"
).fillna(0)


merge_def = df_merge_def[df_merge_def["producto_costo_unitario"] !=0].copy() #Eliminar productos sin costos unitarios



### PRUEBA EMA 1

In [12]:
import pandas as pd
import numpy as np

def compute_demand_and_ema(df,
                           alpha=0.20,        # peso de EMA
                           n_init_weeks=12,   # semanas para inicializar EMA
                           week_col="Semana",
                           sales_col="sales",
                           lost_col="lost_sales"):
    df = df.copy()

    # ---------- Asegurar tipos y semana iniciando lunes ----------
    df[week_col] = pd.to_datetime(df[week_col], errors="coerce")
    df[week_col] = df[week_col].dt.to_period('W-MON').dt.start_time

    # Asegurar numéricos
    df[sales_col] = pd.to_numeric(df[sales_col], errors="coerce").fillna(0)
    df[lost_col]  = pd.to_numeric(df[lost_col], errors="coerce").fillna(0)

    # ---------- Crear combinaciones completas ----------
    stores   = df["store_name"].unique()
    products = df["product_ref"].unique()
    weeks    = df[week_col].unique()

    full_index = pd.MultiIndex.from_product([stores, products, weeks],
                                            names=["store_name", "product_ref", week_col])

    df = df.set_index(["store_name", "product_ref", week_col]).reindex(full_index).reset_index()

    # Rellenar ventas y perdidas con 0 en combinaciones faltantes
    df[sales_col] = df[sales_col].fillna(0)
    df[lost_col]  = df[lost_col].fillna(0)

    # Ordenar
    df = df.sort_values(["store_name", "product_ref", week_col])

    out_groups = []

    # ---------- Calcular demanda ajustada y EMA ----------
    for (store, prod), g in df.groupby(["store_name", "product_ref"], sort=False):
        g = g.sort_values(week_col).reset_index(drop=True)
        sales = g[sales_col].to_numpy(dtype=float)
        lost  = g[lost_col].to_numpy(dtype=float)

        L = len(g)
        demanda = np.zeros(L, dtype=float)
        ema_arr = np.zeros(L, dtype=float)

        if L == 0:
            out_groups.append(g)
            continue

        # Inicialización EMA: promedio de primeras n_init_weeks ventas
        init_n = min(n_init_weeks, L)
        ema_prev = float(np.nanmean(sales[:init_n])) if init_n > 0 else 0.0
        if np.isnan(ema_prev):
            ema_prev = 0.0

        for i in range(L):
            s = sales[i]
            l = lost[i]

            # Regla 1
            if s >= 2.0 * l:
                demand_candidate = s + l
                demand = min(demand_candidate, 1.5 * s) if s > 0 else demand_candidate
            else:
                # Regla 2
                demand = s + 0.5 * ema_prev

            # Guardar redondeando
            demanda[i] = round(demand, 6)

            # Calcular EMA
            ema = alpha * demand + (1.0 - alpha) * ema_prev
            ema = round(ema, 6)
            ema_arr[i] = ema

            # actualizar para siguiente
            ema_prev = ema

        # Añadir columnas al grupo
        g = g.copy()
        g["demanda_ajustada"] = demanda
        g["EMA"] = ema_arr

        out_groups.append(g)

    # Concat resultados
    result = pd.concat(out_groups, ignore_index=True, sort=False)
    result = result.sort_values(["store_name", "product_ref", week_col]).reset_index(drop=True)
    return result

# ------------------ USO ------------------
df_with_demand1 = compute_demand_and_ema(merge_def, alpha=0.2, n_init_weeks=16)
df_with_demand1[["store_name","product_ref","Semana","sales","lost_sales","demanda_ajustada","EMA"]]

#APROX3min

Unnamed: 0,store_name,product_ref,Semana,sales,lost_sales,demanda_ajustada,EMA
0,PRINCIPAL COTA,AC000001222,2024-08-13,0.0,0.0,0.0,0.0
1,PRINCIPAL COTA,AC000001222,2024-08-20,0.0,0.0,0.0,0.0
2,PRINCIPAL COTA,AC000001222,2024-08-27,0.0,0.0,0.0,0.0
3,PRINCIPAL COTA,AC000001222,2024-09-03,0.0,0.0,0.0,0.0
4,PRINCIPAL COTA,AC000001222,2024-09-10,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
2133049,SUCURSAL VALLADOLID,RG133102222,2025-08-12,0.0,0.0,0.0,0.0
2133050,SUCURSAL VALLADOLID,RG133102222,2025-08-19,0.0,0.0,0.0,0.0
2133051,SUCURSAL VALLADOLID,RG133102222,2025-08-26,0.0,0.0,0.0,0.0
2133052,SUCURSAL VALLADOLID,RG133102222,2025-09-02,0.0,0.0,0.0,0.0


In [13]:
print(f"productos_unicos",df_with_demand1["product_ref"].nunique())

productos_unicos 4158


In [14]:
df_with_demand1["semana_num"] = df_with_demand1["Semana"].dt.isocalendar().week
#df_with_demand1= df_with_demand1[df_with_demand1["EMA"]>1]

In [15]:
df_demand_2025 = df_with_demand1[df_with_demand1["año"]==2025] #Ajustar año


demand_2025 = df_demand_2025[["store_name","product_ref","año","semana_num","EMA","demanda_ajustada","producto_costo_unitario"]] #"producto_costo_unitario","demanda_ajustada"

demand_2025_36 = demand_2025[demand_2025["semana_num"]==36]  #Ajustar semana numero


demand_2025_36.to_csv("ema_mio202536.csv")


filtro_bq1 = demand_2025_36[demand_2025["store_name"]=="SUCURSAL BARRANQUILLA"].sort_values(by=("EMA"), ascending=False)

filtro_bq1.head(30)

  filtro_bq1 = demand_2025_36[demand_2025["store_name"]=="SUCURSAL BARRANQUILLA"].sort_values(by=("EMA"), ascending=False)


Unnamed: 0,store_name,product_ref,año,semana_num,EMA,demanda_ajustada,producto_costo_unitario
343537,SUCURSAL BARRANQUILLA,DAB02570025,2025.0,36,45.69405,44.0,13507.71
326209,SUCURSAL BARRANQUILLA,BLS00037125,2025.0,36,37.222562,34.0,32797.97
344791,SUCURSAL BARRANQUILLA,DAB02772025,2025.0,36,36.339721,48.0,10680.97
361720,SUCURSAL BARRANQUILLA,DAB14570025,2025.0,36,33.086309,60.0,9774.98
282433,SUCURSAL BARRANQUILLA,BCS00035125,2025.0,36,26.137933,14.0,20638.72
295372,SUCURSAL BARRANQUILLA,BCS10035125,2025.0,36,25.603273,59.0,20956.6
344050,SUCURSAL BARRANQUILLA,DAB02666025,2025.0,36,25.477455,37.0,11371.65
281977,SUCURSAL BARRANQUILLA,BCS00025125,2025.0,36,22.65767,31.0,31058.67
361948,SUCURSAL BARRANQUILLA,DAB14772025,2025.0,36,20.996618,30.0,9112.4
285055,SUCURSAL BARRANQUILLA,BCS00249125,2025.0,36,20.939437,30.0,27520.25


In [16]:
filtro_bq1.query("product_ref == 'DLS10286189'")

Unnamed: 0,store_name,product_ref,año,semana_num,EMA,demanda_ajustada,producto_costo_unitario
472528,SUCURSAL BARRANQUILLA,DLS10286189,2025.0,36,1.359194,0.755108,26616.04


In [17]:
filtro_bq1["product_ref"].nunique()

623

In [18]:
import ast
ema_real = pd.read_csv("/home/donsson/proyectos/API/real_ema.csv")

# crear columna "sucursal" a partir del nombre en almacen_id
ema_real["sucursal"] = ema_real["almacen_id"].apply(lambda x: x[1] if isinstance(x, list) else None)

# si almacen_id viene como string "[39, 'BARRANQUILLA']"
ema_real["almacen_id"] = ema_real["almacen_id"].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)

# ahora sí sacar la sucursal
ema_real["sucursal"] = ema_real["almacen_id"].apply(
lambda x: x[1] if isinstance(x, list) and len(x) > 1 else None
)

ema_real = ema_real[["sucursal","product_name","ano","semana","ema","producto_costo_unitario"]]

In [19]:
bq_real = ema_real[ema_real["sucursal"]=="BARRANQUILLA"]
bq_real = bq_real[bq_real["producto_costo_unitario"]>1]
bq_real = bq_real[bq_real["ema"]>1]
bq_real.head(30)

Unnamed: 0,sucursal,product_name,ano,semana,ema,producto_costo_unitario
0,BARRANQUILLA,DAB02570025,2025,36,44.932819,13507.71
1,BARRANQUILLA,BLS00037125,2025,36,36.205431,32797.97
2,BARRANQUILLA,DAB02772025,2025,36,35.10521,10552.22
3,BARRANQUILLA,DAB14570025,2025,36,32.510587,9774.98
4,BARRANQUILLA,BCS00035125,2025,36,25.634694,20638.72
5,BARRANQUILLA,DAB02666025,2025,36,24.973338,11371.65
6,BARRANQUILLA,BCS10035125,2025,36,24.841184,20956.6
7,BARRANQUILLA,BCS00025125,2025,36,22.210658,31058.67
8,BARRANQUILLA,BCS00249125,2025,36,20.799295,27520.25
9,BARRANQUILLA,DAB02982025,2025,36,20.742131,48785.61


In [20]:
bq_real["product_name"].nunique()

395