In [134]:
import pandas as pd
import numpy as np

# Corregir la ruta del archivo
df = pd.read_parquet("/home/donsson/proyectos/API/historico_ventas_1_año.parquet")
df_p = pd.read_parquet("/home/donsson/proyectos/API/costo_productos.parquet")


In [135]:
import re
import unicodedata

# Diccionario de códigos a sucursales
mapa_codigos = {
    "FCAL": "CALI",
    "FMDE": "MEDELLIN",
    "FBOG": "BOGOTA",
    "FCTG": "CARTAGENA",
    "FBAQ": "BARRANQUILLA",
    "FVAL":"VALLADOLID"
}


# Equivalencias para normalizar nombres truncados o mal escritos
mapa_equivalencias = {
    "MEDELLIN": "MEDELLIN",
    "MEDELLI": "MEDELLIN",
    "MEDELL": "MEDELLIN",
    "MEDELI": "MEDELLIN",
    "CALI": "CALI",
    "BARRANQUILLA": "BARRANQUILLA",
    "BOGOTA": "BOGOTA",
    "CARTAGENA": "CARTAGENA",
    "VALLADOLID": "VALLADOLID"
}

def normalizar(texto):
    """Quita tildes y pasa a mayúsculas"""
    texto = unicodedata.normalize("NFKD", texto)
    texto = "".join([c for c in texto if not unicodedata.combining(c)])
    return texto.upper()

def extraer_sucursal(nombre):
    if not isinstance(nombre, str):
        return "VENDEDOR EXTERNO"
    
    sucursal = None
    
    # 1) Buscar "Mostrador ..."
    match = re.search(r"Mostrador\s+([A-Za-z0-9\s]+)", nombre, re.IGNORECASE)
    if match:
        sucursal = match.group(1).strip()
    else:
        # 2) Buscar "Calle" o "Cota"
        match2 = re.search(r"(Calle\s+\d+|Cota)", nombre, re.IGNORECASE)
        if match2:
            sucursal = match2.group(1).strip()
        else:
            # 3) Buscar prefijo de código
            for prefijo, ciudad in mapa_codigos.items():
                if nombre.upper().startswith(prefijo):
                    return ciudad
            return "VENDEDOR EXTERNO"
    
    # Normalizar texto
    sucursal = normalizar(sucursal)
    
    # Limpiar T1, T2, T3 al final
    sucursal = re.sub(r"\s*T\d+$", "", sucursal).strip()
    
    # Aplicar equivalencias
    sucursal = mapa_equivalencias.get(sucursal, sucursal)
    
    return sucursal

# Aplicar al dataframe
df["Sucursal"] = df["invoice_name"].apply(extraer_sucursal)

In [136]:
df = pd.merge(df,df_p,on='product_name',how="left")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1773186 entries, 0 to 1773185
Data columns (total 10 columns):
 #   Column                   Dtype         
---  ------                   -----         
 0   price_subtotal           float64       
 1   id                       int64         
 2   quantity                 float64       
 3   product_id_num           int64         
 4   product_name             object        
 5   invoice_id_num           int64         
 6   invoice_name             object        
 7   date_invoice             datetime64[ns]
 8   Sucursal                 object        
 9   producto_costo_unitario  float64       
dtypes: datetime64[ns](1), float64(3), int64(3), object(3)
memory usage: 135.3+ MB


In [137]:
df = df[df["Sucursal"]!= "VENDEDOR EXTERNO"]

df = df[["quantity","product_name","date_invoice","Sucursal","producto_costo_unitario"]]

# Aseguramos que la columna sea datetime (por si acaso)
df["date_invoice"] = pd.to_datetime(df["date_invoice"])

# Tomamos la fecha máxima del DF
fecha_max = df["date_invoice"].max()

# Calculamos el límite de un año atrás
fecha_min = fecha_max - pd.Timedelta(weeks=52)

# Filtramos
df_12_semanas = df[df["date_invoice"] >= fecha_min]

print(df_12_semanas.shape)
print(df_12_semanas["date_invoice"].min(), df_12_semanas["date_invoice"].max())


df_12_semanas.info()

(1311046, 5)
2024-09-04 15:34:28 2025-09-03 15:24:02
<class 'pandas.core.frame.DataFrame'>
Index: 1311046 entries, 0 to 1770881
Data columns (total 5 columns):
 #   Column                   Non-Null Count    Dtype         
---  ------                   --------------    -----         
 0   quantity                 1311046 non-null  float64       
 1   product_name             1311046 non-null  object        
 2   date_invoice             1311046 non-null  datetime64[ns]
 3   Sucursal                 1311046 non-null  object        
 4   producto_costo_unitario  1311039 non-null  float64       
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 60.0+ MB


In [138]:
df_bq_2570 = df_12_semanas[(df["Sucursal"]=="BARRANQUILLA")&( df_12_semanas["product_name"].str.contains("DAB02570025"))]

  df_bq_2570 = df_12_semanas[(df["Sucursal"]=="BARRANQUILLA")&( df_12_semanas["product_name"].str.contains("DAB02570025"))]


In [139]:
df_bq_2570

Unnamed: 0,quantity,product_name,date_invoice,Sucursal,producto_costo_unitario
7425,1.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2025-09-02 15:33:08,BARRANQUILLA,13507.7100
7426,1.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2025-09-02 15:33:08,BARRANQUILLA,13507.7100
7427,1.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2025-09-02 15:33:08,BARRANQUILLA,13507.7100
7428,1.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2025-09-02 15:33:08,BARRANQUILLA,13507.7100
7429,1.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2025-09-02 15:33:08,BARRANQUILLA,13507.7100
...,...,...,...,...,...
1764496,12.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2024-09-06 17:41:10,BARRANQUILLA,13507.7100
1764497,12.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2024-09-06 17:41:10,BARRANQUILLA,13507.7100
1764498,12.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2024-09-06 17:41:10,BARRANQUILLA,13507.7100
1764499,12.0000,[DAB02570025] DA2570 FILTRO AIRE DONSSON - PER...,2024-09-06 17:41:10,BARRANQUILLA,13507.7100


In [149]:
ventas_semana = (
    df_bq_2570
    .groupby(pd.Grouper(key="date_invoice", freq="W"))["quantity"]
    .mean()
    .reset_index()
)

ventas_semana.head(60)


Unnamed: 0,date_invoice,quantity
0,2024-09-08,4.6667
1,2024-09-15,18.9231
2,2024-09-22,2.6
3,2024-09-29,3.5714
4,2024-10-06,6.2
5,2024-10-13,9.75
6,2024-10-20,6.0
7,2024-10-27,3.5
8,2024-11-03,4.8571
9,2024-11-10,11.5714


In [150]:
ventas_mes = (
    df_bq_2570
    .groupby(pd.Grouper(key="date_invoice", freq="M"))["quantity"]
    .sum()
    .reset_index()
)

ventas_mes


  .groupby(pd.Grouper(key="date_invoice", freq="M"))["quantity"]


Unnamed: 0,date_invoice,quantity
0,2024-09-30,2682.0
1,2024-10-31,2106.0
2,2024-11-30,1665.0
3,2024-12-31,1368.0
4,2025-01-31,1287.0
5,2025-02-28,1773.0
6,2025-03-31,2484.0
7,2025-04-30,1512.0
8,2025-05-31,2421.0
9,2025-06-30,1107.0


In [148]:
alpha = 0.2


ventas_semana["EMA1"] = np.nan


ventas_semana.loc[0, "EMA1"] = ventas_semana.loc[0, "quantity"]

for i in range(1, len(ventas_semana)):
    ventas_semana.loc[i, "EMA1"] = (
        ventas_semana.loc[i, "quantity"] * alpha
        + ventas_semana.loc[i-1, "EMA1"] * (1 - alpha)
    )


# Imprimimos el DataFrame con la nueva columna EMA
ventas_semana


Unnamed: 0,date_invoice,quantity,EMA1
0,2024-09-08,4.6667,4.6667
1,2024-09-15,18.9231,7.5179
2,2024-09-22,2.6,6.5344
3,2024-09-29,3.5714,5.9418
4,2024-10-06,6.2,5.9934
5,2024-10-13,9.75,6.7447
6,2024-10-20,6.0,6.5958
7,2024-10-27,3.5,5.9766
8,2024-11-03,4.8571,5.7527
9,2024-11-10,11.5714,6.9165


In [143]:
alpha = 0.2
ventas_semana["EMA_custom"] = np.nan

# En la semana 16 tomamos 80% de EMA1[15] + 20% de quantity[15]
ventas_semana.loc[15, "EMA_custom"] = (
    0.8 * ventas_semana.loc[15, "EMA1"] +
    0.2 * ventas_semana.loc[15, "quantity"]
)

# Desde la semana 17 en adelante usamos EMA_custom
for i in range(16, len(ventas_semana)):
    ventas_semana.loc[i, "EMA_custom"] = (
        (1 - alpha) * ventas_semana.loc[i-1, "EMA_custom"]
        + alpha * ventas_semana.loc[i, "quantity"]
    )



In [145]:
#ventas_semana