# PARETO GENERAL

In [4]:
num_weeks = 52

# VENTAS

## OBTENER

In [None]:
import xmlrpc.client
from datetime import date, timedelta, datetime
import pandas as pd

# Conexi√≥n con Odoo (manteniendo tus credenciales)
username = "juan.cano@donsson.com"  # tu usuario
password = "1000285668"         # tu contrase√±a
url = "https://donsson.com"     # URL del servidor
db = "Donsson_produccion" # nombre de la base de datos


# --- Autenticaci√≥n ---
common = xmlrpc.client.ServerProxy(f"{url}/xmlrpc/2/common")
uid = common.authenticate(db, username, password, {})
models = xmlrpc.client.ServerProxy(f"{url}/xmlrpc/2/object")

# --- Fechas ---
weeks = num_weeks

# --- Fechas autom√°ticas ---
hoy = date.today()
fecha_fin = hoy.strftime("%Y-%m-%d")
fecha_inicio = (hoy - timedelta(weeks=weeks)).strftime("%Y-%m-%d")

# --- 1) Buscar facturas v√°lidas (account.invoice) ---

invoice_domain = [
    ("date_invoice", ">=", fecha_inicio),
    ("date_invoice", "<=", fecha_fin),
    ("type", "=", "out_invoice"),    # solo ventas
    ("state", "in", ["open", "paid"])
]

invoice_ids = models.execute_kw(
    db, uid, password,
    "account.invoice", "search",
    [invoice_domain]
)
print(f"Facturas encontradas: {len(invoice_ids)}")

# --- 2) Descargar las l√≠neas de esas facturas (account.invoice.line) ---

# Campos de la L√çNEA de factura. Eliminamos 'number', 'user_id', 'section_id', 'partner_id' porque ir√°n en la factura.
line_fields = ["product_id", "quantity", "price_subtotal", "invoice_id","create_date","origin"] 

records = []
limit = 20000
offset = 0

while True:
    result = models.execute_kw(
        db, uid, password,
        "account.invoice.line", "search_read",
        [[("invoice_id", "in", invoice_ids)]],
        {"fields": line_fields, "limit": limit, "offset": offset}
    )
    if not result:
        break
    records.extend(result)
    offset += limit
    print(f"Descargados {len(records)} registros de l√≠neas...")

# --- 3) Pasar a DataFrame de l√≠neas ---
line_df = pd.DataFrame(records).fillna(0)


# Separar product_id
line_df["product_id_num"] = line_df["product_id"].apply(
    lambda x: x[0] if isinstance(x, (list, tuple)) else None
)
line_df["product_name"] = line_df["product_id"].apply(
    lambda x: x[1] if isinstance(x, (list, tuple)) else str(x)
)

# Separar invoice_id
line_df["invoice_id_num"] = line_df["invoice_id"].apply(
    lambda x: x[0] if isinstance(x, (list, tuple)) else None
)
line_df["invoice_name"] = line_df["invoice_id"].apply(
    lambda x: x[1] if isinstance(x, (list, tuple)) else str(x)
)

# Convertir fecha a datetime
line_df["date_invoice"] = pd.to_datetime(line_df["create_date"], errors="coerce")

# Eliminar las columnas originales problem√°ticas
line_df = line_df.drop(columns=["product_id", "invoice_id","create_date"])

print(f"Total de l√≠neas descargadas: {len(line_df)}")

# ----------------------------------------------------
# --- 4) Descargar los campos adicionales de Factura (account.invoice) ---
# ----------------------------------------------------
# A√±adimos los campos que quieres: number, user_id, section_id, y tambi√©n partner_id y store_id
invoice_fields = ["id", "store_id", "number", "user_id", "section_id", "partner_id"]
invoices = models.execute_kw(
    db, uid, password,
    "account.invoice", "read",
    [invoice_ids], # Solo las facturas que encontramos
    {"fields": invoice_fields}
)
invoice_df = pd.DataFrame(invoices)

# --- 5) Procesar campos de la factura ---

# Separar store_id
invoice_df["store_name"] = invoice_df["store_id"].apply(
    lambda x: x[1] if isinstance(x, (list, tuple)) else str(x)
)

# Separar user_id (Vendedor)
invoice_df["salesperson_name"] = invoice_df["user_id"].apply(
    lambda x: x[1] if isinstance(x, (list, tuple)) else None
)

# Separar section_id (Equipo de Ventas)
invoice_df["sales_team_name"] = invoice_df["section_id"].apply(
    lambda x: x[1] if isinstance(x, (list, tuple)) else None
)

# Separar partner_id (Cliente/Partner)
invoice_df["partner_id_num"] = invoice_df["partner_id"].apply(
    lambda x: x[0] if isinstance(x, (list, tuple)) else None
)
# El nombre del partner es el segundo elemento de la tupla (si existe)
invoice_df["client_name_inv"] = invoice_df["partner_id"].apply(
    lambda x: x[1] if isinstance(x, (list, tuple)) else None
)


# Eliminar columnas originales no deseadas o ya procesadas
invoice_df = invoice_df.drop(columns=["store_id", "user_id", "section_id", "partner_id"])


# ----------------------------------------------------
# --- 6) Fusionar DataFrames ---
# ----------------------------------------------------

# Fusionamos las l√≠neas de factura (line_df) con los datos de las facturas (invoice_df)
df = line_df.merge(
    invoice_df, 
    left_on="invoice_id_num", 
    right_on="id", 
    how="left"
)

# Limpieza final de columnas de IDs de factura
df = df.drop(columns=["invoice_id_num", "product_id_num"])


df['origin'] = df['origin'].astype('string')

Facturas encontradas: 51191
Descargados 20000 registros de l√≠neas...
Descargados 40000 registros de l√≠neas...
Descargados 60000 registros de l√≠neas...
Descargados 80000 registros de l√≠neas...
Descargados 100000 registros de l√≠neas...
Descargados 120000 registros de l√≠neas...
Descargados 140000 registros de l√≠neas...
Descargados 160000 registros de l√≠neas...
Descargados 180000 registros de l√≠neas...
Descargados 199535 registros de l√≠neas...
Total de l√≠neas descargadas: 199535


## PROCESAR

In [5]:
def clasificar_sucursal(invoice_name):
    if pd.isna(invoice_name):
        return "VENDEDOR EXTERNO"
    if "Mostrador" in invoice_name:
        return "MOSTRADOR"
    return "VENDEDOR EXTERNO"
    
df["Sucursal_tipo"] = df["invoice_name"].apply(clasificar_sucursal)


mapa_sucursales = {
    'BD1': 'PRINCIPAL COTA',
    'BD2': 'SUCURSAL CALLE 6',
    'BD3': 'SUCURSAL VALLADOLID',
    'BD4': 'SUCURSAL NORTE',
    'BD5': 'SUCURSAL MEDELLIN',
    'BD6': 'SUCURSAL BARRANQUILLA',
    'BD7':' SUCURSAL BUCARAGAMNGA ',
    'BD8':'MOSTRADOR COTA',
    'BD9':'SUCURSAL CALI',
    'BD11': 'CUMMINS DE LOS ANDES',
    'BD12': 'CUMMINS DE LOS ANDES MEDELLIN',
    'BD13': 'CSS CONSTRUCTORES',
    'BD14': 'ANTIOQUE√ëA DE LUBRICANTES SGP SAS',
    'BD15': 'ASEO CAPITAL',
    'BD16': 'COVIANDES',
    'BD18': 'CEMEX BOSA',
    'BD19': 'Cemex ibague (eliminar)',
    'BD21': 'CEMEX CENTENARIO',
    'BD22': 'PENDIENTE POR FACTURAR CIERRE CONT.',
    'BD24': 'FILTRO EN REPROCESO',
    'BD26': 'PRESTAMOS INTERNOS',
    'BD31': 'EXP.CTP',
    'BD32': 'EXP. ECUADOR',
    'BD34':'EXP.DONSSON.USA',
    'BD40': 'DESCUADRE B1',
    'BD MP':'MATERIA PRIMA',
    'BD41':'BODEGA VENDEDOR WILMER GIL',
    'BD33':'EXP.REP.DOMINICANA',
    'BD42':'STOCK SIGMA ENERGY'
    # Agrega m√°s c√≥digos si los encuentras en tus datos, siguiendo este formato.
    }


#Quitar las facturas que salen como SO son no vendibles
df = df[~(df["origin"].str.contains("SO"))] 


# Normalizar los separadores para que todos sean iguales
df["origin_norm"] = df["origin"].str.replace(r"[\\/]", "/", regex=True)

# Extraer solo el BD y el n√∫mero (ejemplo: BD11, BD9, etc.)
df["origen"] = df["origin_norm"].str.extract(r"^(BD\d+)")

df["Bodega"] = df["origen"].map(mapa_sucursales)
df["Bodega"] = df["Bodega"].fillna(df["store_name"])

df = df.drop(columns=["origin_norm"])



# ===============================
# Procesar ventas normales
# ===============================
df_sales = df.copy()
df_sales["date_invoice"] = pd.to_datetime(df_sales["date_invoice"], errors="coerce")


# Referncia de producto
df_sales["product_ref"] = df_sales["product_name"].str.extract(r"\[([A-Z0-9]+)\]")


# Columnas temporales igual que en df_vp
df_sales["Semana"] = df_sales["date_invoice"].dt.to_period("W").dt.start_time
df_sales["ano"]    = df_sales["Semana"].dt.year
df_sales["mes"]    = df_sales["Semana"].dt.month
df_sales["dia"]    = df_sales["Semana"].dt.day

# ===============================
# Agrupaci√≥n por tienda + producto + semana
# ===============================
sales = (
    df_sales.groupby(["product_ref", "Semana", "ano", "mes", "dia"], as_index=False)
    .agg(
        sales=("quantity", "sum"),              # suma de unidades vendidas
        num_facturas=("date_invoice", "nunique")  # cantidad de facturas distintas
    )
)


# Resultado
sales.sample(10)



Unnamed: 0,product_ref,Semana,ano,mes,dia,sales,num_facturas
14820,BHS00686125,2025-06-02,2025,6,2,2.0,1
11938,BHE00022125,2025-06-02,2025,6,2,1.0,1
37805,DAR02189025,2025-06-09,2025,6,9,5.0,4
43861,DCE10968189,2025-05-12,2025,5,12,12.0,1
30116,DAB09152025,2025-07-21,2025,7,21,5.0,2
46377,DCX00923139,2025-08-18,2025,8,18,1.0,1
30965,DAB12852025,2025-06-16,2025,6,16,27.0,5
18607,BLS00264125,2025-06-09,2025,6,9,23.0,8
41634,DAX00392189,2025-07-28,2025,7,28,5.0,2
46441,DCX20225137,2025-09-08,2025,9,8,1.0,1


# VENTAS PERDIDAS

## OBTENER

In [6]:
import xmlrpc.client
from datetime import date, timedelta
import pandas as pd

# ===============================
# 1. Conexi√≥n con Odoo
# ===============================

ODOO_URL = "https://donsson.com"
ODOO_DB = "Donsson_produccion"
ODOO_USERNAME = "juan.cano@donsson.com"
ODOO_PASSWORD = "1000285668"

# Autenticaci√≥n
common = xmlrpc.client.ServerProxy(f"{ODOO_URL}/xmlrpc/2/common")
uid = common.authenticate(ODOO_DB, ODOO_USERNAME, ODOO_PASSWORD, {})
if not uid:
    print("Error de autenticaci√≥n. Verifica tus credenciales.")
    exit()
models = xmlrpc.client.ServerProxy(f"{ODOO_URL}/xmlrpc/2/object")

# ===============================
# 2. Definir rango de semanas
# ===============================

# N√∫mero de semanas que quieres descargar (puedes cambiarlo)
NUM_SEMANAS = num_weeks

# Fecha de hoy
hoy = date.today()
# Fecha de inicio seg√∫n n√∫mero de semanas atr√°s
fecha_inicio = hoy - timedelta(weeks=NUM_SEMANAS)

FECHA_INICIO = fecha_inicio.strftime("%Y-%m-%d")
FECHA_FIN = hoy.strftime("%Y-%m-%d")

print(f"üìÖ Descargando registros desde {FECHA_INICIO} hasta {FECHA_FIN}")

# ===============================
# 3. Leer ventas perdidas
# ===============================

lost_sales_ids = models.execute_kw(
    ODOO_DB, uid, ODOO_PASSWORD,
    "ventas.perdidas", "search",
    [[["fecha", ">=", FECHA_INICIO], ["fecha", "<=", FECHA_FIN], ["almacenamiento_tipo", "=", "agotado"]]]
)

lost_sales_data = models.execute_kw(
    ODOO_DB, uid, ODOO_PASSWORD,
    "ventas.perdidas", "read",
    [lost_sales_ids],
    {"fields": ["origen", "store_id", "cliente_id", "fecha", "product_ref",
                "cantidad", "cantidad_existencia", "cantidad_reservada",
                "almacenamiento_tipo"]}
)

df = pd.DataFrame(lost_sales_data)

# ===============================
# 4. Normalizar columnas
# ===============================

for col in ["store_id", "cliente_id"]:
    df[col.replace('_id', '_name')] = df[col].apply(
        lambda x: x[1] if isinstance(x, (list, tuple)) and len(x) > 1 else str(x) if x else None
    )
    df[col + "_num"] = df[col].apply(
        lambda x: x[0] if isinstance(x, (list, tuple)) and len(x) > 0 else None
    )

# Eliminar columnas originales
df = df.drop(columns=["id", "store_id", "cliente_id"], errors='ignore')

# ===============================
# 4.1. Forzar tipos de columnas antes de exportar
# ===============================
for col in df.columns:
    if col.endswith("_num"):
        df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")
    else:
        df[col] = df[col].astype("string")

vp = df.copy()



üìÖ Descargando registros desde 2024-10-02 hasta 2025-10-01


## PROCESAR

In [7]:
import pandas as pd
import numpy as np

# ===============================
# Filtrar almacenamiento agotado
# ===============================
vp = vp[vp["almacenamiento_tipo"].str.lower() == "agotado"]

# ===============================
# Asegurar tipos correctos
# ===============================
vp = vp.copy()
vp["fecha"] = pd.to_datetime(vp["fecha"], errors="coerce")

# Num√©ricos
for col in ["cantidad", "cantidad_existencia", "cantidad_reservada"]:
    vp[col] = pd.to_numeric(vp[col], errors="coerce").fillna(0).clip(lower=0)

# ===============================
# Reglas Odoo vectorizadas
# ===============================
is_cot = vp["origen"].fillna("").str.lower() == "cotizacion"
ignore_mask = vp["cantidad"] >= 100

ajuste = np.where(
    is_cot,
    vp["cantidad"] - vp["cantidad_existencia"] - vp["cantidad_reservada"],
    vp["cantidad"] - vp["cantidad_reservada"]
)

# Aplicar reglas de descarte y piso en cero
ajuste = np.where(ignore_mask, 0, ajuste)
ajuste = np.where(ajuste > 0, ajuste, 0)

vp["ventas_perdidas"] = ajuste.astype(float)

# ===============================
# Columnas temporales
# ===============================
vp["Semana"] = vp["fecha"].dt.to_period("W").dt.start_time
vp["ano"]   = vp["Semana"].dt.year
vp["mes"]   = vp["Semana"].dt.month
vp["dia"]   = vp["Semana"].dt.day

# ===============================
# Filtro adicional: excluir SERV y CARCASA
# ===============================
mask_excluir = ~vp["product_ref"].str.contains("SERV|CARCASA", case=False, na=False)
vp = vp[mask_excluir]

# ===============================
# Agrupaci√≥n por tienda + producto + semana
# ===============================
lost_by_week = (
    vp.groupby(["product_ref", "Semana", "ano", "mes", "dia"])
    .agg(
        lost_sales=("ventas_perdidas", "sum"),   # suma total de ventas perdidas
        veces_vp=("ventas_perdidas", "count")    # n√∫mero de veces que hubo p√©rdida
    )
    .reset_index()
)

# Mostrar resultado agrupado
vp = lost_by_week

In [8]:
vp.sample(10)

Unnamed: 0,product_ref,Semana,ano,mes,dia,lost_sales,veces_vp
6559,BCE00924125,2025-01-13,2025,1,13,4.0,3
35778,DCE30128137,2024-10-28,2024,10,28,1.0,1
16637,BLS00072125,2024-12-02,2024,12,2,3.0,4
35298,DCE01029189,2025-03-31,2025,3,31,3.0,3
16660,BLS00075125,2025-05-05,2025,5,5,1.0,2
30535,DAP09046025,2025-06-16,2025,6,16,1.0,1
9979,BCS00626125,2025-05-26,2025,5,26,5.0,2
27820,DAE04120025,2025-09-08,2025,9,8,1.0,1
22574,DAB08186025,2024-10-07,2024,10,7,1.0,1
23447,DAB09146025,2025-05-19,2025,5,19,1.0,2


# COSTOS

## OBTENER

In [9]:
import xmlrpc.client
import pandas as pd


# ===============================
# Conexi√≥n con Odoo
# ===============================
username = "juan.cano@donsson.com"   # tu usuario
password = "1000285668"              # tu contrase√±a
url = "https://donsson.com"          # URL del servidor
db = "Donsson_produccion"            # nombre de la base de datos

common = xmlrpc.client.ServerProxy(f"{url}/xmlrpc/2/common")
uid = common.authenticate(db, username, password, {})
models = xmlrpc.client.ServerProxy(f"{url}/xmlrpc/2/object")


#df_campos.info()


productos_costo = models.execute_kw(
    db , uid , password,
    "cs.analisis.costo","search_read",
    [[]],
    {"fields":["producto_costo_unitario","producto_id"]}


)

# Convertir en df
df_p = pd.DataFrame(productos_costo)


# Separar lista anidada
df_p[["product_id_num","product_name"]] = (df_p["producto_id"].apply(pd.Series))

#Agregar columna estandarizada
df_p["product_name"] = df_p["product_name"].str.strip()

#DF normalizado
df_p = df_p[["product_name","producto_costo_unitario"]]

## PROCESAR

In [10]:
df_p["product_ref"] = df_p["product_name"].str.extract(r"\[([A-Z0-9]+)\]")

df_p_unique = (
    df_p[["product_ref", "producto_costo_unitario"]]
    .drop_duplicates(subset=["product_ref"])
)

In [18]:
sales.head()

Unnamed: 0,product_ref,Semana,ano,mes,dia,sales,num_facturas
0,AC10388020,2025-01-13,2025,1,13,2.0,1
1,AC10388020,2025-02-24,2025,2,24,10.0,1
2,AC10388020,2025-07-07,2025,7,7,2.0,1
3,AC10388020,2025-07-21,2025,7,21,2.0,1
4,AC10388020,2025-09-01,2025,9,1,4.0,2


# UNION

In [19]:
vp.head()

Unnamed: 0,product_ref,Semana,ano,mes,dia,lost_sales,veces_vp
0,AC000001222,2025-01-20,2025,1,20,1.0,1
1,AC000002222,2025-01-20,2025,1,20,1.0,1
2,AC000002222,2025-01-27,2025,1,27,1.0,1
3,AC000005020,2024-12-30,2024,12,30,1.0,1
4,AC00389020,2024-10-21,2024,10,21,1.0,1


# VENTAS A VP

In [24]:
df_merged = pd.merge(
    sales[["product_ref", "Semana", "sales", "num_facturas"]],
    vp[["product_ref", "Semana", "lost_sales", "veces_vp"]],
    on=["product_ref", "Semana"],
    how="outer"
).fillna(0)

# Pasar de semanas a meses
df_merged["Mes"] = df_merged["Semana"].dt.to_period("M").dt.to_timestamp()

# Consolidar por producto y mes
df_merged = (
    df_merged
    .groupby(["product_ref", "Mes"], as_index=False)
    .agg({
        "sales": "sum",
        "num_facturas": "sum",
        "lost_sales": "sum",
        "veces_vp": "sum"
    })
)

# A√±adir columnas de a√±o y mes
df_merged["a√±o"] = df_merged["Mes"].dt.year
df_merged["mes"] = df_merged["Mes"].dt.month



In [25]:
df_merged.sample(10)

Unnamed: 0,product_ref,Mes,sales,num_facturas,lost_sales,veces_vp,a√±o,mes
1904,BAC00188125,2025-03-01,2.0,1.0,2.0,1.0,2025,3
17486,DAB09178025,2025-06-01,12.0,4.0,5.0,3.0,2025,6
15402,DAB04598025,2025-06-01,39.0,28.0,1.0,1.0,2025,6
23562,DAR02270025,2024-10-01,21.0,13.0,0.0,2.0,2024,10
21185,DAE04356025,2024-12-01,0.0,0.0,1.0,1.0,2024,12
22486,DAE14120025,2025-09-01,2.0,1.0,7.0,7.0,2025,9
14953,DAB02881025,2025-06-01,76.0,38.0,0.0,0.0,2025,6
29620,DLE00839189,2024-11-01,43.0,9.0,8.0,1.0,2024,11
1771,BAC00155125,2024-09-01,0.0,0.0,0.0,1.0,2024,9
16626,DAB08166025,2025-05-01,4.0,4.0,1.0,2.0,2025,5


# EMA

In [29]:
import pandas as pd
import numpy as np

def compute_demand_and_ema(df,
                           alpha=0.20,        # peso de EMA
                           n_init=12,   # semanas o meses para inicializar EMA
                           date_col="Mes", #Semana o Mes
                           sales_col="sales",
                           lost_col="lost_sales",
                           freq ="M" # "W-MON" semanas y "M" meses
                           ):
    df = df.copy()

    # ---------- Asegurar tipos y semana iniciando lunes ----------
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
    df[date_col] = df[date_col].dt.to_period(freq).dt.start_time #W_MON para lunes en semanas M para primer dia del mes

    # Asegurar num√©ricos
    df[sales_col] = pd.to_numeric(df[sales_col], errors="coerce").fillna(0)
    df[lost_col]  = pd.to_numeric(df[lost_col], errors="coerce").fillna(0)

    # ---------- Crear combinaciones completas ----------
    #stores   = df["store_name"].unique()
    products = df["product_ref"].unique()


    # Crear rango completo desde min hasta max semana
    weeks = pd.period_range(df[date_col].min(), df[date_col].max(), freq=freq).to_timestamp()


    full_index = pd.MultiIndex.from_product([products, weeks],
                                            names=["product_ref", date_col])

    df = df.set_index(["product_ref", date_col]).reindex(full_index).reset_index()

    # Rellenar ventas y perdidas con 0 en combinaciones faltantes
    df[sales_col] = df[sales_col].fillna(0)
    df[lost_col]  = df[lost_col].fillna(0)

    # Ordenar
    df = df.sort_values(["product_ref", date_col])

    out_groups = []

    # ---------- Calcular demanda ajustada y EMA ----------
    for (prod), g in df.groupby("product_ref", sort=False):
        g = g.sort_values(date_col).reset_index(drop=True)
        sales = g[sales_col].to_numpy(dtype=float)
        lost  = g[lost_col].to_numpy(dtype=float)

        L = len(g)
        demanda = np.zeros(L, dtype=float)
        ema_arr = np.zeros(L, dtype=float)

        if L == 0:
            out_groups.append(g)
            continue

        # Inicializaci√≥n EMA: promedio de primeras n_init ventas
        init_n = min(n_init, L)
        ema_prev = float(np.nanmean(sales[:init_n])) if init_n > 0 else 0.0
        if np.isnan(ema_prev):
            ema_prev = 0.0

        for i in range(L):
            s = sales[i]
            l = lost[i]

            # Regla 1
            if s >= 2.0 * l:
                demand_candidate = s + l
                demand = min(demand_candidate, 1.5 * s) if s > 0 else demand_candidate
            else:
                # Regla 2
                demand = s + 0.5 * ema_prev

            # Guardar redondeando
            demanda[i] = round(demand, 6)

            # Calcular EMA
            ema = alpha * demand + (1.0 - alpha) * ema_prev
            ema = round(ema, 6)
            ema_arr[i] = ema

            # actualizar para siguiente
            ema_prev = ema

        # A√±adir columnas al grupo
        g = g.copy()
        g["demanda_ajustada"] = demanda
        g["EMA"] = ema_arr

        out_groups.append(g)

        

    # Concat resultados
    result = pd.concat(out_groups, ignore_index=True, sort=False)
    result = result.sort_values(["product_ref", date_col]).reset_index(drop=True)
    return result

# ------------------ USO ------------------
df_with_demand1 = compute_demand_and_ema(df_merged, alpha=0.2, n_init=12) 

In [32]:
df_with_demand1.sample(10)

Unnamed: 0,product_ref,Mes,sales,num_facturas,lost_sales,veces_vp,a√±o,mes,demanda_ajustada,EMA
38253,DAE05727115,2025-04-01,0.0,,0.0,,,,0.0,0.0
53778,DHE00307152,2025-07-01,0.0,,0.0,,,,0.0,0.0
49330,DCE00932136,2025-05-01,0.0,0.0,1.0,1.0,2025.0,5.0,0.0,0.0
56327,DLS00307005,2025-08-01,0.0,,0.0,,,,0.0,0.0
2861,BAB12852125,2024-10-01,0.0,,0.0,,,,0.0,0.0
41467,DAE61063115,2025-07-01,0.0,,0.0,,,,0.0,0.085194
29787,DAB18120025,2025-01-01,0.0,,0.0,,,,0.0,0.0
6079,BAE04562125,2025-05-01,0.0,0.0,1.0,1.0,2025.0,5.0,0.0,0.0
39563,DAE06803189,2025-01-01,0.0,,0.0,,,,0.0,0.054613
12191,BCS00434125,2025-07-01,38.0,13.0,7.0,2.0,2025.0,7.0,45.0,31.724041


In [None]:
def add_deviation_and_classification(df, ema_col="EMA", sales_col="sales", date_col="Mes", n_init=12): ##PERILLA MESES 
    df = df.copy()
    out_groups = []

    for (prod), g in df.groupby("product_ref", sort=False):
        g = g.sort_values(date_col).reset_index(drop=True)
        
        sales = g[sales_col].to_numpy(dtype=float)
        ema   = g[ema_col].to_numpy(dtype=float)
        L = len(sales)

        # Inicializamos columnas
        g["desviacion_ema%"] = 0.0
        g["coef_ventas"] = 0.0

        if L < 2:
            out_groups.append(g)
            continue

        for i in range(L):
            # Usar solo hasta la mes actual (i)
            window_start = max(0, i - n_init)       
            window_sales = sales[window_start:i]     # mes previas
            window_ema   = ema[window_start:i]

            if len(window_sales) < 2:
                continue

            promedio_prev = np.mean(window_sales)
            if promedio_prev == 0:
                promedio_prev = 1e-6

            # --- Desviaci√≥n usando EMAs previos ---
            desv = np.std(window_ema, ddof=1) / promedio_prev if promedio_prev > 0 else 0

            # --- Coeficiente de ventas ---
            coef = (max(window_sales) - min(window_sales)) / promedio_prev if promedio_prev > 0 else 0

            g.at[i, "desviacion_ema%"] = round(desv,2)
            g.at[i, "coef_ventas"] = coef

        out_groups.append(g)

    return pd.concat(out_groups, ignore_index=True)


In [43]:
df_with_demand2 = add_deviation_and_classification(df_with_demand1)
df_with_demand2 = df_with_demand2[["store_name","product_ref","Mes","num_facturas","sales","lost_sales","demanda_ajustada","EMA","desviacion_ema%","coef_ventas"]]

In [198]:
df_last_month = df_with_demand2[(df_with_demand2["a√±o"]==2025) & (df_with_demand2["mes"]==9) ]

## UNIR COSTO

In [226]:
df_merge_def = pd.merge(
    df_last_month,
    df_p_unique,
    on="product_ref",
    how="outer" #para incluir todos , estaba antes left
).fillna(0)

ultimo_mes = df_last_month["Mes"].max()
df_merge_def["Mes"] = ultimo_mes

df_merge_def["a√±o"] = df_merge_def["Mes"].dt.year
df_merge_def["mes"] = df_merge_def["Mes"].dt.month



merge_def =  df_merge_def.copy()

In [238]:
merge_def.sort_values(by=["desviacion_ema%"], ascending =True).sample(30)

Unnamed: 0,product_ref,Mes,sales,num_facturas,lost_sales,veces_vp,a√±o,mes,demanda_ajustada,EMA,desviacion_ema%,coef_ventas,producto_costo_unitario
1291,BHE00611125,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,0.0,0.0,1.0
515,BAE04893125,2025-09-01,0.0,0.0,5.0,5.0,2025,9,0.238698,0.429656,0.81,12.0,130050.0
4518,DHE00309024,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,0.0,0.0,5250.0
2938,DAE04082025,2025-09-01,0.0,0.0,1.0,1.0,2025,9,0.040488,0.072878,0.8,12.0,5701.05
2399,DAB18093025,2025-09-01,20.0,4.0,1.0,1.0,2025,9,21.0,26.200567,0.13,1.714286,83173.57
1367,BHE00860125,2025-09-01,4.0,1.0,1.0,5.0,2025,9,5.0,2.675908,0.28,3.333333,124425.59
3730,DAR08147025,2025-09-01,1.0,1.0,0.0,0.0,2025,9,1.0,1.730075,0.26,4.0,66640.43
2099,DAB02974025,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,0.0,0.0,10453.46
2935,DAE04069025,2025-09-01,3.0,2.0,1.0,1.0,2025,9,4.0,4.263398,0.26,3.428571,9148.02
4800,DRG2650V034,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,0.0,0.0,22818.25


In [254]:
merge_def["coef_ventas"].describe()

count    4853.000000
mean        1.744122
std         2.894294
min         0.000000
25%         0.000000
50%         0.000000
75%         2.571429
max        12.000000
Name: coef_ventas, dtype: float64

# CLASIFICACION

In [249]:
ema = merge_def.copy()

# -------------------------------
# 1. Calcular venta_costo_ema
# -------------------------------
ema["venta_costo_ema"] = (ema["EMA"] * ema["producto_costo_unitario"]).round(2)

# -------------------------------
# 2. Totales por sucursal
# -------------------------------
ema["venta_costo_tot"] = ema.groupby("product_ref")["venta_costo_ema"].transform("sum")
ema["total_ema"] = ema.groupby("product_ref")["EMA"].transform("sum")

# -------------------------------
# 3. Proporciones dentro de cada sucursal
# -------------------------------
ema["venta_costo%"] = (ema["venta_costo_ema"] / ema["venta_costo_tot"]).round(4)
ema["cantidad%"] = (ema["EMA"] / ema["total_ema"]).round(4)

# -------------------------------
# 4. Orden y acumulados
# -------------------------------
# Ordenar por costo dentro de cada sucursal
ema = ema.sort_values(["product_ref", "venta_costo_ema"], ascending=[True, False])
ema["acumulado_costo"] = ema.groupby("product_ref")["venta_costo%"].cumsum().round(4)

# Ordenar por cantidad dentro de cada sucursal
ema = ema.sort_values(["product_ref", "EMA"], ascending=[True, False])
ema["acumulado_cantidad"] = ema.groupby("product_ref")["cantidad%"].cumsum().round(2)

# Acumulado de desviaci√≥n por sucursal
if "desviacion_ema%" in ema.columns:
    ema["acumulado_desviacion"] = (
        ema.groupby("product_ref")["desviacion_ema%"].cumsum().round(2)
    )

# -------------------------------
# 5. Renombrar columnas (estilo reporte Odoo)
# -------------------------------
ema_def = ema.rename(
    columns={
        "a√±o": "A√±o",
        "semana_num": "semana",
        "EMA": "Ema",
        "producto_costo_unitario": "Costo unitario",
        "venta_costo_ema": "Venta costo EMA",
        "venta_costo%": "Venta costo %",
        "acumulado_costo": "Acumulado costo",
        "desviacion_ema%": "Desviacion EMA %",
    }
)

# -------------------------------
# 6. Calcular desviaci√≥n en dinero
# -------------------------------
if "Desviacion EMA %" in ema_def.columns:
    ema_def["Desviacion EMA"] = (
        ema_def["Venta costo EMA"] * ema_def["Desviacion EMA %"]
    ).round(1)



# Ejemplo de reglas (lo que viene de conf_acumulado_desviacion en Odoo) #PERILLA
reglas = [
    {"rango_ini": 0, "rango_fin": 0.8, "valor": 0},
    {"rango_ini": 0.8, "rango_fin": 1.5, "valor": 0.5},
    {"rango_ini": 1.5, "rango_fin": 3, "valor": 0.8},
    {"rango_ini": 3, "rango_fin": 2000, "valor": 1},
]

# aplicar reglas 0‚Äì0.8‚Äì1 como antes, pero sobre coef_ventas_norm


# Funci√≥n que asigna el valor seg√∫n reglas
def asignar_desviacion(coef, reglas):
    for r in reglas:
        if r["rango_ini"] <= coef <= r["rango_fin"]:
            return r["valor"]
    return np.nan  # si no entra en ning√∫n rango

# Aplicar
ema_def["acumulado_desviacion_p"] = ema_def["coef_ventas"].apply(
    lambda x: asignar_desviacion(x, reglas)
)



# -------------------------------
# 7. Limpiar columnas innecesarias
# -------------------------------
if "Unnamed: 0" in ema_def.columns:
    ema_def = ema_def.drop(columns=["Unnamed: 0"])


# 8. Reglas acumulado costo combinado
# --- Configuraci√≥n ---
def check_div(a, b):
    try:
        return a / b if b != 0 else 0
    except Exception:
        return 0

## PERILLA

conf_porcentajes_impacto = {
    "cantidad": 0.5,
    "costo": 0.3,
    "desviacion": 0.2,
    "cantidad_mayor": 0.7,
    "costo_mayor": 0.3,
}

# Valor de referencia en porcentaje (ejemplo: 10%) ##PERILLA
conf_acumulado_cantidad = 80
conf_acumulado_cantidad_desviacion = check_div(conf_acumulado_cantidad, 100)


# --- C√°lculo en el DataFrame ---
def calcular_combinado(row):
    acumulado_cantidad_final = round(row["acumulado_cantidad"], 2)
    acumulado_costo_final = round(row["Acumulado costo"], 2)
    acumulado_desviacion_p_final = round(row["acumulado_desviacion_p"], 2)

    if acumulado_cantidad_final >= conf_acumulado_cantidad_desviacion:
        return (
            acumulado_cantidad_final * conf_porcentajes_impacto["cantidad_mayor"]
            + acumulado_costo_final * conf_porcentajes_impacto["costo_mayor"]
        )
    else:
        return (
            acumulado_cantidad_final * conf_porcentajes_impacto["cantidad"]
            + acumulado_costo_final * conf_porcentajes_impacto["costo"]
            + acumulado_desviacion_p_final * conf_porcentajes_impacto["desviacion"]
        )

# Nueva columna en tu df
ema_def["acumulado_combinado"] = ema_def.apply(calcular_combinado, axis=1).round(2)


def clasificar_pareto(valor):
    if valor <= 0.50:
        return "AAA"
    elif valor < 0.8:
        return "A"
    elif valor < 0.95:
        return "B"
    else:
        return "C"

ema_def["Clasificacion"] = ema_def["acumulado_combinado"].apply(clasificar_pareto)


# Asegurar Mes como datetime y crear A√±o/Mes_num (enteros)
ema_def["Mes"] = pd.to_datetime(ema_def["Mes"], errors="coerce")
ema_def = ema_def.dropna(subset=["Mes"]).copy()
ema_def["A√±o"] = ema_def["Mes"].dt.year.astype(int)
ema_def["Mes_num"] = ema_def["Mes"].dt.month.astype(int)


ema_def = ema_def[ema_def["A√±o"]>2000]


In [250]:
ema_def.sample(10)

Unnamed: 0,product_ref,Mes,sales,num_facturas,lost_sales,veces_vp,A√±o,mes,demanda_ajustada,Ema,...,Venta costo %,cantidad%,Acumulado costo,acumulado_cantidad,acumulado_desviacion,Desviacion EMA,acumulado_desviacion_p,acumulado_combinado,Clasificacion,Mes_num
3547,DAR02049025,2025-09-01,19.0,8.0,0.0,0.0,2025,9,19.0,32.586926,...,1.0,1.0,1.0,1.0,0.15,216202.4,0.5,1.0,C,9
570,BAE08096125,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
2587,DAC00234020,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
3949,DCE00490136,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
55,ALG20W50116,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
2414,DAB18249025,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
4445,DCX00366138,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
2430,DAB28191025,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9
4657,DLE00907122,2025-09-01,2.0,1.0,1.0,1.0,2025,9,3.0,0.7257,...,1.0,1.0,1.0,1.0,0.75,5652.2,1.0,1.0,C,9
74,ALT10W30116,2025-09-01,0.0,0.0,0.0,0.0,2025,9,0.0,0.0,...,,,,,0.0,0.0,0.0,,C,9


In [253]:
ema_def.sort_values(by=["acumulado_combinado"], ascending=False)

ema_def["acumulado_combinado"].max()

np.float64(1.0)

In [248]:
ema_def.to_excel("/home/donsson/proyectos/MODELO ABASTECIMIENTO/exceles/prueba_pareto_general.xlsx")

Exception ignored in: <function ZipFile.__del__ at 0x76b27fc2fe20>
Traceback (most recent call last):
  File "/usr/lib/python3.12/zipfile/__init__.py", line 1940, in __del__
    self.close()
  File "/usr/lib/python3.12/zipfile/__init__.py", line 1957, in close
    self.fp.seek(self.start_dir)
ValueError: seek of closed file
