In [1]:
import pyodbc
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MaxAbsScaler

import warnings
warnings.filterwarnings("ignore")

from pycaret.time_series import *
from datetime import datetime

In [2]:
def df_pdto(cod_pdto, fecha_final):
    """Load the monthly sales series of one product from the data warehouse.

    Runs an aggregate query against the ``V_VTA_VTAHEC`` view, summing
    ``VTAUNDINV`` per (``VTAANO``, ``VTAMES``) for rows whose ``VTAPRDCOD``
    matches ``cod_pdto``, whose ``VTAFCH`` is on or before ``fecha_final``
    and whose ``PRMCOD`` equals 1.

    Parameters
    ----------
    cod_pdto : int or str
        Product code. It is sent to the server as text, exactly as the
        previous quoted-literal version of the query did.
    fecha_final : str or date-like
        Inclusive upper bound for ``VTAFCH``; sent to the server as text.

    Returns
    -------
    pandas.DataFrame
        Columns ``Fecha`` (datetime64, first day of each month) and
        ``Ventas`` (int).

    Notes
    -----
    Connection settings may be overridden with the environment variables
    ``DWH_SERVER``, ``DWH_UID`` and ``DWH_PWD``.
    """
    import os  # function-scope import: only this block reads the environment

    # SECURITY: the literal fallbacks below are kept solely so existing runs
    # keep working. The password should be rotated and supplied through
    # DWH_PWD, after which the fallback literals must be removed.
    cnxn = pyodbc.connect(
        driver='{SQL Server}',
        server=os.environ.get("DWH_SERVER", '192.168.100.58'),
        uid=os.environ.get("DWH_UID", 'bilectura'),
        pwd=os.environ.get("DWH_PWD", 'D1sp@p3l3s'))

    # Values are bound with "?" placeholders instead of being interpolated
    # into the SQL text, so a crafted product code or date cannot alter the
    # statement.
    # An earlier variant of this query (left commented out in the original)
    # summed VTACNTKLS and used a strict "<" bound on VTAFCH.
    codigo_SQL = (
        "SELECT DATEFROMPARTS(VTAANO, VTAMES, 1) AS 'Fecha', "
        "SUM(VTAUNDINV) AS 'Ventas' "
        "FROM V_VTA_VTAHEC "
        "WHERE VTAPRDCOD = ? AND VTAFCH <= ? AND PRMCOD = 1 "
        "GROUP BY DATEFROMPARTS(VTAANO, VTAMES, 1)"
    )

    try:
        cursor = cnxn.cursor()
        try:
            # str(...) keeps the comparison semantics of the old quoted literals.
            cursor.execute(codigo_SQL, str(cod_pdto), str(fecha_final))
            rows = cursor.fetchall()
            columnas = [col[0] for col in cursor.description]
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        cnxn.close()

    df_SQL = pd.DataFrame.from_records(rows, columns=columnas)
    df_SQL["Ventas"] = df_SQL["Ventas"].astype(int)
    df_SQL["Fecha"] = pd.to_datetime(df_SQL["Fecha"])

    return df_SQL

In [3]:
def plot_suavizacion(bd, alpha, tipo_suavizacion):
    """Show the raw and smoothed sales series on a twin-axis plotly figure.

    ``bd`` must provide the columns "Fecha", "Ventas" and "Suavizado".
    "Ventas" is drawn against the primary y-axis and "Suavizado" against the
    secondary one. ``alpha`` and ``tipo_suavizacion`` are only used for the
    legend entry and the figure title. The figure is displayed; nothing is
    returned.
    """
    dates = bd["Fecha"]

    # One scatter trace per series: (values, legend name, uses secondary axis).
    trace_specs = (
        (bd["Ventas"], "Ventas", False),
        (bd["Suavizado"], f"Suavizado alfa {alpha}", True),
    )
    traces = [
        (go.Scatter(x=dates, y=values, mode="lines+markers", name=label), on_secondary)
        for values, label, on_secondary in trace_specs
    ]

    figure_layout = go.Layout(
        title=f"Ventas por mes, suavizado con {tipo_suavizacion} y alpha {alpha}",
        xaxis=dict(title="Fecha"),
        yaxis=dict(title="Ventas"),
        legend=dict(x=1, y=1),
    )

    fig = make_subplots(specs=[[{"secondary_y": True}]])
    for trace, on_secondary in traces:
        fig.add_trace(trace, secondary_y=on_secondary)

    # Axis titles, primary first and secondary second.
    for axis_title, on_secondary in (("Ventas", False), ("Suavizacion", True)):
        fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

    fig.update_layout(figure_layout)
    fig.show()

def suavizacion(bd, alpha= 0.2, tipo_suavizacion= "exponencial simple"):
    """Add a smoothed "Suavizado" column computed from "Ventas" and plot it.

    Parameters
    ----------
    bd : pandas.DataFrame
        Must contain "Ventas", plus "Fecha" for the plot. It is not modified:
        the function works on a copy.
    alpha : float, default 0.2
        Smoothing factor passed to ``Series.ewm``.
    tipo_suavizacion : str, default "exponencial simple"
        * "exponencial simple": exponentially weighted mean of the raw sales.
        * "logaritmica": exponentially weighted mean of ``log(Ventas)``.
        * "raiz cuadrada": exponentially weighted mean of ``sqrt(Ventas)``.
        * any other value: "Suavizado" is a plain copy of "Ventas".

    Returns
    -------
    pandas.DataFrame
        A copy of ``bd`` with the extra "Suavizado" column.
    """
    # Work on a copy so the caller's frame is never mutated as a side effect.
    # The original version added the column to the input frame in place.
    bd = bd.copy()
    ventas = bd["Ventas"]

    if tipo_suavizacion == "exponencial simple":
        bd["Suavizado"] = ventas.ewm(alpha=alpha).mean()
    elif tipo_suavizacion == "logaritmica":
        # NOTE(review): log of zero or negative sales yields -inf/NaN, which
        # then flows into the weighted mean. Confirm the data is strictly
        # positive before using this mode.
        bd["Suavizado"] = np.log(ventas).ewm(alpha=alpha).mean()
    elif tipo_suavizacion == "raiz cuadrada":
        bd["Suavizado"] = np.sqrt(ventas).ewm(alpha=alpha).mean()
    else:
        # Unknown or None mode: no smoothing, just mirror the raw series.
        bd["Suavizado"] = ventas

    plot_suavizacion(bd, alpha, tipo_suavizacion)

    return bd

def inversa_suavizacion(bd, tipo_suavizacion= "exponencial simple"):
    """Undo the value transform applied by a smoothing mode.

    Only the outer transform is inverted (``exp`` for "logaritmica", squaring
    for "raiz cuadrada"); the exponentially weighted mean itself is not
    reversed. Any other mode leaves the data untouched.

    Parameters
    ----------
    bd : pandas.Series, pandas.DataFrame, numpy.ndarray or scalar
        Values on the transformed scale.
    tipo_suavizacion : str, default "exponencial simple"
        Smoothing mode whose transform should be inverted.

    Returns
    -------
    Same kind as ``bd``
        The values on the original scale. They are also printed, as before.
        Previously the function only printed them and returned ``None``, so
        the result could not be reused by the caller.
    """
    if tipo_suavizacion == "logaritmica":
        resultado = np.exp(bd)
    elif tipo_suavizacion == "raiz cuadrada":
        # Plain power works for pandas objects, arrays and scalars alike.
        resultado = bd ** 2
    else:
        resultado = bd

    print(resultado)
    return resultado

In [12]:
# Read the semicolon-separated sales export (comma as decimal mark) and cast
# "Fecha" to datetime and "Ventas" to float in a single chain.
bd_total = pd.read_csv(
    "C:/Users/tcardenas/Downloads/Ventas prodispel.csv",
    sep=";",
    decimal=",",
    encoding='utf-8',
).assign(
    Fecha=lambda tabla: pd.to_datetime(tabla["Fecha"]),
    Ventas=lambda tabla: tabla["Ventas"].astype(float),
)

In [13]:
# Check the column dtypes after the casts applied when loading the CSV.
bd_total.dtypes

Fecha              datetime64[ns]
Codigo producto             int64
Ventas                    float64
dtype: object

In [22]:
# Values accepted by suavizacion() for tipo_suavizacion:
#   "logaritmica", "exponencial simple", "raiz cuadrada";
#   anything else (e.g. None) leaves the series unsmoothed.

cod_pdto = 59572

# Keep only the rows of the chosen product, renumber them from zero and drop
# the now-constant product-code column before smoothing.
df_SQL = (
    bd_total.loc[bd_total["Codigo producto"] == cod_pdto]
    .reset_index(drop=True)
    .drop(columns="Codigo producto")
)
df_SQL = suavizacion(df_SQL, alpha=0.3, tipo_suavizacion="exponencial simple")

In [45]:
# Configure the pycaret time-series experiment on the smoothed series:
# "Suavizado" is the target, "Fecha" the index and "Ventas" is excluded.
setup_suavizado = setup(
            df_SQL, # input DataFrame
            target= "Suavizado",
            # NOTE(review): pycaret documents ignore_features as a list; a bare
            # string is passed here — confirm it is handled as intended.
            ignore_features= "Ventas",
            index= "Fecha",
            session_id = 42, # fixed id to keep runs reproducible
            transform_target= None, # target transformer, e.g. "box-cox", "log", "sqrt", "exp", "cos"
            coverage= 0.9, # prediction-interval coverage
            fh = 6,
            use_gpu= True,
            verbose= True,
            hyperparameter_split= 'train', # 'all' or 'train'
            # seasonal_period= 'Q',
            )

In [46]:
# Compare the candidate models, ordering by the "r2" metric, and keep 5 of them.
top_models = compare_models(
                        n_select= 5,
                        sort= "r2"
                        )
# Store the table returned by pull() right after the comparison
# (presumably the full metrics grid of compare_models — confirm).
metricas_completas = pull()

In [47]:
# Forecast plot for the selected models; data_kwargs requests fh = 9 here,
# while setup() above was configured with fh = 6.
plot_model(top_models, plot = 'forecast', data_kwargs = {'fh': 9})

In [52]:
# Global pandas display option: floats are shown with thousands separators and
# no decimals from this cell onwards.
pd.options.display.float_format = '{:,.0f}'.format
# NOTE(review): index 3 (the fourth entry of top_models) is hard-coded with no
# stated rationale — confirm this is the intended model.
final_model = setup_suavizado.finalize_model(top_models[3])
# Print the predictions of the finalized model, then plot it.
print(setup_suavizado.predict_model(final_model))
setup_suavizado.plot_model(final_model)

         y_pred
2023-04   7,325
2023-05   7,323
2023-06   7,673
2023-07   7,671
2023-08   8,021
2023-09   8,019
