In [2]:
import pyodbc
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MaxAbsScaler

import warnings
warnings.filterwarnings("ignore")

from pycaret.time_series import *

In [83]:
def df_cluster(nits_clientes, fecha_final):
    """Return the combined monthly sales of a group of customers.

    For each customer-branch id in ``nits_clientes`` the view ``V_VTA_VTAHEC``
    is queried for the monthly sum of ``VTAVLRVTA`` over rows whose ``VTAFCH``
    is strictly earlier than ``fecha_final``. The per-customer results are
    stacked and summed by month.

    Parameters
    ----------
    nits_clientes : iterable
        Customer-branch identifiers; each one is compared (as text) against
        ``CONCAT(CONCAT(VTANIT, '-'), VTASUC)``.
    fecha_final : str
        Exclusive upper bound for ``VTAFCH``, e.g. ``'2023-03-31'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Fecha`` (first day of each month, ascending) with a single
        integer column ``Ventas`` holding the total across all customers.
        Empty (same index name and column) when no customer returned any row.

    Notes
    -----
    The server, user and password can be overridden with the environment
    variables ``DWH_SERVER``, ``DWH_UID`` and ``DWH_PWD``.
    """
    import os  # local import so the function does not depend on another cell

    # FIXME(security): the literals below are hard-coded credentials kept only
    # as fallbacks so the notebook keeps running; set the DWH_* environment
    # variables instead, then remove the literals and rotate the password.
    cnxn = pyodbc.connect(
        driver='{SQL Server}',
        server=os.environ.get('DWH_SERVER', '192.168.100.58'),
        uid=os.environ.get('DWH_UID', 'bilectura'),
        pwd=os.environ.get('DWH_PWD', 'D1sp@p3l3s'))

    # Loop-invariant, parameterized query: values are bound through "?"
    # placeholders instead of being formatted into the SQL text.
    consulta_SQL = (
        "SELECT DATEFROMPARTS(VTAANO, VTAMES, 1) AS 'Fecha', "
        "SUM(VTAVLRVTA) AS 'Ventas' "
        "FROM V_VTA_VTAHEC "
        "WHERE CONCAT(CONCAT(VTANIT, '-'), VTASUC) = ? AND VTAFCH < ? "
        "GROUP BY DATEFROMPARTS(VTAANO, VTAMES, 1)"
    )

    frames = []
    try:
        cursor = cnxn.cursor()
        for nit in nits_clientes:
            # str() mirrors the text formatting the original f-string applied.
            cursor.execute(consulta_SQL, str(nit), str(fecha_final))
            rows = cursor.fetchall()
            if not rows:
                # Customer without sales in the period: nothing to add.
                continue

            columnas = [col[0] for col in cursor.description]
            df_SQL_int = pd.DataFrame.from_records(rows, columns=columnas)
            # Explicit int64: a plain `int` maps to a 32-bit dtype on some
            # platforms, which could overflow once customers are summed.
            df_SQL_int["Ventas"] = df_SQL_int["Ventas"].astype("int64")
            df_SQL_int["Fecha"] = pd.to_datetime(df_SQL_int["Fecha"])
            frames.append(df_SQL_int)
    finally:
        # Always release the warehouse connection, even if a query fails.
        cnxn.close()

    if not frames:
        return pd.DataFrame(
            {"Ventas": pd.Series(dtype="int64")},
            index=pd.DatetimeIndex([], name="Fecha"),
        )

    # Stack the per-customer frames (they may have different lengths) and
    # total the sales of every month.
    return pd.concat(frames, ignore_index=True).groupby("Fecha").sum()

In [94]:
# Columns removed from the clustering export right after loading.
col_eliminar = ["Recency", "Monetary", "Frequency", "Distrito-Nombretipozona", "Cluster"]

# Segment to keep: district code, zone type name and cluster letter.
filtro_distrito = 10
filtro_tipozona = "Institucional"
filtro_cluster = "A"

# The export is ';'-separated and uses ',' as the decimal mark.
df_clusters = pd.read_csv(
    "C:/Users/tcardenas/OneDrive/OneDrive - Grupo DISPAPELES/Documents/ML-Dispapeles-TomasCaLo/Clustering/Clustering 27-1-23.csv",
    sep=";",
    decimal=",",
    encoding='utf-8',
).drop(columns=col_eliminar)

# One boolean mask per filter; a row is kept only when all three match.
coincide_distrito = df_clusters["Codigo distrito"] == filtro_distrito
coincide_tipozona = df_clusters["Nombre tipo zona"] == filtro_tipozona
coincide_cluster = df_clusters["Letra cluster"] == filtro_cluster

df_clusters_f = df_clusters.loc[coincide_distrito & coincide_tipozona & coincide_cluster]

In [96]:
from datetime import datetime

# Customer-branch ids of the filtered segment, as a plain Python list.
lista_nits = df_clusters_f["Nit cliente-sucursal"].tolist()

# Cut-off date for the sales query. Parsing and re-formatting rejects a
# malformed date early and yields the 'YYYY-MM-DD' text form.
fecha_corte = datetime.strptime('2023-03-31', '%Y-%m-%d')
fecha_final = fecha_corte.strftime('%Y-%m-%d')

In [97]:
# Query the warehouse for every selected customer and total their monthly
# sales dated before `fecha_final`.
ventas_cluster = df_cluster(nits_clientes= lista_nits, fecha_final= fecha_final)

In [98]:
# Show the aggregated monthly series (index: Fecha, column: Ventas).
ventas_cluster

Unnamed: 0_level_0,Ventas
Fecha,Unnamed: 1_level_1
2018-01-01,0
2018-02-01,57325910
2018-03-01,42296947
2018-04-01,61992674
2018-05-01,61996231
...,...
2022-11-01,485318911
2022-12-01,423495241
2023-01-01,516162793
2023-02-01,478651558


In [103]:
# Initialise the PyCaret time-series experiment on the monthly sales series.
# fh = 6 requests a forecast horizon of 6 periods; session_id = 123 fixes the
# random seed so repeated runs give the same results.
s = setup(ventas_cluster, fh = 6, session_id = 123)

In [104]:
# Evaluate the forecasters available in the experiment and keep the
# top-ranked one in `best`.
# NOTE(review): in the recorded output every candidate has a negative R2, so
# the ranking should be read with caution.
best = compare_models()

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
snaive,Seasonal Naive Forecaster,0.1816,0.1146,16563919.1111,18951206.7076,0.0326,0.0323,-1.93,0.0233
naive,Naive Forecaster,0.2041,0.1264,18876662.0,21062274.0853,0.0374,0.0368,-2.4037,0.0333
auto_arima,Auto ARIMA,0.2867,0.1905,27491691.871,32178422.5448,0.0535,0.0558,-7.2658,2.03
theta,Theta Forecaster,0.3401,0.2085,31663104.3222,34703689.3537,0.0628,0.0603,-8.3429,0.03
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.4145,0.2813,37871288.9337,46577182.2134,0.0745,0.0735,-17.0705,0.2333
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.4416,0.2931,40936550.7765,48642950.6495,0.0801,0.0791,-17.7636,0.1633
exp_smooth,Exponential Smoothing,0.4843,0.2957,46744926.9088,50269284.3748,0.0923,0.0863,-19.0188,0.1
ets,ETS,0.4862,0.2999,46950782.0828,50981850.5865,0.0927,0.0866,-19.5198,0.0433
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.499,0.3221,45660984.4882,53038284.4289,0.0898,0.085,-24.493,0.1133
arima,ARIMA,0.5288,0.3289,49520312.9711,54912679.7972,0.0979,0.0918,-21.8827,0.0633


In [106]:
# Plot the forecast produced by the selected model.
plot_model(best, plot = 'forecast')

In [107]:
# Diagnostic plots for the selected model.
plot_model(best, plot = 'diagnostics')

In [108]:
# In-sample predictions of the selected model against the observed series.
plot_model(best, plot = 'insample')