In [1]:
%pylab inline
import pandas as pd

from dataManager import DataManager
from model import ModelManager as manager

from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error as mse

import plotly.graph_objects as go

Populating the interactive namespace from numpy and matplotlib


In [2]:
promedios = DataManager().sales_prod.copy()
promedios = promedios.groupby(['REF']).agg({'PRECIO':'mean','DESCUENTO(%)':'mean','DESC_LARGA':'first'})[['PRECIO','DESCUENTO(%)']]

covid = DataManager().sales_ref_month_sin_ventas_mayores()[['DATE','F_COVID']].drop_duplicates()
covid = covid.set_index('DATE')

In [3]:
prods = data = DataManager().products.drop_duplicates().copy()
prods['AREA'] = prods.ANCHO * prods.FONDO

data = DataManager().sales_ref_month_sin_ventas_mayores()
data['DATE'] = data['ANIO'].astype(str) + '-' + data['MES'].astype(str).str.zfill(2)

pasado = data.pivot_table(index='REF',columns=['DATE','ANIO','MES','TIENDA'],values='CANTIDAD',aggfunc='sum').reset_index()
pasado = pd.melt(pasado,id_vars='REF')

pasado = pasado.sort_values(['REF','DATE'])
pasado = pasado.rename(columns={'value':'CANTIDAD'})
pasado = pasado.reset_index(drop=True).fillna(0)

pasado = pasado.merge(data.drop(columns=['CANTIDAD','ANIO','MES']).groupby(['REF','DATE','TIENDA']).agg({'PRECIO':'mean','DESCUENTO(%)':'mean','F_COVID':'first'}),on=['REF','DATE','TIENDA'],how='left',validate='1:1')
pasado = pasado[['REF','TIENDA','DATE','ANIO','MES','CANTIDAD','PRECIO','DESCUENTO(%)','F_COVID']]

pasado = pasado.set_index('DATE')
pasado.update(covid)
pasado.reset_index(inplace=True)

pasado = pasado.set_index('REF')
pasado.update(promedios, overwrite=False)
pasado.reset_index(inplace=True)

pasado = pasado.merge(prods,on='REF',validate='m:1')
pasado = pasado.sort_values(['ANIO','MES']).reset_index(drop=True)

In [4]:
sales = pasado.copy().query('VIGENCIA != "DESCONTINUADO"')
sales = sales.groupby(['REF','TIENDA']).agg({'PRECIO':'mean','DESCUENTO(%)':'mean',
    'AREA':'first','ALTO':'first','PUESTOS':'first', 'COLOR_POS':'first', 
    'SUBCATEGORIA_POS':'first','MATERIAL_POS':'first','ACABADO':'first',
    'CATEGORIA':'first','ORIGEN':'first'}
).reset_index()

# 2021 future months and covid
months = [5,6,7,8,9,10,11,12]
covid = [0,0,0,0,0,0,0,0]
min_sales = sales[['REF','TIENDA']].copy()
for m,c in zip(months,covid):
    min_sales[m]=c

melt_sales=pd.melt(min_sales,id_vars=['REF','TIENDA'],var_name='MES',value_name='F_COVID')
futuro=melt_sales.merge(sales,on=['REF','TIENDA'],how='left',validate='m:1')

futuro['ANIO'] = 2021
futuro['DATE'] = futuro['ANIO'].astype(str) + '-' +futuro['MES'].astype(str).str.zfill(2)
futuro.sort_values(['ANIO','MES']).reset_index(drop=True)

futuro = futuro[['REF','TIENDA','DATE','ANIO','MES','PRECIO','DESCUENTO(%)','F_COVID','AREA','ALTO','PUESTOS','COLOR_POS','SUBCATEGORIA_POS','MATERIAL_POS','ACABADO','ORIGEN']]
futuro[(futuro.REF == 'BR0002:00193:') & (futuro.TIENDA == 'PAGINA WEB FIOTTI')]
futuro = futuro.fillna(0)

In [5]:
total = pd.concat([pasado.drop(columns='CANTIDAD').copy(),futuro.copy()]).reset_index(drop=True)
particion = len(futuro)

In [6]:
scaler = MinMaxScaler()

num=['AREA','ALTO','DESCUENTO(%)','PRECIO','CANTIDAD']
x_num=total[num[:-1]].astype('float')
x_norm = scaler.fit_transform(x_num)

cat=[
    'TIENDA','MES',
    'F_COVID','PUESTOS','COLOR_POS','SUBCATEGORIA_POS',
    'MATERIAL_POS','ACABADO','ORIGEN'
]
x_cat=total[cat].astype('category')
x_dummies=pd.get_dummies(x_cat)

x_tot = np.append(x_num,x_dummies,axis=1)
x_past = x_tot[:-particion]
y_past = pasado['CANTIDAD']

In [7]:
model = GradientBoostingRegressor(**{'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 200})
model.fit(x_past,y_past)

GradientBoostingRegressor(learning_rate=0.01, max_depth=6, n_estimators=200)

In [8]:
x_fut = x_tot[-particion:]
y_fut = model.predict(x_fut)

In [12]:
pasado['PREDICTED'] = model.predict(x_past)

d_futuro = futuro.groupby(['DATE']).sum().reset_index()
d_pasado = pasado.groupby(['DATE']).sum().reset_index()
defi = d_futuro#.query('REF=="D00935:00048:00048"')
fig = go.Figure()
fig.add_scatter(x=defi['DATE'], y=defi['PREDICTED'], mode='lines', name='Valores predichos')
fig.add_scatter(x=d_pasado['DATE'], y=d_pasado['PREDICTED'], mode='lines', name='Valores predichos')
fig.add_scatter(x=d_pasado['DATE'], y=d_pasado['CANTIDAD'], mode='lines', name='Valores reales')


In [46]:
import statsmodels.api as sm

mod = sm.OLS(y, sm.add_constant(x))
res = mod.fit()
res.summary()

0,1,2,3
Dep. Variable:,CANTIDAD,R-squared:,0.035
Model:,OLS,Adj. R-squared:,0.035
Method:,Least Squares,F-statistic:,187.3
Date:,"Wed, 01 Sep 2021",Prob (F-statistic):,0.0
Time:,19:14:20,Log-Likelihood:,-510140.0
No. Observations:,382165,AIC:,1020000.0
Df Residuals:,382089,BIC:,1021000.0
Df Model:,75,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0716,0.007,10.396,0.000,0.058,0.085
x1,6.898e-07,3.64e-07,1.897,0.058,-2.29e-08,1.4e-06
x2,0.0006,8.24e-05,6.793,0.000,0.000,0.001
x3,-0.1923,0.012,-16.093,0.000,-0.216,-0.169
x4,-5.082e-08,4.85e-09,-10.474,0.000,-6.03e-08,-4.13e-08
x5,-0.0403,0.005,-8.051,0.000,-0.050,-0.030
x6,-0.0233,0.005,-4.730,0.000,-0.033,-0.014
x7,0.4166,0.005,84.637,0.000,0.407,0.426
x8,-0.0923,0.012,-7.429,0.000,-0.117,-0.068

0,1,2,3
Omnibus:,907013.301,Durbin-Watson:,1.763
Prob(Omnibus):,0.0,Jarque-Bera (JB):,21489072251.935
Skew:,23.85,Prob(JB):,0.0
Kurtosis:,1163.707,Cond. No.,5.66e+19


In [48]:
pred_past_test = pasado.copy()
pred_past_test['PREDICTED'] = res.predict(sm.add_constant(x,has_constant='add'))

pred_fut_test = futuro.copy()
pred_fut_test['PREDICTED'] = res.predict(sm.add_constant(x_fut,has_constant='add'))

In [49]:
d_pasado = pred_past_test.groupby(['DATE']).sum().reset_index()
d_futuro = pred_fut_test.groupby(['DATE']).sum().reset_index()

fig = go.Figure()
fig.add_scatter(x=d_pasado['DATE'], y=d_pasado['PREDICTED'], mode='lines', name='Valores predichos')
fig.add_scatter(x=d_pasado['DATE'], y=d_pasado['CANTIDAD'], mode='lines', name='Valores reales')
fig.add_scatter(x=d_futuro['DATE'], y=d_futuro['PREDICTED'], mode='lines', name='Valores futuros')