In [None]:
%pylab inline
import pandas as pd

from dataManager import DataManager
from model import ModelManager as manager

from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error as mse

import plotly.graph_objects as go

In [None]:
# Mean price and mean discount per product reference (REF).
# DESC_LARGA is aggregated as well but is not part of the selected columns.
promedios = (
    DataManager()
    .sales_prod
    .copy()
    .groupby(['REF'])
    .agg({'PRECIO': 'mean', 'DESCUENTO(%)': 'mean', 'DESC_LARGA': 'first'})
    [['PRECIO', 'DESCUENTO(%)']]
)

# Distinct (DATE, F_COVID) pairs, indexed by DATE.
covid = (
    DataManager()
    .sales_ref_month_sin_ventas_mayores()
    [['DATE', 'F_COVID']]
    .drop_duplicates()
    .set_index('DATE')
)
covid

In [None]:
# Product catalogue with exact duplicate rows removed, plus a derived AREA
# column (ANCHO * FONDO).
# The original chained assignment `prods = data = ...` also bound `data` to this
# frame; that alias was overwritten two lines later and has been removed.
prods = DataManager().products.drop_duplicates().copy()
prods['AREA'] = prods.ANCHO * prods.FONDO

# Sales table with a 'YYYY-MM' DATE key built from ANIO and zero-padded MES.
# NOTE(review): DATE is written onto the frame returned by DataManager; if that
# object is cached inside DataManager this mutates it in place -- confirm.
data = DataManager().sales_ref_month_sin_ventas_mayores()
data['DATE'] = data['ANIO'].astype(str) + '-' + data['MES'].astype(str).str.zfill(2)

# Pivot to REF x (DATE, ANIO, MES, TIENDA) with summed CANTIDAD, then melt back
# to long form: every REF gets a row for every pivoted column, and the
# combinations without sales (NaN after the pivot) become CANTIDAD == 0.
pasado = (
    data
    .pivot_table(
        index='REF',
        columns=['DATE', 'ANIO', 'MES', 'TIENDA'],
        values='CANTIDAD',
        aggfunc='sum',
    )
    .reset_index()
    .pipe(pd.melt, id_vars='REF')
    .sort_values(['REF', 'DATE'])
    .rename(columns={'value': 'CANTIDAD'})
    .reset_index(drop=True)
    .fillna(0)
)

# Attach the observed price / discount / covid flag per (REF, DATE, TIENDA).
# Rows that had no sales keep NaN in these columns; they are filled below.
pasado = pasado.merge(
    data
    .drop(columns=['CANTIDAD', 'ANIO', 'MES'])
    .groupby(['REF', 'DATE', 'TIENDA'])
    .agg({'PRECIO': 'mean', 'DESCUENTO(%)': 'mean', 'F_COVID': 'first'}),
    on=['REF', 'DATE', 'TIENDA'],
    how='left',
    validate='1:1',
)
pasado = pasado[[
    'REF', 'TIENDA', 'DATE', 'ANIO', 'MES',
    'CANTIDAD', 'PRECIO', 'DESCUENTO(%)', 'F_COVID',
]]

# F_COVID: take the value recorded for each DATE in `covid`.
pasado = pasado.set_index('DATE')
pasado.update(covid)
pasado = pasado.reset_index()

# PRECIO / DESCUENTO(%): fill only the still-missing values with the
# per-REF averages (overwrite=False leaves existing values untouched).
pasado = pasado.set_index('REF')
pasado.update(promedios, overwrite=False)
pasado = pasado.reset_index()

# Add the product attributes and order the panel chronologically.
pasado = (
    pasado
    .merge(prods, on='REF', validate='m:1')
    .sort_values(['ANIO', 'MES'])
    .reset_index(drop=True)
)

pasado

In [None]:
# One row per (REF, TIENDA) for products that are not discontinued, carrying the
# mean historical price/discount and the (first-seen) product attributes.
sales = (
    pasado
    .query('VIGENCIA != "DESCONTINUADO"')
    .groupby(['REF', 'TIENDA'])
    .agg({
        'PRECIO': 'mean', 'DESCUENTO(%)': 'mean',
        'AREA': 'first', 'ALTO': 'first', 'PUESTOS': 'first', 'COLOR_POS': 'first',
        'SUBCATEGORIA_POS': 'first', 'MATERIAL_POS': 'first', 'ACABADO': 'first',
        'CATEGORIA': 'first', 'ORIGEN': 'first',
    })
    .reset_index()
)

# 2021 future months and covid
# The per-month flags live in `covid_futuro` (not `covid`) so the DataFrame named
# `covid` that the `pasado` cell passes to `DataFrame.update` is not clobbered
# when cells are re-run out of order.
months = [5, 6, 7, 8, 9, 10, 11, 12]
covid_futuro = [1] * len(months)

# One column per future month holding that month's F_COVID flag; melting then
# yields one row per (REF, TIENDA, MES).
min_sales = sales[['REF', 'TIENDA']].copy()
for mes, flag in zip(months, covid_futuro):
    min_sales[mes] = flag

melt_sales = pd.melt(min_sales, id_vars=['REF', 'TIENDA'], var_name='MES', value_name='F_COVID')
futuro = melt_sales.merge(sales, on=['REF', 'TIENDA'], how='left', validate='m:1')

futuro['ANIO'] = 2021
futuro['DATE'] = futuro['ANIO'].astype(str) + '-' + futuro['MES'].astype(str).str.zfill(2)
# The original computed this sort and threw the result away; keep it.
futuro = futuro.sort_values(['ANIO', 'MES']).reset_index(drop=True)

futuro = futuro[[
    'REF', 'TIENDA', 'DATE', 'ANIO', 'MES', 'PRECIO', 'DESCUENTO(%)', 'F_COVID',
    'AREA', 'ALTO', 'PUESTOS', 'COLOR_POS', 'SUBCATEGORIA_POS', 'MATERIAL_POS',
    'ACABADO', 'ORIGEN',
]]
futuro = futuro.fillna(0)
futuro

In [None]:
# Stack history (without the target column) on top of the future rows so both
# are encoded together; the last `particion` rows of `total` are the future.
particion = len(futuro)
total = pd.concat(
    [pasado.drop(columns='CANTIDAD'), futuro],
    ignore_index=True,
)
total.iloc[:-particion]

In [None]:
# Build the design matrix for history + future in one pass so both share the
# same scaling and the same one-hot columns, then fit on the historical rows.
scaler = MinMaxScaler()

# The last entry of `num` is the target; it is sliced off for the feature
# matrix (`total` was built without CANTIDAD).
num = ['AREA', 'ALTO', 'DESCUENTO(%)', 'PRECIO', 'CANTIDAD']
x_num = total[num[:-1]].astype('float')
x_norm = scaler.fit_transform(x_num)

cat = [
    'TIENDA', 'MES',
    'F_COVID', 'PUESTOS', 'COLOR_POS', 'SUBCATEGORIA_POS',
    'MATERIAL_POS', 'ACABADO', 'ORIGEN'
]
x_cat = total[cat].astype('category')
x_dummies = pd.get_dummies(x_cat)

# Use the scaled numeric block. The original appended the unscaled `x_num`,
# which left `x_norm` (and the scaler) computed but unused.
x_tot = np.hstack([x_norm, x_dummies.to_numpy(dtype=float)])

# Historical rows are everything except the trailing `particion` future rows;
# they line up positionally with `pasado`.
x = x_tot[:-particion]
y = pasado['CANTIDAD']

# random_state pins the estimator's internal randomness so re-runs reproduce.
model = GradientBoostingRegressor(
    learning_rate=0.01,
    max_depth=6,
    n_estimators=200,
    random_state=0,
)
model.fit(x, y)

In [None]:
# Score the trailing `particion` rows of the design matrix (the future rows).
# Note: `x` and `y` are rebound here from training data to future features /
# predictions.
x = x_tot[-particion:, :]
y = model.predict(X=x)

In [None]:
# Store the predictions, rounded to whole units, next to the future rows.
# `y` is positional, so this relies on `futuro` keeping the row order it had
# when `total` was built.
futuro['PREDICTED'] = np.round(y)
futuro

In [None]:
# PREDICTED is already rounded when it is created, so this column is a copy of
# it; kept so the columns of `futuro` stay the same.
futuro['PREDICTED_M'] = (futuro.PREDICTED).round()

# Monthly totals. numeric_only=True restricts the sum to numeric columns:
# without it, recent pandas versions also "sum" the string columns
# (REF, TIENDA, ...), which concatenates them or raises.
d_futuro = futuro.groupby(['DATE']).sum(numeric_only=True).reset_index()
d_pasado = pasado.groupby(['DATE']).sum(numeric_only=True).reset_index()
defi = d_futuro

# Predicted future demand against the observed history, aggregated per month.
fig = go.Figure()
fig.add_scatter(x=defi['DATE'], y=defi['PREDICTED'], mode='lines', name='Valores predichos')
fig.add_scatter(x=d_pasado['DATE'], y=d_pasado['CANTIDAD'], mode='lines', name='Valores reales')


In [None]:
# n: distinct REFs with at least one unit sold in April 2021.
# m: distinct REFs in the whole history.
n = pasado.loc[
    pasado.ANIO.eq(2021) & pasado.MES.eq(4) & pasado.CANTIDAD.gt(0),
    'REF',
].nunique(dropna=False)
m = pasado['REF'].nunique(dropna=False)
print(n, m)

In [28]:
# g: distinct REFs with a positive rounded prediction for May 2021.
# h: distinct REFs among the future rows.
g = futuro.loc[
    futuro.ANIO.eq(2021) & futuro.MES.eq(5) & futuro.PREDICTED.gt(0),
    'REF',
].nunique(dropna=False)
h = futuro['REF'].nunique(dropna=False)
print(g, h)

98 584


In [29]:
# Frequency of each rounded prediction for May: VALUE is the predicted
# quantity, PREDICTED is the number of rows with that value.
# Naming both columns explicitly keeps the result the same across pandas
# versions; the previous rename of an 'index' column only matched the layout
# produced by value_counts() before pandas 2.0.
test = (
    futuro.loc[futuro.MES == 5, 'PREDICTED']
    .value_counts()
    .rename_axis('VALUE')
    .reset_index(name='PREDICTED')
)
test

Unnamed: 0,VALUE,PREDICTED
0,0.0,8035
1,1.0,112
2,2.0,13
3,3.0,12
4,5.0,2
5,4.0,1
6,10.0,1


In [22]:
# In-sample check: score the historical rows of the design matrix with the
# fitted model and put the raw (unrounded) predictions next to the actuals.
pred_past_test = pasado.assign(PREDICTED=model.predict(x_tot[:-particion]))
pred_past_test

Unnamed: 0,REF,DATE,TIENDA,ANIO,MES,CANTIDAD,PRECIO,DESCUENTO(%),F_COVID,ITEM,...,COLOR,ANCHO,ALTO,FONDO,DESC_LARGA,SUBCATEGORIA_POS,COLOR_POS,MATERIAL_POS,AREA,PREDICTED
0,A01040:00005:,2019-01,ADMINISTRACION,2019,1,0.0,145210.21875,0.65000,0.0,11057,...,NEGRO,40.0,77.0,47.0,SILLA COMEDOR 85-1062 NG NO GARANTIA,SILLAS DE COMEDOR,NEGRO,SINTÉTICO,1880.0,0.084206
1,A01040:00005:,2019-01,PAGINA WEB FIOTTI,2019,1,0.0,145210.21875,0.65000,0.0,11057,...,NEGRO,40.0,77.0,47.0,SILLA COMEDOR 85-1062 NG NO GARANTIA,SILLAS DE COMEDOR,NEGRO,SINTÉTICO,1880.0,0.084206
2,A01040:00005:,2019-01,PUNTO DE VENTA AV 68,2019,1,0.0,145210.21875,0.65000,0.0,11057,...,NEGRO,40.0,77.0,47.0,SILLA COMEDOR 85-1062 NG NO GARANTIA,SILLAS DE COMEDOR,NEGRO,SINTÉTICO,1880.0,0.796419
3,A01040:00005:,2019-01,PUNTO DE VENTA BUCARAMANGA,2019,1,0.0,145210.21875,0.65000,0.0,11057,...,NEGRO,40.0,77.0,47.0,SILLA COMEDOR 85-1062 NG NO GARANTIA,SILLAS DE COMEDOR,NEGRO,SINTÉTICO,1880.0,0.084206
4,A01040:00005:,2019-01,PUNTO DE VENTA CALI,2019,1,0.0,145210.21875,0.65000,0.0,11057,...,NEGRO,40.0,77.0,47.0,SILLA COMEDOR 85-1062 NG NO GARANTIA,SILLAS DE COMEDOR,NEGRO,SINTÉTICO,1880.0,0.086559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382160,XZ0993:00100:,2021-04,PUNTO DE VENTA PLAZA CENTRAL,2021,4,0.0,484316.47619,0.48131,1.0,11151,...,BLANCO,100.0,44.0,50.0,MESA CENTRO FILIS LACA BL 2C LIQUIDACIÓN,MESAS DE CENTRO,BLANCO,VIDRIO,5000.0,0.053286
382161,XZ0993:00100:,2021-04,PUNTO DE VENTA PLAZA IMPERIAL,2021,4,0.0,484316.47619,0.48131,1.0,11151,...,BLANCO,100.0,44.0,50.0,MESA CENTRO FILIS LACA BL 2C LIQUIDACIÓN,MESAS DE CENTRO,BLANCO,VIDRIO,5000.0,0.053286
382162,XZ0993:00100:,2021-04,PUNTO DE VENTA POBLADO,2021,4,0.0,484316.47619,0.48131,1.0,11151,...,BLANCO,100.0,44.0,50.0,MESA CENTRO FILIS LACA BL 2C LIQUIDACIÓN,MESAS DE CENTRO,BLANCO,VIDRIO,5000.0,0.053286
382163,XZ0993:00100:,2021-04,VENTAS DIGITALES,2021,4,0.0,484316.47619,0.48131,1.0,11151,...,BLANCO,100.0,44.0,50.0,MESA CENTRO FILIS LACA BL 2C LIQUIDACIÓN,MESAS DE CENTRO,BLANCO,VIDRIO,5000.0,0.053286


In [27]:
# Monthly totals of actual and in-sample predicted quantities.
# numeric_only=True keeps the sum away from the string columns
# (REF, TIENDA, descriptions, ...), which recent pandas versions would
# otherwise concatenate or reject.
defi = pred_past_test.groupby(['DATE']).sum(numeric_only=True).reset_index()

# In-sample fit, actual history, and the forecast on a single chart.
# `d_futuro` comes from the earlier forecasting cell.
fig = go.Figure()
fig.add_scatter(x=defi['DATE'], y=defi['PREDICTED'], mode='lines', name='Valores predichos')
fig.add_scatter(x=defi['DATE'], y=defi['CANTIDAD'], mode='lines', name='Valores reales')
fig.add_scatter(x=d_futuro['DATE'], y=d_futuro['PREDICTED'], mode='lines', name='Valores futuros')

In [30]:
import statsmodels.api as sm

# Ordinary least squares on the same historical design matrix (plus an
# intercept column) as a linear reference for the boosted model.
mod = sm.OLS(
    endog=pasado['CANTIDAD'],
    exog=sm.add_constant(x_tot[:-particion]),
)
res = mod.fit()
res.summary()