## Recomendaciones basadas en similitud de productos

In [1]:
#imports
import pandas as pd
import numpy as np

from scipy.spatial.distance import pdist, squareform

from datetime import date
import datetime
import warnings
import locale

locale.setlocale(locale.LC_TIME, 'es_MX.UTF-8')
warnings.filterwarnings("ignore")

## Consiguiendo datos

In [2]:
def LimpiarDatos(datos: pd.DataFrame):
    #Cambiando formato fecha
    datos['fecha'] = datos['fecha'].dt.strftime('%d/%m/%Y')
    
    #Quitando columnas extras
    for column in datos.columns:
        if column not in ['fecha','cliente','cantidad','producto','precio_u','importe']:
            datos.drop(column,
                       axis = 1,
                       inplace = True)
            
    #Quitando filas vacias
    datos.dropna(how='all', 
                 inplace = True)
    return datos

In [3]:
ventas_enero = pd.ExcelFile("Enero/ventas_diarias_Enero.xlsm", 
                            engine = "openpyxl")
ventas_febrero = pd.ExcelFile("Febrero/ventas_diarias_Febrero.xlsm", 
                            engine = "openpyxl")
sheets_enero = [sheet for sheet in ventas_enero.sheet_names if "_20" in sheet]
sheets_febrero = [sheet for sheet in ventas_febrero.sheet_names if "_20" in sheet]

print(sheets_enero)
print(sheets_febrero)

data = pd.DataFrame()

for sheet in sheets_enero:
    venta = LimpiarDatos(ventas_enero.parse(sheet))
    data = data.append(venta)

for sheet in sheets_febrero:
    venta = LimpiarDatos(ventas_febrero.parse(sheet))
    data = data.append(venta)  
    
display(data)

['02_01_2021', '03_01_2021', '04_01_2021', '05_01_2021', '06_01_2021', '07_01_2021', '08_01_2021', '09_01_2021', '10_01_2021', '11_01_2021', '12_01_2021', '13_01_2021', '14_01_2021', '15_01_2021', '16_01_2021', '17_01_2021', '18_01_2021', '19_01_2021', '20_01_2021', '21_01_2021', '22_01_2021', '23_01_2021', '24_01_2021', '25_01_2021', '26_01_2021', '27_01_2021', '28_01_2021', '29_01_2021', '30_01_2021', '31_01_2021']
['01_02_2021', '02_02_2021', '03_02_2021', '04_02_2021', '05_02_2021', '06_02_2021', '07_02_2021', '08_02_2021', '09_02_2021', '10_02_2021', '11_02_2021', '12_02_2021', '13_02_2021', '14_02_2021', '15_02_2021', '16_02_2021', '17_02_2021', '18_02_2021', '19_02_2021', '20_02_2021', '21_02_2021', '22_02_2021', '23_02_2021', ' 24_02_2021', '25_02_2021', '26_02_2021', '27_02_2021', '28_02_2021']


Unnamed: 0,fecha,cliente,cantidad,producto,precio_u,importe
0,02/01/2021,Salinas,30.80,pechuga,70.0,2156.0
1,02/01/2021,Salinas,20.40,pierna con muslo,43.0,877.2
2,02/01/2021,Salinas,10.50,retazo,35.0,367.5
3,02/01/2021,Jesus Aguinaga,7.30,pierna con muslo,45.0,328.5
4,02/01/2021,Jesus Aguinaga,4.10,pechuga,80.0,328.0
...,...,...,...,...,...,...
0,28/02/2021,Abarrotes Betito,4.00,pechuga,80.0,320.0
1,28/02/2021,Tramonte 87,2.20,tilapia,78.0,171.6
2,28/02/2021,Tramonte 87,1.10,pechuga,95.0,104.5
3,28/02/2021,Tramonte 87,2.40,muslo,60.0,144.0


In [4]:
productos_cliente = data.groupby(['producto','cliente',]).agg({'cantidad':'sum'}).reset_index()

In [5]:
productos_cliente = data.groupby(['producto','cliente',]).agg({'cantidad':'sum'}).reset_index()
productos_cliente['producto'] = productos_cliente['producto'].str.strip()
productos_cliente['producto'].sort_values().unique()

array(['ala', 'ala adobada', 'ala natural', 'blancio', 'blanco de nilo',
       'boneless', 'chile', 'consome', 'corazon', 'hambuergesa de pollo',
       'hamburgesa de arrachera', 'hamburgesa de pollo',
       'hamburgesa de res', 'higado', 'huacal', 'huacal con ala', 'huevo',
       'longaniza', 'milanesa', 'mole rojo', 'mole verde', 'molida',
       'molleja', 'msulo', 'muslo', 'nuggets', 'pacotilla',
       'palomita de pollo', 'papa francesa', 'papa gajo', 'papa ondulada',
       'papa recta', 'pata', 'pechuga', 'pechuga caja', 'pierna',
       'pierna con muslo', 'pollo entero', 'pulpa', 'rabadilla', 'retazo',
       'retazoi', 'salmon', 'tenders', 'tilapia', 'tira de pollo',
       'tira empanizada', 'tronco'], dtype=object)

In [7]:
productos_cliente_pivot = productos_cliente.pivot_table(values='cantidad', 
                                                columns='cliente', 
                                                index='producto', 
                                                aggfunc='sum').fillna(0)

productos_cliente_pivot = productos_cliente_pivot.div(productos_cliente_pivot.sum(axis=1), axis=0)
productos_cliente_pivot.head(3)

cliente,Abarrotes Betito,Abarrotes Betito Mama,Aby's,Alejandra,Alfonso Ceja,Almendro 57,Antonio Ceja,Armando,Bosque 135,Cabaña 46,...,Salinas,Soki,Sra. Ana Patiño,Sra. Gela,Sra. Maria,Sra. Maru,Tramonte 87,Tramonte 87,Vanesa,Xajal 232
producto,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abarrotes Betito,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Abarrotes Betito Mama,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aby's,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
prod_dist = pd.DataFrame(1/(1 + squareform(pdist(productos_cliente_pivot, 'euclidean'))),
                         index=productos_cliente_pivot.index, columns=productos_cliente_pivot.index)
prod_dist.head(3)

producto,ala,ala adobada,ala natural,aplanada,bistec,blancio,blanco de nilo,boneless,chile,consome,...,retazo,retazoi,salmon,tender,tenders,tilapia,tira,tira de pollo,tira empanizada,tronco
producto,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ala,1.0,0.582823,0.581772,0.466399,0.501104,0.466399,0.51816,0.466399,0.572208,0.466399,...,0.543567,0.466399,0.661962,0.466399,0.563299,0.616944,0.466399,0.466399,0.466399,0.591236
ala adobada,0.582823,1.0,0.661286,0.476871,0.515168,0.476871,0.534445,0.476871,0.476871,0.476871,...,0.530353,0.476871,0.598911,0.476871,0.574622,0.615203,0.476871,0.482043,0.476871,0.625682
ala natural,0.581772,0.661286,1.0,0.466349,0.502595,0.469138,0.518101,0.466349,0.486402,0.466349,...,0.520231,0.469138,0.585682,0.467248,0.635722,0.592307,0.466349,0.466349,0.466349,0.591176


In [31]:
customers = list(data['cliente'].unique())

recomendaciones = {}
    
for customer in customers:
    productos_similares = list()
    
    productos_comprados = list(productos_cliente['producto'][productos_cliente['cliente'] == customer].unique()) #products purchased by customer
    for product in productos_comprados:
        productos_similares.append(prod_dist[product].sort_values(ascending=False)[1:6].index.to_list()) #all items to recommend
    
    #flatten
    productos_similares = [item for sublist in productos_similares for item in sublist]
    
    #filter: Removing items already purchased
    for product in productos_comprados:
        if product in productos_similares:
            productos_similares.remove(product)
            
    #top 5 to recommend
    recs_products = list(pd.Series(productos_similares).value_counts()[:5].index)

    #creation dictionary
    recomendaciones[customer] = recs_products
        
recomendaciones_clientes = pd.DataFrame.from_dict(recomendaciones, orient='index').reset_index()
recomendaciones_clientes.columns = ['cliente', 'Recomendacion 1', 'Recomendacion 2', 'Recomendacion 3', 'Recomendacion 4', 'Recomendacion 5']
recomendaciones_clientes.sort_values('cliente', inplace=True, ignore_index=True)  
display(recomendaciones_clientes)

Unnamed: 0,cliente,Recomendacion 1,Recomendacion 2,Recomendacion 3,Recomendacion 4,Recomendacion 5
0,Abarrotes Betito,huevo,hamburgesa de pollo,pierna con muslo,ala,pollo entero
1,Abarrotes Betito Mama,pechuga,hamburgesa de pollo,tronco,pierna con muslo,ala
2,Aby's,huevo,hamburgesa de pollo,muslo,ala,pierna con muslo
3,Alejandra,huevo,retazo,muslo,ala,pechuga
4,Alfonso Ceja,muslo,ala,retazo,huevo,ala adobada
...,...,...,...,...,...,...
82,Sra. Maru,huevo,muslo,pierna con muslo,hamburgesa de pollo,retazo
83,Tramonte 87,huevo,hamburgesa de pollo,pierna con muslo,retazo,tilapia
84,Tramonte 87,,,,,
85,Vanesa,huevo,pierna con muslo,muslo,hamburgesa de pollo,ala


In [32]:
recomendaciones_clientes.to_excel("Recomendaciones_Clientes.xlsx" 
                                  , sheet_name='Recomendacion para clientes',
                                 index=False)