## Recomendaciones de productos basadas en similitud entre clientes

In [7]:
#imports
import datetime
import locale
import os
import warnings
from datetime import date

import gspread
import numpy as np
import pandas as pd
import pymysql
from df2gspread import df2gspread as d2g
from oauth2client.service_account import ServiceAccountCredentials
from scipy.spatial.distance import pdist, squareform
from sqlalchemy import types, create_engine, select,exc

#Global Config
locale.setlocale(locale.LC_TIME, 'Spanish_Mexico')
locale.setlocale(locale.LC_TIME, 'es_MX.UTF-8')
warnings.filterwarnings("ignore")

In [3]:
# Google API authorization: load the service-account key from disk and use it
# to build an authorized gspread client.
google_key_file = 'service_key.json'
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]

credentials = ServiceAccountCredentials.from_json_keyfile_name(
    google_key_file,
    scope,
)
gc = gspread.authorize(credentials)

## Consiguiendo datos

In [5]:
# Load the sales table from MySQL.
# The connection URL embeds the database password, so it is read from the
# environment instead of being written into the notebook. Expected form:
#   mysql+pymysql://USER:PASSWORD@HOST:PORT/DATABASE
db_url = os.environ.get('POLLOS_DB_URL')
if not db_url:
    raise RuntimeError(
        "Set the POLLOS_DB_URL environment variable to the SQLAlchemy "
        "connection URL of the sales database before running this cell."
    )
engine = create_engine(db_url)

# Extract the data from SQL. The connection is opened in a `with` block so it
# is released as soon as the query finishes instead of being left open.
with engine.connect() as conn:
    data = pd.read_sql("SELECT * FROM ventas LIMIT 100000;",
                       conn,
                       parse_dates={'fecha': '%Y-%m-%d'})

# Reduce the parsed timestamps to plain calendar dates.
data['fecha'] = data['fecha'].dt.date

In [11]:
# Number of non-null 'importe' rows for every (cliente, producto) pair,
# returned as a flat frame with the count in 'purchase_count'.
customer_products = (
    data
    .groupby(['cliente', 'producto'])['importe']
    .count()
    .reset_index(name='purchase_count')
)
# Display the pairs with the most purchases first (the frame itself is left unsorted).
customer_products.sort_values(by="purchase_count", ascending=False)

Unnamed: 0,cliente,producto,purchase_count
254,LA SUIZA,PECHUGA,61
396,SALINAS,PECHUGA,60
399,SALINAS,RETAZO,56
255,LA SUIZA,PIERNA CON MUSLO,55
397,SALINAS,PIERNA CON MUSLO,51
...,...,...,...
179,ELSA FRIAS,PATA,1
178,EL VENADO,PIERNA CON MUSLO,1
176,EL VENADO,MUSLO,1
174,EL TACOTE,RETAZO,1


In [16]:
# Customer x product matrix: one row per cliente, one column per producto,
# holding each product's share of that customer's total purchase count
# (0 where the customer never bought the product).
cust_prod_pivot = (
    customer_products
    .pivot_table(index='producto',
                 columns='cliente',
                 values='purchase_count',
                 aggfunc='sum')
    .fillna(0)
    .T
    .pipe(lambda counts: counts.div(counts.sum(axis=1), axis=0))
)

# Pairwise cosine distance between customer rows, converted to a similarity
# score 1 / (1 + distance): a distance of 0 maps to 1, larger distances to
# smaller scores. Despite its name, cust_dist therefore holds similarities.
cosine_dist = squareform(pdist(cust_prod_pivot, 'cosine'))
cust_dist = pd.DataFrame(
    1 / (1 + cosine_dist),
    index=cust_prod_pivot.index,
    columns=cust_prod_pivot.index,
)

In [28]:
# Build up to N_RECS product recommendations per customer: take the customer's
# N_SIMILAR most similar customers, rank the products those customers bought
# by total purchase count, and keep the top ones this customer has never bought.
N_SIMILAR = 5
N_RECS = 5
rec_columns = [f'Recomendacion {i}' for i in range(1, N_RECS + 1)]

recommendations = {}
customers = customer_products['cliente'].sort_values().unique()
for customer in customers:
    # Remove the customer itself by label. Skipping the first row after
    # sorting is not reliable: another customer with similarity exactly 1.0
    # can be ordered ahead of it, which would leave the customer in its own
    # neighbour list.
    similar_cust = list(
        cust_dist[customer]
        .drop(customer)
        .sort_values(ascending=False)
        .head(N_SIMILAR)
        .index
    )
    sim_cust_prod = customer_products[customer_products['cliente'].isin(similar_cust)]
    grouped = sim_cust_prod.groupby('producto').agg({'purchase_count': 'sum'})
    ranked_products = grouped.sort_values('purchase_count', ascending=False).reset_index()

    # This customer's row of the customer x product matrix; a value of 0 means
    # the product was never bought by them.
    bought = cust_prod_pivot.loc[customer]
    not_bought = ranked_products[ranked_products['producto'].map(bought).eq(0)]
    recommendations[customer] = list(not_bought['producto'].head(N_RECS))

# One row per customer, one column per recommendation slot. The frame built
# from the dict only has as many columns as the longest list, so pad it to
# exactly N_RECS columns before naming them; otherwise the column assignment
# raises whenever no customer reaches N_RECS recommendations.
user_recs = pd.DataFrame.from_dict(recommendations, orient='index').reindex(columns=range(N_RECS))
user_recs.columns = rec_columns
user_recs = (
    user_recs
    .rename_axis('Cliente')
    .reset_index()
    # Drop customers for whom nothing could be recommended.
    .dropna(subset=rec_columns, how='all')
)

In [30]:
# Write the recommendations to an Excel workbook (sheet "Recomendaciones"),
# without the DataFrame index. sheet_name is passed by keyword: newer pandas
# releases deprecate positional arguments after the file path and pandas 3.0
# makes them keyword-only.
user_recs.to_excel('Recomendaciones para clientes.xlsx',
                   sheet_name='Recomendaciones',
                   index=False)