In [1]:
import pandas as pd
import numpy as np
import time
import turicreate as tc
from sklearn.model_selection import train_test_split
from sqlalchemy import types, create_engine, insert, exc
import pymysql

---
# Extracting Data from MySQL DB

In [61]:
engine = create_engine('mysql+pymysql://root:Constellation_96502@127.0.0.1:3306/pollosnpollos_regs')
data = pd.read_sql(f"SELECT * FROM ventas LIMIT 0, 10000;", 
                   engine, 
                   parse_dates = {'fecha': '%Y-%m-%d'})  

data['fecha'] = data['fecha'].dt.date

In [62]:
data.columns

Index(['fecha', 'cliente', 'cantidad', 'producto', 'precio_u', 'importe'], dtype='object')

In [63]:
clientes = data['cliente'].sort_values().unique()
display(clientes)

array(['A. CEJA', 'ABARROTES BETITO', 'ABARROTES BETITO MAMA', "ABY'S",
       'ALEJANDRA', 'ALFONSO', 'ALFONSO CEJA', 'ALMENDRO 57',
       'ANTONIO CEJA', 'ARMANDO', 'BETITO', 'BOSQUE 135', 'CABAÑA 46',
       'CABAÑA 46 ', 'CABAÑA 47', 'CABAÑA 48', 'CABAÑA 49', 'CABAÑA 50',
       'CAMPIÑA', 'CAMPIÑA 165', 'CAMPIÑA 166', 'CAMPIÑA 167',
       'CAMPIÑA 168', 'CAMPIÑA 71', 'CAMPIÑA 80', 'CAMPIÑA 80 ',
       'CAMPIÑA 81', 'CARLOS', 'CARLOS DIAZ', 'CARLOS JUGOS',
       'CARLOS LUGO', 'CASA RIVAS', 'CHUCHO', 'CIENEGA ', 'CIENEGA 7 ',
       'CIENEGA 8', 'CIENEGA 8 ', 'CIENEGA 9', 'CLUB HACIENDA', 'CORONA',
       'CRISTOBAL', 'DANIEL', 'DELICIAS', 'DRA. ABARCA', 'EL TACOTE',
       'EL VENADO', 'ELSA FRIAS', 'ERNESTO ', 'ERNESTO 31', 'F. SANCHEZ',
       'FANY', 'FCO PAPA', 'FCO SANCHEZ', 'FCO. SANCHEZ',
       'FRANCISCO IRENE', 'FRANCISCO SANCHEZ',
       'FRANCISCO SANCHEZ HERMANO', 'FRANCISCO SANCHEZ PAPA', 'GELA',
       'HACIENDA DE LA GAVIA 121', 'HUMBERTO', 'J. AGUINAGA',
     

In [80]:
ventas = data.groupby(['cliente','producto'], 
                      as_index = False)['importe'].count()
ventas.columns = ['cliente','producto','purchase_count']

In [81]:
ventas.sort_values(by="purchase_count", ascending=False)

Unnamed: 0,cliente,producto,purchase_count
254,LA SUIZA,PECHUGA,61
396,SALINAS,PECHUGA,60
399,SALINAS,RETAZO,56
255,LA SUIZA,PIERNA CON MUSLO,55
397,SALINAS,PIERNA CON MUSLO,51
...,...,...,...
179,ELSA FRIAS,PATA,1
178,EL VENADO,PIERNA CON MUSLO,1
176,EL VENADO,MUSLO,1
174,EL TACOTE,RETAZO,1


In [82]:
def create_data_dummy(data):
    data_dummy = data.copy()
    data_dummy['purchase_dummy'] = 1
    return data_dummy

data_dummy = create_data_dummy(ventas)

In [83]:
data_dummy

Unnamed: 0,cliente,producto,purchase_count,purchase_dummy
0,A. CEJA,P.MUSLO,1,1
1,A. CEJA,PECHUGA,1,1
2,ABARROTES BETITO,ALA NATURAL,1,1
3,ABARROTES BETITO,MOLE ROJO,1,1
4,ABARROTES BETITO,MUSLO,1,1
...,...,...,...,...
446,VANESA,TRONCO,5,1
447,XAJAL 232,CONSOME,1,1
448,XAJAL 232,MOLE ROJO,1,1
449,XAJAL 232,MUSLO,1,1


In [104]:
df_matrix = pd.pivot_table(ventas, values='purchase_count', index='cliente', columns='producto')

In [105]:
df_matrix_norm = (df_matrix-df_matrix.min())/(df_matrix.max()-df_matrix.min())

In [106]:
df_matrix_norm

producto,ADOBADA,ALA ADOBADA,ALA NATURAL,BCO NILO,BLANCO DE NILO,BLANCO NILO,BONELESS,CHILE,CONSOME,CORAZON,...,PULPA,RABADILLA,RETAZO,SALMON,TENDERS,TILAPIA,TIRA,TIRA DE POLLO,TIRA EMPANIZADA,TRONCO
cliente,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A. CEJA,,,,,,,,,,,...,,,,,,,,,,
ABARROTES BETITO,,,0.0,,,,,,,,...,,,,,,,,,,
ABARROTES BETITO MAMA,,,,,,,,,,,...,,,,,,,,,,
ABY'S,,0.166667,0.0,,,,,,,,...,0.130435,,,,,,,,,0.10
ALEJANDRA,,,,,,,,,,,...,0.021739,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRAMONTE,,,,,,,,,,,...,,,,,,0.000000,,,,0.00
TRAMONTE 87,,,,,,,,,,,...,,,,,,0.538462,,,,0.25
TRAMONTE 87,,,,,,,,,,,...,,,,,,,,,,0.00
VANESA,,0.166667,,,,,,,,,...,0.260870,,,,,,,,,0.20


In [107]:
d = df_matrix_norm.reset_index() 
d.index.names = ['scaled_purchase_freq'] 
data_norm = pd.melt(d, 
                    id_vars=['cliente'], 
                    value_name='scaled_purchase_freq').dropna()
print(data_norm.shape)
data_norm.head()

(412, 3)


Unnamed: 0,cliente,producto,scaled_purchase_freq
112,ABY'S,ALA ADOBADA,0.166667
118,ARMANDO,ALA ADOBADA,0.0
120,BOSQUE 135,ALA ADOBADA,0.166667
136,CARLOS,ALA ADOBADA,0.0
137,CARLOS DIAZ,ALA ADOBADA,1.0


In [108]:
def split_data(data):
    '''
    Splits dataset into training and test set.
    
    Args:
        data (pandas.DataFrame)
        
    Returns
        train_data (tc.SFrame)
        test_data (tc.SFrame)
    '''
    train, test = train_test_split(data, test_size = .2)
    train_data = tc.SFrame(train)
    test_data = tc.SFrame(test)
    return train_data, test_data

In [109]:
train_data, test_data = split_data(ventas)
train_data_dummy, test_data_dummy = split_data(data_dummy)
train_data_norm, test_data_norm = split_data(data_norm)

In [110]:
train_data_norm

cliente,producto,scaled_purchase_freq
ALEJANDRA,PULPA,0.0217391304347826
ALFONSO CEJA,PIERNA MUSLO,1.0
SRA. ANA PATIÑO,PECHUGA,0.0
DRA. ABARCA,ALA ADOBADA,0.0
ERNESTO,PECHUGA,0.0
FANY,ALA NATURAL,0.2857142857142857
ARMANDO,NUGGETS,0.0
ABARROTES BETITO,MUSLO,0.0
FRANCISCO SANCHEZ,MUSLO,0.1764705882352941
CABAÑA 46,BLANCO DE NILO,0.0


In [111]:
user_id = 'cliente'
item_id = 'producto'
users_to_recommend = list(clientes)
n_rec = 5 # number of items to recommend
n_display = 30 # to display the first few rows in an output dataset

In [112]:
def model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):
    if name == 'popularity':
        model = tc.popularity_recommender.create(train_data, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target)
    elif name == 'cosine':
        model = tc.item_similarity_recommender.create(train_data, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target, 
                                                    similarity_type='cosine')
    elif name == 'pearson':
        model = tc.item_similarity_recommender.create(train_data, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target, 
                                                    similarity_type='pearson')
        
    recom = model.recommend(users=users_to_recommend, k=n_rec)
    recom.print_rows(n_display)
    return model


In [113]:
name = 'popularity'
target = 'purchase_count'
popularity = model(train_data, 
                   name, 
                   user_id, 
                   item_id, 
                   target, 
                   users_to_recommend, 
                   n_rec, 
                   n_display)

+-----------------------+------------------+--------------------+------+
|        cliente        |     producto     |       score        | rank |
+-----------------------+------------------+--------------------+------+
|        A. CEJA        |      RETAZO      | 34.333333333333336 |  1   |
|        A. CEJA        |   POLLO ENTERO   |        7.0         |  2   |
|        A. CEJA        | PIERNA CON MUSLO | 6.305555555555555  |  3   |
|        A. CEJA        |      PULPA       | 5.470588235294118  |  4   |
|        A. CEJA        |   PECHUGA CAJA   |        4.0         |  5   |
|    ABARROTES BETITO   |      RETAZO      | 34.333333333333336 |  1   |
|    ABARROTES BETITO   |     PECHUGA      | 7.180327868852459  |  2   |
|    ABARROTES BETITO   |   POLLO ENTERO   |        7.0         |  3   |
|    ABARROTES BETITO   |      PULPA       | 5.470588235294118  |  4   |
|    ABARROTES BETITO   |   PECHUGA CAJA   |        4.0         |  5   |
| ABARROTES BETITO MAMA |      RETAZO      | 34.333

In [114]:
name = 'popularity'
target = 'scaled_purchase_freq'
pop_norm = model(train_data_norm, 
                 name, 
                 user_id, 
                 item_id, 
                 target, 
                 users_to_recommend, 
                 n_rec, 
                 n_display)

+-----------------------+-------------------+---------------------+------+
|        cliente        |      producto     |        score        | rank |
+-----------------------+-------------------+---------------------+------+
|        A. CEJA        |   PAPA ONDULADA   |         1.0         |  1   |
|        A. CEJA        |      PIERNA       |         1.0         |  2   |
|        A. CEJA        |     PAPA RECTA    |         1.0         |  3   |
|        A. CEJA        |   PAPA FRANCESA   |         0.5         |  4   |
|        A. CEJA        |      NUGGUETS     |         0.5         |  5   |
|    ABARROTES BETITO   |   PAPA ONDULADA   |         1.0         |  1   |
|    ABARROTES BETITO   |      PIERNA       |         1.0         |  2   |
|    ABARROTES BETITO   |     PAPA RECTA    |         1.0         |  3   |
|    ABARROTES BETITO   |      NUGGUETS     |         0.5         |  4   |
|    ABARROTES BETITO   | PALOMITA DE POLLO | 0.42857142857142855 |  5   |
| ABARROTES BETITO MAMA |

In [126]:
final_model = tc.item_similarity_recommender.create(tc.SFrame(data_norm), 
                                            user_id=user_id, 
                                            item_id=item_id, 
                                            target='scaled_purchase_freq', similarity_type='cosine')
recom = final_model.recommend(users=users_to_recommend, k=n_rec)
recom.print_rows(n_display)

+-----------------------+---------------------+----------------------+------+
|        cliente        |       producto      |        score         | rank |
+-----------------------+---------------------+----------------------+------+
|        A. CEJA        |        HIGADO       |         0.0          |  1   |
|        A. CEJA        | HAMBURGESA DE POLLO |         0.0          |  2   |
|        A. CEJA        |    BLANCO DE NILO   |         0.0          |  3   |
|        A. CEJA        |     ALA NATURAL     |         0.0          |  4   |
|        A. CEJA        |     ALA ADOBADA     |         0.0          |  5   |
|    ABARROTES BETITO   |        RETAZO       | 0.009231841564178467 |  1   |
|    ABARROTES BETITO   |       NUGGETS       | 0.005518734455108643 |  2   |
|    ABARROTES BETITO   |       MOLLEJA       | 0.005373334884643555 |  3   |
|    ABARROTES BETITO   |     POLLO ENTERO    | 0.005214512348175049 |  4   |
|    ABARROTES BETITO   |       TILAPIA       | 0.00466448068618

In [127]:
df_rec = recom.to_dataframe()
print(df_rec.shape)
df_rec.head()

(545, 4)


Unnamed: 0,cliente,producto,score,rank
0,A. CEJA,HIGADO,0.0,1
1,A. CEJA,HAMBURGESA DE POLLO,0.0,2
2,A. CEJA,BLANCO DE NILO,0.0,3
3,A. CEJA,ALA NATURAL,0.0,4
4,A. CEJA,ALA ADOBADA,0.0,5


In [128]:
name = 'cosine' 
target = 'scaled_purchase_freq' 
cos_norm = model(train_data_norm, 
                 name, user_id, 
                 item_id, 
                 target, 
                 users_to_recommend,
                 n_rec, 
                 n_display)

+-----------------------+------------------+-----------------------+------+
|        cliente        |     producto     |         score         | rank |
+-----------------------+------------------+-----------------------+------+
|        A. CEJA        |     NUGGETS      |          0.0          |  1   |
|        A. CEJA        |   ALA NATURAL    |          0.0          |  2   |
|        A. CEJA        |   ALA ADOBADA    |          0.0          |  3   |
|        A. CEJA        |   PIERNA MUSLO   |          0.0          |  4   |
|        A. CEJA        |      PULPA       |          0.0          |  5   |
|    ABARROTES BETITO   |     NUGGETS      |          0.0          |  1   |
|    ABARROTES BETITO   |   ALA ADOBADA    |          0.0          |  2   |
|    ABARROTES BETITO   |     PECHUGA      |          0.0          |  3   |
|    ABARROTES BETITO   |   PIERNA MUSLO   |          0.0          |  4   |
|    ABARROTES BETITO   |      PULPA       |          0.0          |  5   |
| ABARROTES 