# Let's recommend!!

We will load the model that generates candidates and create a function that receives a customer as input and returns the top N products to recommend. We will evaluate the results afterwards.

In [36]:
import pandas as pd
import os, sys
import numpy as np
import seaborn as sns
import gc
import warnings
import matplotlib.pyplot as plt

from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding
from gensim.models import Word2Vec

In [37]:
# `data` is read from the shared data folder; `data_processed` is read from
# the current working directory. `text` is forced to str so that missing
# values become the literal string 'nan' instead of float NaN.
path = '../../Data/'
data = pd.read_csv(os.path.join(path, 'data_filtered_20190422.csv'), sep=';')
data_processed = pd.read_csv('data_final_20190524.csv').assign(
    text=lambda frame: frame['text'].astype(str)
)

In [38]:
# Lookup tables for items.
#   items_map      : position in `items_unique` -> item_id
#   items_map_inv  : item_id -> position in `items_unique`
#   items_map_text : item_id_int -> product text (last row wins on repeats)
# NOTE(review): items_map is keyed by enumeration order while items_map_text
# is keyed by item_id_int; presumably both numberings coincide - verify.
items_unique = data_processed['item_id'].unique()
items_map = dict(enumerate(items_unique))
items_map_inv = {code: position for position, code in items_map.items()}
items_map_text = dict(zip(data_processed['item_id_int'], data_processed['text']))

In [39]:
# Row position -> customer id, available both as a dict and as a two-column frame.
customer_id_col = data_processed['customer_id_int']
idx_customers_map = dict(enumerate(customer_id_col))
idx_customers_df = pd.DataFrame(
    {
        'idx': data_processed.index.values,
        'customer_id_int': customer_id_col,
    }
)

In [40]:
# Preview the first three rows of `data` (the frame read from data_filtered_20190422.csv).
data.head(3)

Unnamed: 0,item_id,availability_date,brand,category,name,price
0,A28233506,,Woman Limited El Corte Inglés,"['Moda', 'Mujer', 'Abrigos']",Abrigo masculino con textura de mujer,"{'final': 199, 'currency': 'EUR'}"
1,A29054782,,Woman Limited El Corte Inglés,"['Moda', 'Mujer', 'Abrigos']",Abrigo doble faz de mujer con cinturón a tono,"{'final': 149, 'currency': 'EUR'}"
2,A27354432,,Woman El Corte Inglés,"['Moda', 'Mujer', 'Abrigos']",Abrigo largo de antelina de mujer Woman El Cor...,"{'final': 89.99, 'currency': 'EUR'}"


In [41]:
# Preview the first three rows of `data_processed` (the frame read from data_final_20190524.csv).
data_processed.head(3)

Unnamed: 0,date,item_id,brand,PRICE,customer_id,text,item_age,customer_id_int,item_id_int,brand_id,score,score_original,power_price,power_score,power_item_age,sqrt_price,sqrt_score,sqrt_item_age
0,20190101,A26036172,tintoretto,0.002021,0,vestido mujer flor lazada,0.0,0,0,0,0.003465,8,4e-06,1.2e-05,0.0,0.044954,0.058867,0.0
1,20190115,A26036172,tintoretto,0.002021,0,vestido mujer flor lazada,0.0,0,0,0,0.003465,8,4e-06,1.2e-05,0.0,0.044954,0.058867,0.0
2,20190101,A26870590,fórmula joven,0.001428,0,vestido laminado mujer formula joven escote pico,0.0,0,1,1,0.001485,4,2e-06,2e-06,0.0,0.037792,0.038538,0.0


In [42]:
# Text-vectorisation settings and tokenizer fit.
# The resulting `data_vec` (padded integer sequences) is one of the inputs
# later passed to `model.predict`.
MAX_NB_WORDS = 30_000 #decided by cumsum wordcount plot (Script 01)
MAX_SEQUENCE_LENGTH = 24 #decided by max words in a product (Script 00)
EMBEDDING_DIM = 100 #Same dim as our W2V embedding

all_text = data_processed['text']
# keep=False removes EVERY text that occurs more than once (not only the
# repeats), so the tokenizer is fitted solely on texts that appear exactly once.
# NOTE(review): confirm this matches the corpus used when the loaded model was
# trained - the tokenizer's word indices depend on the texts it is fitted on.
all_text = all_text.drop_duplicates (keep = False)

tokenizer = Tokenizer(num_words=MAX_NB_WORDS, )
tokenizer.fit_on_texts(all_text)

# Encode every row's text (duplicates included) and pad/truncate to a fixed length.
data_sequences = tokenizer.texts_to_sequences(data_processed['text'])
data_vec = pad_sequences(data_sequences, maxlen=MAX_SEQUENCE_LENGTH)

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

Found 9193 unique tokens.


In [43]:
# Load the saved candidate-generation model (path is relative to the working directory).
model = load_model('candidate_generation_20190522')

## Predictions

In [54]:
# Model inputs, passed positionally (the padded text sequences are fifth).
# NOTE(review): presumably this is the order in which the model's inputs were
# declared at training time - verify against the training script.
model_inputs = [
    data_processed['customer_id_int'],
    data_processed['item_id_int'],
    data_processed['brand_id'],
    data_processed['PRICE'],
    data_vec,
    data_processed['item_age'],
    data_processed['score'],
    data_processed['power_price'],
    data_processed['power_score'],
    data_processed['power_item_age'],
    data_processed['sqrt_price'],
    data_processed['sqrt_score'],
    data_processed['sqrt_item_age'],
]
test_pred = model.predict(model_inputs, verbose = 1)



In [55]:
#prueba = pd.DataFrame(test_pred)
#customers = data_processed['customer_id_int'].values
#prueba['customer_id_int'] = customers
#prueba = prueba.sample(frac = 0.4)
#prueba = prueba.groupby(['customer_id_int']).max()
#del prueba.index.name
#print(prueba.shape)
#prueba.head()

In [56]:

# Collapse the row-level predictions into one score vector per customer.
# NOTE: this cell overwrites `test_pred` (raw prediction array -> aggregated
# frame), so re-run the prediction cell before re-running this one.
test_pred = pd.DataFrame(test_pred)
customers = data_processed['customer_id_int'].values
test_pred['customer_id_int'] = customers
# Keep only 40% of the rows to avoid memory-allocation problems. The fixed
# seed makes the sample (and therefore the set of customers kept) reproducible.
# Customers whose rows are all left out of the sample get no prediction row.
test_pred = test_pred.sample(frac = 0.4, random_state = 42)
# Per customer, keep the highest score seen for each item position.
test_pred = test_pred.groupby(['customer_id_int']).max()
# `del test_pred.index.name` raises on current pandas; clearing the name works everywhere.
test_pred.index.name = None

print(test_pred.shape)
test_pred.head()

(609, 34849)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,34839,34840,34841,34842,34843,34844,34845,34846,34847,34848
0,0.026115,0.000648,0.0001,0.001016,0.000601,0.000179,0.00016,0.000689,2.9e-05,6.3e-05,...,3.5e-05,3.5e-05,2.7e-05,2.7e-05,2.6e-05,3.3e-05,3.2e-05,4.9e-05,3.8e-05,2.5e-05
1,0.000982,0.000599,7.2e-05,0.002659,0.000303,0.00104,0.00046,0.001567,3.7e-05,0.000181,...,2.3e-05,2.3e-05,3e-05,3.2e-05,3e-05,3.4e-05,2.6e-05,2.7e-05,2.6e-05,2.5e-05
2,0.000745,0.000161,3.5e-05,0.000264,0.000154,0.003013,0.000232,0.000645,2.7e-05,6e-05,...,2.8e-05,2.8e-05,2.8e-05,3.4e-05,2.3e-05,3.5e-05,2.5e-05,8.8e-05,7.9e-05,2.4e-05
3,0.002552,0.000322,7.7e-05,0.001658,0.00024,0.000621,0.000371,0.002709,4.7e-05,0.000144,...,2.7e-05,5.2e-05,2.7e-05,3.1e-05,2.8e-05,3.7e-05,2.5e-05,5.4e-05,4.4e-05,2.5e-05
4,2.6e-05,0.000314,3e-06,0.000482,7.5e-05,7e-06,8e-06,1.3e-05,3e-06,5e-06,...,3e-06,3e-06,3e-06,3e-06,3e-06,3e-06,4e-06,3e-06,3e-06,3e-06


In [57]:
def diversify(arr, diversity, plot = False):
    """Draw one item from a temperature-rescaled version of a score vector.

    The scores are transformed as ``softmax(log(arr) / diversity)`` and a
    single multinomial draw is taken from the resulting distribution.
    Values of ``diversity`` below 1 sharpen the distribution, values above 1
    flatten it.

    Parameters
    ----------
    arr : array-like
        Non-negative scores (e.g. one customer's predicted probabilities).
    diversity : float
        Temperature applied to the log-scores.
    plot : bool, default False
        If True, plot the original and the rescaled distributions.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(1, len(arr))`` containing a single 1 at the
        sampled position.

    Raises
    ------
    ValueError
        If ``arr`` contains no strictly positive score.
    """
    # float64 avoids the "sum(pvals[:-1]) > 1.0" failure that float32
    # rounding can trigger inside np.random.multinomial.
    scores = np.asarray(arr, dtype = np.float64)
    if not np.any(scores > 0):
        raise ValueError('diversify needs at least one strictly positive score')

    with np.errstate(divide = 'ignore'):  # log(0) -> -inf is fine: it maps to probability 0
        div = np.log(scores) / diversity
    # Subtracting the maximum leaves the normalised result unchanged but
    # prevents every exp() from underflowing to 0 (0/0 -> NaN) at low diversity.
    exp_preds = np.exp(div - np.max(div))
    preds = exp_preds / np.sum(exp_preds)

    if plot:
        fig, (ax_orig, ax_div) = plt.subplots(2, 1, figsize = (10, 8))
        sns.distplot(scores, ax = ax_orig)
        ax_orig.set_title('Original Distribution')
        sns.distplot(preds, ax = ax_div)
        ax_div.set_title(f'Distribution with {diversity} diversity')

    probas = np.random.multinomial(1, preds, 1)
    return probas

#diversify(data_preds.loc[128], diversity = 100, plot = True)

In [61]:
def get_afines(data_pred, customer, items_unique, N = 5):
    """Print a customer's interaction history and their top-N recommended items.

    Reads the notebook-level globals ``data_processed``, ``items_map`` and
    ``items_map_text``.

    Parameters
    ----------
    data_pred : pandas.DataFrame
        Prediction scores with one row per customer (index = customer id) and
        one column per item position.
    customer : int
        Customer id to look up, both in ``data_pred.index`` and in
        ``data_processed['customer_id_int']``.
    items_unique : array-like
        Unused; kept so existing calls keep working.
    N : int, default 5
        Number of items to recommend.

    Returns
    -------
    None
        Everything is printed.
    """
    # Rows of `data_pred` are customers. Check membership explicitly instead of
    # catching every exception, so genuine errors are not reported as
    # "customer does not exist".
    if customer not in data_pred.index:
        print(f'The customer {customer} does not exist')
        return

    #data_pred = diversify(data_pred.loc[cliente] , diversity = 0.25)
    #data_pred = data_pred.reshape(len(items_unique))
    print(f'==> Top {N} Recommended items to Customer {customer}: ')
    print(f'\nThe customer {customer} has bought this items: ')
    print('\n' + '=='*30 + '\n')

    # Items the customer interacted with, highest summed score first (at most 30 shown).
    history = data_processed.loc[data_processed['customer_id_int'] == customer, ['text', 'score_original']]
    interacted_items = history.groupby('text').sum().reset_index()\
                        .sort_values(['score_original'], ascending = False)
    print('\n'.join([str(i+1) + str(' - ') + str(x) for i, x in enumerate(interacted_items['text'].values[0:30])]))

    # `.loc[customer]` selects the customer's ROW of item scores;
    # `data_pred[customer]` would select a column (one item across customers).
    # Reversing before slicing also makes N = 0 return no items instead of all.
    top = data_pred.loc[customer].values.argsort()[::-1][:N] #items positions

    print('\n====================== IDs DE PRODUCTOS RECOMENDADOS ==============')
    print([items_map[item] for item in top])
    print ("\n===================== PRODUCTOS RECOMENDADOS =====================")
    print('\n'.join([str(i+1) + str(' - ') + str(items_map_text[x]) for i, x in enumerate(top)]))
    print ("==================================================================")

In [65]:
# Customer id to inspect; get_afines matches it against `customer_id_int`.
CLIENTE = 12
# Print this customer's interaction history and their 10 recommended items.
get_afines(data_pred = test_pred, customer = CLIENTE, items_unique = items_unique, N = 10)
#data_processed[data_processed['customer_id_int'] == CLIENTE]

==> Top 10 Recommended items to Customer 12: 

The customer 12 has bought this items: 


1 - pantalon cropped mujer cintura alta
2 - vestido camisero print leopardo
3 - sombra gelcrema aqua xl color paint make ever exclusivo sephora
4 - perfilador labio pro sculpting make exclusivo sephora
5 - falda pantalon mujer raya
6 - abrigo hombre perimeter mte
7 - neceser mujer danielle nicole plateado parche minnie mouse
8 - vestido print leopardo cuello camisero
9 - vestido mujer verde jareta
10 - vestido mujer estampado sixties volante
11 - vaquero pitillo mujer bordado
12 - pantalon slim mujer cinturon
13 - pantalon recto mujer roberto verino bolsillo
14 - pantalon pitillo mujer pana
15 - pantalon pitillo mujer boutique moschino fucsia
16 - neceser mujer formula joven bordado oriental flor
17 - neceser mujer calvin klein plata cremallera
18 - abrigo hombre torrey hooded mte
19 - neceser mujer calvin klein negro cremallera
20 - jeggin mujer color liso strass
21 - estuche mini gloria ortiz chr