## Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
from sklearn.feature_selection import r_regression as pearson_correlation
from sklearn.decomposition import NMF

## Non negative matrice factorization recommandation model

In [3]:
# On reprend la fonction de recommandation NNF
def person_based_recommandation_nnf(data, utilisateur, n_categories=5, n_recommandations=5):
    model = NMF(n_components=n_categories, init='random', max_iter=2000, random_state=0)
    W = model.fit_transform(data)
    H = model.components_
    approximated_matrix = np.matmul(W, H)
    approximated_matrix_df = pd.DataFrame(approximated_matrix, index=data.index, columns=data.columns)

    # Pour recommander un item, on prend le mieux noté parmi ceux qu'il n'avait pas notés.
    data_utilisateur = data.filter(items=[utilisateur], axis=0).transpose()
    data_utilisateur['has_rated'] = (data_utilisateur[utilisateur] != 0)
    
    predicted_ratings_utilisateur = approximated_matrix_df.filter(items=[utilisateur], axis=0).transpose()

    # Un peu pareil que pour Pearson, on prend les meilleurs de ceux qu'on a pas encore notés
    items_sorted = predicted_ratings_utilisateur[data_utilisateur['has_rated'] == False].sort_values(utilisateur, ascending=False)
    return items_sorted[:n_recommandations].index        


In [4]:
# On sépare entre train et apply, pour gagner du temps
def person_based_recommandation_nnf_train(data, n_categories=5):
    model = NMF(n_components=n_categories, init='random', max_iter=2000, random_state=0)
    W = model.fit_transform(data)
    H = model.components_
    approximated_matrix = np.matmul(W, H)
    approximated_matrix_df = pd.DataFrame(approximated_matrix, index=data.index, columns=data.columns)
    return approximated_matrix_df

def person_based_recommandation_nnf_apply(data, approximated_matrix_df, utilisateur, n_recommandations=5):
    # Pour recommander un item, on prend le mieux noté parmi ceux qu'il n'avait pas notés.
    data_utilisateur = data.filter(items=[utilisateur], axis=0).transpose()
    data_utilisateur['has_rated'] = (data_utilisateur[utilisateur] != 0)
    
    predicted_ratings_utilisateur = approximated_matrix_df.filter(items=[utilisateur], axis=0).transpose()

    # Un peu pareil que pour Pearson, on prend les meilleurs de ceux qu'on a pas encore notés
    items_sorted = predicted_ratings_utilisateur[data_utilisateur['has_rated'] == False].sort_values(utilisateur, ascending=False)
    return items_sorted[:n_recommandations].index        


In [5]:
# Calcul de score (MAE) pour une matrice NNF
def nnf_mae(data, approximated_matrix_df):
    return (data - approximated_matrix_df).abs().mean().mean()

In [6]:
# On reprend la fonction de recommandation Pearson
def person_based_recommandation(data, utilisateur, n_proches=5, n_recommandations=5):
    # On transpose la matrice, parceque la fonction pearson fait une correlation entre les colonnes
    data_transposed = data.transpose()

    # On fait la corrélation de cust_5 avec toutes les autres colonnes
    correlations = pearson_correlation(data_transposed, data_transposed[utilisateur])

    # On reformate pour avoir les données dans un dataframe propre
    correlations_col = pd.DataFrame(index=data.index)
    correlations_col['correlations'] = correlations

    # On trie par ordre décroissant
    sorted_correlations = correlations_col.sort_values('correlations', ascending=False)
    
    utilisateurs_proches = sorted_correlations[1:n_proches+1].index
    
    # On ne garde que les ratings de ces utilisateurs
    data_utilisateurs_proches = data.filter(items=utilisateurs_proches, axis=0).transpose()
    
    # Les ratings de notre utilisateur
    data_utilisateur = data.filter(items=[utilisateur], axis=0).transpose()

    # Nombre d'utilisateurs proches qui ont noté
    n_ratings = (data_utilisateurs_proches != 0).sum(axis=1)

    # Note moyenne
    mean_ratings = (data_utilisateurs_proches).mean(axis=1)

    utilisateur_has_rated = (data_utilisateur[utilisateur] != 0)

    data_utilisateur['n_ratings'] = n_ratings
    data_utilisateur['mean_ratings'] = mean_ratings
    data_utilisateur['has_rated'] = utilisateur_has_rated
    
    items_sorted = data_utilisateur[data_utilisateur['has_rated'] == False].sort_values(['n_ratings', 'mean_ratings'], ascending=False)

    return items_sorted[:n_recommandations].index

## Chargement des données

In [7]:
orders = pd.read_json("../raw_data/order_scoring.json",lines=True)

In [8]:
counted_sku_df = orders.groupby('sku').count()['LO'].copy()
counted_sku_df.sort_values(ascending=False,inplace=True)
sku_lenght = 500
short_sku_list = list(counted_sku_df.head(sku_lenght).index)
orders_short = orders[orders['sku'].isin(short_sku_list)]

In [9]:
coef = {
    1:0.4,
    2:0.3,
    3:0.2,
    4:0.1
    }
orders_short['score'] = orders_short['LO'].map(coef)
table_scoring = orders_short.groupby(by=['customer_id','sku']).sum()[['score']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  orders_short['score'] = orders_short['LO'].map(coef)


In [10]:
penality = 0.5
table_penality = pd.pivot_table(orders_short, 
                       values=['score'], 
                       index=['customer_id','sku'],
                       columns=['LO'],
                       aggfunc='count',
                       fill_value=0)

table_penality['penality'] = table_penality[[('score',4),('score',3)]].max(axis=1)*(1-table_penality[('score',2)])*(1-table_penality[('score',1)])*penality
final_table = pd.DataFrame(table_scoring['score'] - table_penality['penality'],columns=["score"])

In [11]:
matrix = pd.pivot_table(final_table, values=['score'], index=['customer_id'],
                    columns=['sku'],aggfunc='sum')
matrix = matrix.fillna(0)

In [12]:
matrix.columns = matrix.columns.droplevel(0)
data = matrix

In [13]:
data

sku,1-ABB-101,1-ABO-103,1-ABO-105,1-ACN-106,1-ACN-107,1-ACN-108,1-ACN-113,1-ACN-116,1-ACP-101,1-ACP-102,...,8-GRI-104,8-ILB-103,8-ILB-106,8-LCM-119,8-RES-102,8-RES-106,8-VLF-104,8-VLF-105,8-VLF-107,8-VLF-108
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00HCaCqtG9aMGVCDyBUbvzIzY073,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00KfpQoUiRTi8RfGFcdjFM80s5s1,0.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00RZel3ipuTJMrADRnZ3Lr1C1XX2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00V8dG1xBHM84aVChJyEgIL21XK2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00zoJSpuLHUBtthekFUQ3sjnJy63,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzqXh3hccWTjkxco7Qdz1iEy7bu1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zzqaeuDHkFcnofZkDUHPJ7fbCS33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zzrZzibp5CbzXtjld4reTvX7bdR2,0.0,0.7,0.0,0.0,0.0,0.0,0.0,0.0,0.9,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
zzxqL7l0M2fuSto96UNjWi3pg9b2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.4,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
matrix_positive = pd.pivot_table(final_table, values=['score'], index=['customer_id'],
                    columns=['sku'],aggfunc='sum')

In [15]:
matrix_positive = matrix_positive + 1
matrix_positive = matrix_positive.fillna(0)
matrix_positive.columns = matrix_positive.columns.droplevel(0)
data_positive = matrix_positive

### Tests de recommandations

In [16]:
person_based_recommandation(data, '00V8dG1xBHM84aVChJyEgIL21XK2')

Index(['1-TAH-102', '3-PAP-101', '6-COT-105', '4-BOF-144', '6-BOI-101'], dtype='object', name='sku')

In [17]:
person_based_recommandation_nnf(data_positive, '00V8dG1xBHM84aVChJyEgIL21XK2')

Index(['4-GRA-105', '1-SNT-103', '3-PAP-101', '6-GAB-101', '6-CAS-105'], dtype='object', name='sku')

In [18]:
nnf_model = person_based_recommandation_nnf_train(data_positive, n_categories=25)

In [19]:
nnf_mae(data_positive, nnf_model)

0.08600592979140802

In [20]:
person_based_recommandation_nnf_apply(data_positive, nnf_model, '00V8dG1xBHM84aVChJyEgIL21XK2')

Index(['6-CAS-105', '6-COT-105', '3-PAP-101', '1-SNT-103', '1-BID-121'], dtype='object', name='sku')

## Tests

In [21]:
customer_id = "h6IvomPZxOTKuKGuvS8b3nZrzVk2"

In [22]:

person_based_recommandation(data, customer_id, n_recommandations=3)

Index(['1-PHL-160', '1-VIT-115', '6-RIS-103'], dtype='object', name='sku')

In [23]:

# https://lafourche.fr/products/philia-bouillon-de-legumes-en-poudre-bio-125g-bio
# https://lafourche.fr/products/lot-de-12-compotes-cool-fruits-pomme-acerola
# https://lafourche.fr/products/la-fourche-tomates-concassees-bio-0-4kg

In [24]:
person_based_recommandation_nnf_apply(data_positive, nnf_model, customer_id, n_recommandations=3)

Index(['1-ELB-100', '4-LAF-117', '3-PAP-101'], dtype='object', name='sku')

In [25]:
# https://lafourche.fr/products/elibio-mais-doux-bio-300g
# https://lafourche.fr/products/la-fourche-1kg-de-pates-cocciolette-blanches-bio-en-vrac
# https://lafourche.fr/products/papeco-essuie-tout-blanc-100prct-recycle-origine-france-200-feuilles-ecologique

In [21]:
customer_id = "h6IvomPZxOTKuKGuvS8b3nZrzVk2"

In [22]:

person_based_recommandation(data, customer_id, n_recommandations=3)

Index(['1-PHL-160', '1-VIT-115', '6-RIS-103'], dtype='object', name='sku')

In [23]:

# https://lafourche.fr/products/philia-bouillon-de-legumes-en-poudre-bio-125g-bio
# https://lafourche.fr/products/lot-de-12-compotes-cool-fruits-pomme-acerola
# https://lafourche.fr/products/la-fourche-tomates-concassees-bio-0-4kg

In [24]:
person_based_recommandation_nnf_apply(data_positive, nnf_model, customer_id, n_recommandations=3)

Index(['1-ELB-100', '4-LAF-117', '3-PAP-101'], dtype='object', name='sku')

In [25]:
# https://lafourche.fr/products/elibio-mais-doux-bio-300g
# https://lafourche.fr/products/la-fourche-1kg-de-pates-cocciolette-blanches-bio-en-vrac
# https://lafourche.fr/products/papeco-essuie-tout-blanc-100prct-recycle-origine-france-200-feuilles-ecologique

In [21]:
customer_id = "h6IvomPZxOTKuKGuvS8b3nZrzVk2"

In [22]:

person_based_recommandation(data, customer_id, n_recommandations=3)

Index(['1-PHL-160', '1-VIT-115', '6-RIS-103'], dtype='object', name='sku')

In [23]:

# https://lafourche.fr/products/philia-bouillon-de-legumes-en-poudre-bio-125g-bio
# https://lafourche.fr/products/lot-de-12-compotes-cool-fruits-pomme-acerola
# https://lafourche.fr/products/la-fourche-tomates-concassees-bio-0-4kg

In [24]:
person_based_recommandation_nnf_apply(data_positive, nnf_model, customer_id, n_recommandations=3)

Index(['1-ELB-100', '4-LAF-117', '3-PAP-101'], dtype='object', name='sku')

In [25]:
# https://lafourche.fr/products/elibio-mais-doux-bio-300g
# https://lafourche.fr/products/la-fourche-1kg-de-pates-cocciolette-blanches-bio-en-vrac
# https://lafourche.fr/products/papeco-essuie-tout-blanc-100prct-recycle-origine-france-200-feuilles-ecologique