## Import Libraries for the Recommender System

In [117]:
#import basic Libraries
import pandas as pd
import numpy as np
import random

#Importing Sklearn
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity 

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [26]:
# Base names (without the .csv extension) of the data files to load.
filenames = [
    'aisles',
    'departments',
    'order_products__prior',
    'order_products__train',
    'orders',
    'products',
]

In [27]:
# Read the first six entries of `filenames` from ./data/ as CSV, one DataFrame
# per file, and bind them in list order.
(
    aisles,
    departments,
    order_products__prior,
    order_products__train,
    orders,
    products,
) = (pd.read_csv('./data/' + stem + '.csv') for stem in filenames[:6])

## Merging Datasets together

In [28]:
# Enrich the training order lines with product, department and aisle columns.
# Left joins keep every order line even when a lookup key has no match.
opt = (
    order_products__train
    .merge(products, how='left', on='product_id')
    .merge(departments, how='left', on='department_id')
    .merge(aisles, how='left', on='aisle_id')
)

## Creating the recommender system

#### Filtering the data by reorders

In [29]:
# Keep only the order lines flagged as reorders (reordered == 1).
reorders = opt.loc[opt['reordered'].eq(1)]

In [30]:
# Cast product_id to int64. `reorders` is a boolean-filtered selection of
# `opt`, so build a new frame with .assign() instead of writing a column into
# that selection (chained assignment -> SettingWithCopyWarning, which the
# global warnings filter would otherwise hide). Re-running this cell is
# idempotent.
reorders = reorders.assign(product_id=reorders['product_id'].astype('int64'))

In [31]:
# Product ids that occur more than once among the reorders ("high volume"),
# ordered from most to least frequent. The counts are computed once and reused
# both as the values being filtered and as the filter mask.
product_counts = reorders['product_id'].value_counts().sort_values(ascending=False)
hivol = product_counts[product_counts > 1].index.tolist()

In [32]:
# Restrict `reorders` to rows whose product is in the high-volume list.
is_hivol = reorders['product_id'].isin(hivol)
reorders = reorders[is_hivol]

In [33]:
# Flag rows whose product occurs more than once in `reorders`.
# value_counts() returns a Series indexed by product_id; assigning that Series
# directly as a column aligns it on the frame's row labels, so the flag would
# be attached to unrelated rows (and NaN everywhere else). Map the counts back
# through product_id so every row receives the count of its own product.
hi_dem_counts = reorders['product_id'].value_counts()
reorders = reorders.assign(hi_dem=reorders['product_id'].map(hi_dem_counts) > 1)

In [34]:
# Rows of `reorders` whose hi_dem flag is True.
flagged_hi_dem = reorders['hi_dem'].eq(True)
hidem_ord = reorders.loc[flagged_hi_dem]

In [36]:
# Attach order-level columns to each reorder line. No key is given, so pandas
# joins on the columns the two frames have in common (inner join by default).
user_orders = pd.merge(reorders, orders)

In [37]:
# Flag meant to mark high-demand products (more than one occurrence).
# NOTE(review): the right-hand side is a value_counts() result, i.e. a Series
# indexed by product_id, while column assignment aligns on user_orders' row
# labels. A row therefore gets True only where its row label happens to equal
# a product_id whose count is > 1, and NaN otherwise — presumably not what was
# intended. Mapping the counts through product_id
# (user_orders['product_id'].map(counts) > 1) would flag per product, but it
# would also change which rows reach `hidem_ord` and therefore the size of the
# dense similarity matrices built from it — confirm the memory impact first.
user_orders['hi_dem'] = (user_orders.copy()['product_id'].value_counts().sort_values(ascending=False)>1)

In [38]:
# Rows of `user_orders` whose hi_dem flag is True; this is the frame every
# similarity matrix below is built from.
flagged_hi_dem = user_orders['hi_dem'].eq(True)
hidem_ord = user_orders.loc[flagged_hi_dem]

### Setup Model - Compare Users to other Users

In [39]:
# User x product matrix: number of hidem_ord rows per (user_id, product_name)
# pair, with 0 where a user has no rows for a product.
users = (
    hidem_ord
    .groupby(['user_id', 'product_name'])
    .size()
    .sort_values(ascending=False)
    .unstack()
    .fillna(0)
)

In [40]:
# Pairwise cosine similarity between the rows of `users`, labelled by user_id
# on both axes.
user_ids = users.index
users_sim = pd.DataFrame(cosine_similarity(users), index=user_ids, columns=user_ids)

In [41]:
def next_prod(df, num_col, top_n=3):
    """Return the highest-scoring entries of one column of a similarity matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose row labels include its column labels (e.g. a square
        similarity matrix), so the selected column's own label can be dropped
        from the result.
    num_col : int
        Position of the column to inspect.
    top_n : int, default 3
        Number of entries to return.

    Returns
    -------
    pandas.Series
        The ``top_n`` largest values of the selected column, excluding the row
        that carries the column's own label, sorted in descending order.

    Raises
    ------
    IndexError
        If ``num_col`` is out of range for ``df.columns``.
    KeyError
        If the column's label is not present in the row index.
    """
    target = df.columns[num_col]
    # Drop the item's similarity with itself before ranking.
    scores = df[target].drop(target)
    return scores.sort_values(ascending=False).head(top_n)

In [42]:
# Three users most similar to the user in column position 56 of users_sim,
# displayed as a single row.
next_prod(users_sim, 56).to_frame().T

user_id,43254,48962,10453
1711,0.5,0.353553,0.188982


## Recommendations for Products by User ID

In [44]:
# Product x user matrix: number of hidem_ord rows per (product_name, user_id)
# pair, with 0 where the pair never occurs.
# NOTE(review): this rebinds `products`, which earlier held the table read
# from products.csv; re-running the merge cell after this one will no longer
# see that table.
products = (
    hidem_ord
    .groupby(['product_name', 'user_id'])
    .size()
    .sort_values(ascending=False)
    .unstack()
    .fillna(0)
)

In [45]:
# Pairwise cosine similarity between the rows of `products` (one row per
# product_name), labelled by product_name on both axes.
product_names = products.index
products_sim = pd.DataFrame(
    cosine_similarity(products),
    index=product_names,
    columns=product_names,
)

In [46]:
# Sanity check on the pivot orientation: (rows = product_name values,
# columns = user_id values).
products.shape

(6567, 6336)

In [48]:
# Three products most similar to the product in column position 11 of
# products_sim, displayed as a single row.
next_prod(products_sim, 11).to_frame().T

product_name,Organic Heirloom Tomatoes,Organic Spinach Bunch,Salted Butter
1% Low Fat Milk,0.57735,0.160128,0.144338


In [49]:
# Zero-filled Series with one entry per column of `products`; its index is the
# users_sim column labelled 55.
n_user_columns = len(products.columns)
profile = pd.Series(np.zeros(n_user_columns), index=users_sim[55])

In [50]:
# Score every product row: weight each user column's counts by that user's
# similarity to the user labelled 55 and sum across users.
user_weights = users_sim[55]
recommendations = np.dot(products.to_numpy(), user_weights)

In [51]:
# Label the scores with the product names they belong to.
recommendations = pd.Series(data=recommendations, index=products.index)

In [52]:
# Five highest-scoring products.
recommendations.sort_values(ascending=False).iloc[:5]

product_name
Grape White/Green Seedless        11.120541
Flour Tortillas                    1.902369
Chocolate Peppermint Stick Bar     1.673560
Banana                             1.399944
Organic Avocado                    0.740677
dtype: float64

### Recommendations by Products

In [53]:
# Order x product matrix: number of hidem_ord rows per (order_id,
# product_name) pair, with 0 where a product is not in an order.
ords = (
    hidem_ord
    .groupby(['order_id', 'product_name'])
    .size()
    .sort_values(ascending=False)
    .unstack()
    .fillna(0)
)

In [54]:
# Pairwise cosine similarity between the rows of `ords` (one row per
# order_id), labelled by order_id on both axes.
order_ids = ords.index
ords_sim = pd.DataFrame(cosine_similarity(ords), index=order_ids, columns=order_ids)

In [56]:
# Three users most similar to the user in column position 19 of users_sim,
# displayed as a single row.
next_prod(users_sim, 19).to_frame().T

user_id,72696,174562,48806
586,0.57735,0.57735,0.408248


### Product Recommender by Order ID

In [57]:
# Product x order matrix: number of hidem_ord rows per (product_name,
# order_id) pair, with 0 where a product is not in an order.
baskets = (
    hidem_ord
    .groupby(['product_name', 'order_id'])
    .size()
    .sort_values(ascending=False)
    .unstack()
    .fillna(0)
)

In [58]:
# Pairwise cosine similarity between the rows of `baskets` (one row per
# product_name), labelled by product_name on both axes.
basket_products = baskets.index
basket_sim = pd.DataFrame(
    cosine_similarity(baskets),
    index=basket_products,
    columns=basket_products,
)

In [59]:
# Nine products most similar to 'Zucchini Squash'. The product's own entry is
# removed by label rather than by slicing off the first sorted row: another
# product can tie with it at similarity 1.0, in which case the first row is
# not guaranteed to be the product itself.
basket_sim['Zucchini Squash'].drop('Zucchini Squash').sort_values(ascending=False).head(9)

product_name
Chocolate Cheerios Cereal                      0.577350
Organic Green Peas                             0.408248
Quick & Easy Steel Cut Irish Oatmeal           0.408248
Small Size Flour Tortillas                     0.333333
Raspberry on the Bottom Nonfat Greek Yogurt    0.235702
Sweet Onions                                   0.218218
Flat Parsley, Bunch                            0.174078
Green Beans                                    0.149071
Organic Kiwi                                   0.107211
Name: Zucchini Squash, dtype: float64

In [60]:
def Recommender_System(user_id):
    '''
    Score products for a user from the purchases of similar users.

    Builds a user x product count matrix from the module-level ``hidem_ord``
    frame, computes the cosine similarity between every user and ``user_id``,
    and scores each product as the similarity-weighted sum of the users'
    counts for that product.

    Parameters
    ----------
    user_id
        A value present in ``hidem_ord['user_id']``.

    Returns
    -------
    tuple
        A 1-tuple whose only element is a pandas Series holding the five
        highest-scoring products (index: product_name, values: score), sorted
        in descending order.

    Raises
    ------
    KeyError
        If ``user_id`` does not occur in ``hidem_ord['user_id']``.
    '''
    # groupby sorts its keys, so rows (user_id) and columns (product_name) of
    # the pivot come out in sorted label order.
    user_product = (
        hidem_ord
        .groupby(['user_id', 'product_name'])
        .size()
        .unstack()
        .fillna(0)
    )

    # Similarity of every user to the requested user only. This is computed
    # from the local matrix (not a notebook-level similarity frame), so the
    # function depends on nothing but `hidem_ord`, and it avoids materialising
    # the full user x user matrix just to read one column.
    user_weights = cosine_similarity(user_product, user_product.loc[[user_id]]).ravel()

    # The product x user matrix is the transpose of the user x product matrix.
    product_user = user_product.T
    recommendations = pd.Series(
        np.dot(product_user.values, user_weights),
        index=product_user.index,
    )
    top_five = recommendations.sort_values(ascending=False).head()
    # NOTE(review): the 1-tuple wrapper looks like an accidental trailing
    # comma in the original return statement; kept so existing callers that
    # index the result with [0] keep working.
    return (top_five,)
    

In [122]:
# Draw one user_id at random from hidem_ord (returned as a one-element list)
# to use as input for the recommender. No seed is set, so the value differs
# between runs.
random.sample(hidem_ord['user_id'].tolist(), k=1)

[130025]

In [123]:
# Recommend products for user 130025, the id shown in the recorded output of
# the random draw above.
Recommender_System(130025)

(product_name
 Total 0% Nonfat Greek Yogurt              6.061393
 Carb Balance Flour Tortillas              1.977657
 Creamy Peanut Butter Spread With Honey    1.000000
 100% Whole Grain Flaxseed Bread           1.000000
 Limes                                     0.776737
 dtype: float64,)