In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix

In [2]:
# Load the raw candy reviews: one row per (item, user) with an integer review score.
df = pd.read_csv("candy.csv")

In [3]:
# Preview the first five rows of the raw reviews.
df.head()

Unnamed: 0,item,user,review
0,Reese's Peanut Butter Cups Miniatures,darlene90,5
1,Reese's Peanut Butter Cups Miniatures,taylordarlene,4
2,Reese's Peanut Butter Cups Miniatures,aliciadennis,5
3,Reese's Peanut Butter Cups Miniatures,ocook,5
4,Reese's Peanut Butter Cups Miniatures,chad38,5


In [4]:
# Summarize column dtypes and non-null counts of the raw reviews.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17234 entries, 0 to 17233
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   item    17234 non-null  object
 1   user    17234 non-null  object
 2   review  17234 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 404.0+ KB


In [5]:
from lightfm import LightFM

  "LightFM was compiled without OpenMP support. "


In [6]:
# Build the user x item rating matrix; user/item pairs without a review become 0.
d = (
    df.pivot_table(index='user', columns='item', values='review')
      .fillna(0)
)

In [7]:
# Display the user x item rating matrix (0.0 marks a pair with no review).
d

item,3 Musketeers Candy Bar,3 Musketeers Miniature Bars,5 Gum,Airheads Bites Fruit,Airheads White Mystery,Airheads Xtremes Sweetly Sour Candy Rainbow Berry,Almond Joy Snack Size Bites,Altoids Curiously Strong Cinnamon Mints,Bouquet of Fruits Valentine Chocolate Dipped Strawberries,Brachs Candy Corn,...,Trolli Sour Brite Crawlers,Trolli Sour Brite Eggs Candy,Twix,Twix Bites,Twizzlers Bites Cherry Gummy Candy,Twizzlers Pull-N-Peel Candy Cherry,Twizzlers Twists Strawberry,Warheads Extreme Sour Hard Candy,Werther's Original Caramel Hard Candies,York Peppermint Patty
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
aaron67,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aaron68,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aaron73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abarker,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abigail04,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zsellers,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
zsimpson,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
zsmith,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0
zvaldez,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0


In [8]:
# Map each user name to its row position in `d`; that position is the
# user index handed to the LightFM model when predicting.

user_id = list(d.index)
# enumerate() yields the same 0-based positions as a manual counter, without
# leaving loop variables behind in the notebook namespace.
user_dict = {name: position for position, name in enumerate(user_id)}

In [9]:
# Convert the dense user x item ratings into SciPy CSR form for model fitting.
d_csr = csr_matrix(d.to_numpy())
d_csr

<2531x142 sparse matrix of type '<class 'numpy.float64'>'
	with 17199 stored elements in Compressed Sparse Row format>

In [10]:
# Train a WARP-loss LightFM model on the sparse rating matrix.
# fit() hands back the fitted model, so construction and training are chained.
# NOTE(review): the lightfm import warned that it was compiled without OpenMP
# support, so num_threads=10 may not give any parallelism here — confirm.
model = LightFM(
    loss='warp',
    random_state=45,
    learning_rate=0.95,
    no_components=100,
).fit(d_csr, epochs=100, num_threads=10, verbose=False)

In [11]:
def recommend5(model,data,user_name,user_dict,treshold=0):
    """Recommend up to five candies that the given user has not eaten yet.

    Parameters
    ----------
    model : object
        Fitted recommender exposing ``predict(user_index, item_indices)`` and
        returning one score per requested item.
    data : pandas.DataFrame
        User x item rating matrix: the index holds user names, the columns
        hold candy names.
    user_name : hashable
        Label of the user; must be present in ``data.index`` and ``user_dict``.
    user_dict : dict
        Maps a user name to the user index expected by ``model``.
    treshold : number, default 0
        A candy counts as already eaten when the user's rating is strictly
        greater than this value. (Spelling kept so existing keyword callers
        keep working.)

    Returns
    -------
    list
        At most five candy names, highest predicted score first, excluding
        candies the user has already eaten.

    Raises
    ------
    KeyError
        If ``user_name`` is missing from ``user_dict`` or ``data.index``.
    """
    # Number of candies to score, taken from the matrix that was passed in
    n_items = data.shape[1]
    # Index of the user as known to the model
    user_ind = user_dict[user_name]
    # Predicted score for every candy, labelled with the candy names of `data`
    # (not of a notebook-level global, so any user x item frame works)
    scores = pd.Series(model.predict(user_ind, np.arange(n_items)), index=data.columns)
    # Candy names ordered from most to least preferred
    ranked_items = scores.sort_values(ascending=False).index
    # Candies the user already rated above the threshold; a set gives O(1) lookups
    user_ratings = data.loc[user_name, :]
    eaten_items = set(user_ratings[user_ratings > treshold].index)
    # Keep the ranking order, drop eaten candies, return the top five
    return [item for item in ranked_items if item not in eaten_items][:5]

In [12]:
# Top-5 not-yet-eaten candy recommendations for user "connerthomas".
recommend5(model,d,"connerthomas",user_dict)

['3 Musketeers Miniature Bars',
 'Lindt Lindor Stracciatella Chocolate',
 'Butterfinger Peanut Butter Cups',
 'Skittles Sweets Sours Candy',
 'Oreo Chocolate Candy Bar']

In [13]:
# Top-5 not-yet-eaten candy recommendations for user "flee".
recommend5(model,d,"flee",user_dict)

['M&Ms Brand Dark Chocolate Candies Holiday Blend',
 'Jet Puffed Regular Everyday Marshmallows',
 'Oreo Chocolate Candy Bar',
 'Kirkland Milk Chocolate Almonds',
 'Toblerone Swiss Milk Chocolate with Honey and Almond Nougat']

In [14]:
# Top-5 not-yet-eaten candy recommendations for user "nsolis".
recommend5(model,d,"nsolis",user_dict)

['3 Musketeers Miniature Bars',
 'Trolli Sour Brite Eggs Candy',
 'Sour Patch Kids Candy',
 'Godiva Chocolates',
 'Lindt Ultimate 8 Flavor Assortment Lindor Truffles']