In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import coo_matrix
from IPython.display import display_html
import warnings

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
%matplotlib inline

from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k
from lightfm import LightFM
from skopt import forest_minimize



In [3]:
data = pd.read_csv('retail.csv')

In [4]:
data

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France


## RECOMMENDATION

In [5]:
# Removing returned products (Invoice numbers starting with C) from the data set
data = data[~data["InvoiceNo"].str.contains("C", na = False)]
# Removing missing values from the dataset
data=data.dropna()
data

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France


In [6]:
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [45]:
products = data[['StockCode','Description','UnitPrice']]


In [46]:
products.shape

(397924, 3)

In [47]:
products=products.drop_duplicates()
products

Unnamed: 0,StockCode,Description,UnitPrice
0,85123A,WHITE HANGING HEART T-LIGHT HOLDER,2.55
1,71053,WHITE METAL LANTERN,3.39
2,84406B,CREAM CUPID HEARTS COAT HANGER,2.75
3,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,3.39
4,84029E,RED WOOLLY HOTTIE WHITE HEART.,3.39
...,...,...,...
540066,23535,WALL ART BICYCLE SAFETY,3.90
540068,23526,WALL ART DOG LICENCE,3.90
540272,21253,SET OF PICTURE FRAME STICKERS,0.79
540421,23843,"PAPER CRAFT , LITTLE BIRDIE",2.08


In [48]:
products['UnitPrice']=products['UnitPrice'].astype('float64')

In [49]:
products=products.drop_duplicates(subset=['StockCode']).reset_index()
products['ProductID']=products['index']


In [53]:
products=products[['ProductID','Description','UnitPrice','StockCode']]
products
dicty=pd.Series(products.ProductID.values,index=products.StockCode).to_dict()

In [60]:
users=data[['CustomerID','Country','StockCode','Quantity']]
users['ProductID']=users['StockCode'].map(dicty)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  users['ProductID']=users['StockCode'].map(dicty)


In [62]:
users.isna().sum()

CustomerID    0
Country       0
StockCode     0
Quantity      0
ProductID     0
dtype: int64

In [13]:
import scipy

In [63]:
user_product_interaction = pd.pivot_table(users, index='CustomerID', columns='ProductID', values='Quantity')
# fill missing values with 0
user_product_interaction = user_product_interaction.fillna(0)
user_id = list(user_product_interaction.index)
user_dict = {}
counter = 0 
for i in user_id:
    user_dict[i] = counter
    counter += 1
# convert to csr matrix
user_product_interaction_csr = scipy.sparse.csr_matrix(user_product_interaction.values)
user_product_interaction_csr

<4339x3665 sparse matrix of type '<class 'numpy.float64'>'
	with 266802 stored elements in Compressed Sparse Row format>

In [64]:
user_product_interaction

ProductID,0,1,2,3,4,5,6,7,8,9,...,503602,504104,527057,527060,527064,527065,527067,527069,530382,540421
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12346.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12347.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12348.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12349.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12350.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18280.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18281.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18282.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18283.0,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
user_product_interaction

StockCode,10002,10080,10120,10123C,10124A,10124G,10125,10133,10135,11001,...,90214V,90214W,90214Y,90214Z,BANK CHARGES,C2,DOT,M,PADS,POST
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
12346.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
12347.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
12348.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25
12349.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.00
12350.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18280.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
18281.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
18282.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
18283.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.00


In [69]:
products=products.drop(['StockCode'],axis=1)

In [70]:
item_dict ={}
df = products[['ProductID', 'Description']].sort_values('ProductID')
for i in range(df.shape[0]):
    item_dict[(df.loc[i,'ProductID'])] = df.loc[i,'Description']

products_csr = scipy.sparse.csr_matrix(products.drop(['ProductID','Description'], axis=1).values)

products_csr

<3665x1 sparse matrix of type '<class 'numpy.float64'>'
	with 3665 stored elements in Compressed Sparse Row format>

In [71]:
item_dict[0]
user_dict[17850.0]

4017

In [72]:
products

Unnamed: 0,ProductID,Description,UnitPrice
0,0,WHITE HANGING HEART T-LIGHT HOLDER,2.55
1,1,WHITE METAL LANTERN,3.39
2,2,CREAM CUPID HEARTS COAT HANGER,2.75
3,3,KNITTED UNION FLAG HOT WATER BOTTLE,3.39
4,4,RED WOOLLY HOTTIE WHITE HEART.,3.39
...,...,...,...
3660,527065,"LETTER ""U"" BLING KEY RING",0.29
3661,527067,"LETTER ""W"" BLING KEY RING",0.29
3662,527069,"LETTER ""Z"" BLING KEY RING",0.29
3663,530382,PINK CRYSTAL SKULL PHONE CHARM,0.19


In [75]:
users=users.drop(['StockCode'],axis=1)
users

KeyError: "['StockCode'] not found in axis"

In [78]:
model = LightFM(loss='warp',
                random_state=2016,
                learning_rate=0.90,
                no_components=150,
                user_alpha=0.000005)

model = model.fit(user_product_interaction_csr,
                  epochs=100,
                  num_threads=16, verbose=False)

In [79]:
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict,threshold = 0,nrec_items = 5, show = True):
    
    n_users, n_items = interactions.shape
    user_x = user_dict[user_id]
    scores = pd.Series(model.predict(user_x,np.arange(n_items), item_features=products_csr))
    scores.index = interactions.columns
    scores = list(pd.Series(scores.sort_values(ascending=False).index))
    
    known_items = list(pd.Series(interactions.loc[user_id,:] \
                                 [interactions.loc[user_id,:] > threshold].index).sort_values(ascending=False))
    
    scores = [x for x in scores if x not in known_items]
    return_score_list = scores[0:nrec_items]
    known_items = list(pd.Series(known_items).apply(lambda x: item_dict[x]))
    scores = list(pd.Series(return_score_list).apply(lambda x: item_dict[x]))
    if show == True:
        print ("User: " + str(user_id))
        print("Known Likes:")
        counter = 1
        for i in known_items:
            print(str(counter) + '- ' + i)
            counter+=1

        print("\n Recommended Items:")
        counter = 1
        for i in scores:
            print(str(counter) + '- ' + i)
            counter+=1

In [80]:
sample_recommendation_user(model, user_product_interaction, 12680.0, user_dict, item_dict)

User: 12680.0
Known Likes:
1- BUNDLE OF 3 SCHOOL EXERCISE BOOKS  
2- CHILDRENS CUTLERY CIRCUS PARADE
3- CHILDRENS CUTLERY SPACEBOY 
4- CHILDRENS CUTLERY DOLLY GIRL 
5- PANTRY WASHING UP BRUSH
6- PANTRY SCRUBBING BRUSH
7- PASTEL COLOUR HONEYCOMB FAN
8- BAKING SET 9 PIECE RETROSPOT 
9- PACK OF 20 SPACEBOY NAPKINS
10- FUNKY WASHING UP GLOVES ASSORTED
11- ROBOT BIRTHDAY CARD
12- SPACEBOY BIRTHDAY CARD
13- RED RETROSPOT CHILDRENS UMBRELLA
14- FAIRY CAKE BIRTHDAY CANDLE SET
15- PACK OF 12 WOODLAND TISSUES 
16- PLASTERS IN TIN STRONGMAN
17- PLASTERS IN TIN CIRCUS PARADE 
18- CHILDS BREAKFAST SET SPACEBOY 
19- CHILDS BREAKFAST SET DOLLY GIRL 
20- CARAVAN SQUARE TISSUE BOX
21- CARD DOLLY GIRL 
22- CHILDRENS APRON SPACEBOY DESIGN
23- CHILDREN'S APRON DOLLY GIRL 
24- PACK OF 12 PINK POLKADOT TISSUES
25- DOLLY GIRL LUNCH BOX
26- ALARM CLOCK BAKELIKE IVORY
27- POSTAGE
28- CIRCUS PARADE LUNCH BOX 
29- SPACEBOY LUNCH BOX 
30- ROUND SNACK BOXES SET OF4 WOODLAND 
31- ALARM CLOCK BAKELIKE GREEN
32- ALAR