# Grocery Product Recommendation using Apriori Algorithm

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mlxtend import frequent_patterns 
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# The CSV has no header row: header=None keeps the first line as data and
# labels the columns with integer positions.
df = pd.read_csv("store_data.csv", header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [3]:
df.shape
# 7501 orders, with at most 20 products bought together in a single order

(7501, 20)

In [4]:
# Inspect a blank slot (row 1, column 4): it is stored as a float rather than
# a string, so the table mixes str product names with float placeholders.
type(df.values[1,4])

float

In [5]:
# Replace every missing value with the integer 0 so blank slots can be
# recognised and skipped when the transactions are built.
df= df.fillna(0)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,chutney,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,turkey,avocado,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,mineral water,milk,energy bar,whole wheat rice,green tea,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Preprocessing 

In [6]:
# Turn each row into one transaction: the list of product names in that order.
# The 0 placeholders written by fillna(0) are dropped. The frame's own
# contents drive the loops, so a file with a different number of orders or
# columns works unchanged, and the underlying array is materialised only once.
records = [
    [str(item) for item in basket if item != 0]
    for basket in df.values
]

In [7]:
# records

In [8]:
# Collect every distinct product name in first-seen order.
# All rows are scanned, including row 0: the file was read with header=None,
# so row 0 is a real order and must not be skipped. A set does the membership
# test so the scan does not slow down as the product list grows.
products = []
seen = set()
for basket in df.values:
    for item in basket:
        name = str(item)
        # "0" is the stringified fillna(0) placeholder, not a product.
        if name == "0" or name in seen:
            continue
        seen.add(name)
        products.append(name)

In [9]:
# Report how many distinct product names are held in `products`.
print("Total products: ", len(products))

Total products:  120


In [10]:
# Learn the product vocabulary from the transactions and encode them into a
# transactions-by-products array in a single step.
te = TransactionEncoder()
te_array = te.fit_transform(records)

In [11]:
# Wrap the encoded array in a DataFrame: one row per transaction and one
# column per product, labelled with the names the encoder learned.
df_trans = pd.DataFrame(te_array, columns=te.columns_)
df_trans.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


### Useful insights from data

In [12]:
# Top 10 products by number of transactions that contain them
# (summing a boolean column counts its True entries).
df_trans.sum().sort_values(ascending=False).head(10)

mineral water        1788
eggs                 1348
spaghetti            1306
french fries         1282
chocolate            1229
green tea             991
milk                  972
ground beef           737
frozen vegetables     715
pancakes              713
dtype: int64

### Using Apriori Algorithm

In [13]:
# Mine frequent itemsets with a minimum support of 0.015 (1.5% of all
# transactions); use_colnames=True reports product names instead of column
# indices in the `itemsets` column.
freq_itemsets = frequent_patterns.apriori(df_trans,min_support=0.015,use_colnames=True)
freq_itemsets

Unnamed: 0,support,itemsets
0,0.020397,(almonds)
1,0.033329,(avocado)
2,0.033729,(brownies)
3,0.087188,(burgers)
4,0.030129,(butter)
...,...,...
148,0.020931,"(spaghetti, tomatoes)"
149,0.016531,"(spaghetti, turkey)"
150,0.015865,"(spaghetti, mineral water, chocolate)"
151,0.017064,"(spaghetti, mineral water, ground beef)"


In [14]:
# Derive association rules from the frequent itemsets, using lift as the
# filtering metric with a threshold of 1.
rules = frequent_patterns.association_rules(freq_itemsets,metric='lift',min_threshold=1)
# Display only: the sorted frame is not assigned, so `rules` keeps its
# original row order.
rules.sort_values(by='confidence',ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
148,(soup),(mineral water),0.050527,0.238368,0.023064,0.456464,1.914955,0.011020,1.401255
181,"(spaghetti, milk)",(mineral water),0.035462,0.238368,0.015731,0.443609,1.861024,0.007278,1.368879
175,"(spaghetti, ground beef)",(mineral water),0.039195,0.238368,0.017064,0.435374,1.826477,0.007722,1.348914
141,(olive oil),(mineral water),0.065858,0.238368,0.027596,0.419028,1.757904,0.011898,1.310962
176,"(mineral water, ground beef)",(spaghetti),0.040928,0.174110,0.017064,0.416938,2.394681,0.009938,1.416470
...,...,...,...,...,...,...,...,...,...
145,(mineral water),(salmon),0.238368,0.042528,0.017064,0.071588,1.683336,0.006927,1.031302
119,(mineral water),(herb & pepper),0.238368,0.049460,0.017064,0.071588,1.447397,0.005275,1.023835
172,(mineral water),"(spaghetti, chocolate)",0.238368,0.039195,0.015865,0.066555,1.698053,0.006522,1.029311
184,(mineral water),"(spaghetti, milk)",0.238368,0.035462,0.015731,0.065996,1.861024,0.007278,1.032691


In [15]:
# Show the first 10 rules whose confidence exceeds 0.16 and whose lift exceeds 2.
rules[(rules['confidence']>0.16) & (rules['lift']>2)].head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
94,(frozen vegetables),(shrimp),0.095321,0.071457,0.016664,0.174825,2.446574,0.009853,1.125268
95,(shrimp),(frozen vegetables),0.071457,0.095321,0.016664,0.233209,2.446574,0.009853,1.179825
98,(tomatoes),(frozen vegetables),0.068391,0.095321,0.016131,0.235867,2.474464,0.009612,1.18393
99,(frozen vegetables),(tomatoes),0.095321,0.068391,0.016131,0.169231,2.474464,0.009612,1.121381
110,(herb & pepper),(ground beef),0.04946,0.098254,0.015998,0.32345,3.291994,0.011138,1.33286
111,(ground beef),(herb & pepper),0.098254,0.04946,0.015998,0.162822,3.291994,0.011138,1.13541
116,(spaghetti),(ground beef),0.17411,0.098254,0.039195,0.225115,2.291162,0.022088,1.163716
117,(ground beef),(spaghetti),0.098254,0.17411,0.039195,0.398915,2.291162,0.022088,1.373997
136,(soup),(milk),0.050527,0.129583,0.015198,0.300792,2.321232,0.008651,1.244861
174,"(spaghetti, mineral water)",(ground beef),0.059725,0.098254,0.017064,0.285714,2.907928,0.011196,1.262445


In [16]:
# Peek at the raw antecedents column: each entry is a frozenset of product names.
rules.antecedents.values

array([frozenset({'chocolate'}), frozenset({'burgers'}),
       frozenset({'eggs'}), frozenset({'burgers'}),
       frozenset({'burgers'}), frozenset({'french fries'}),
       frozenset({'green tea'}), frozenset({'burgers'}),
       frozenset({'burgers'}), frozenset({'milk'}),
       frozenset({'mineral water'}), frozenset({'burgers'}),
       frozenset({'spaghetti'}), frozenset({'burgers'}),
       frozenset({'cake'}), frozenset({'eggs'}), frozenset({'cake'}),
       frozenset({'french fries'}), frozenset({'cake'}),
       frozenset({'mineral water'}), frozenset({'cake'}),
       frozenset({'spaghetti'}), frozenset({'mineral water'}),
       frozenset({'chicken'}), frozenset({'spaghetti'}),
       frozenset({'chicken'}), frozenset({'eggs'}),
       frozenset({'chocolate'}), frozenset({'chocolate'}),
       frozenset({'escalope'}), frozenset({'chocolate'}),
       frozenset({'french fries'}), frozenset({'frozen vegetables'}),
       frozenset({'chocolate'}), frozenset({'chocolate'}),
 

In [23]:
def _itemset_label(itemset):
    """Return a stable text label for an itemset.

    A single-item set becomes that item's name; a multi-item set becomes its
    member names sorted and joined with ", ". A value that is already a string
    is returned unchanged, so running this cell more than once does not
    corrupt the columns.
    """
    if isinstance(itemset, str):
        return itemset
    return ", ".join(sorted(map(str, itemset)))


# Replace the frozenset columns with plain strings so rules can be matched
# with ==. Every member of a multi-item set is kept; taking only the first
# element of a set would select an arbitrary member, because set iteration
# order is not fixed, and would attribute the rule to the wrong product.
rules["antecedents"] = rules["antecedents"].apply(_itemset_label).astype(str)
rules["consequents"] = rules["consequents"].apply(_itemset_label).astype(str)

In [28]:
# Five highest-confidence consequents among the rules whose antecedent is "shrimp".
(
    rules.loc[rules["antecedents"] == "shrimp"]
    .sort_values(by="confidence", ascending=False)["consequents"]
    .head(5)
    .tolist()
)

['mineral water', 'spaghetti', 'chocolate', 'milk', 'frozen vegetables']

In [29]:
# Persist the rules table to a CSV file in the working directory.
# NOTE(review): the file name is spelled "recommedations"; it is left as-is
# because renaming changes the output path — confirm nothing reads this file
# before correcting it.
rules.to_csv("recommedations.csv")

In [31]:
def recommend(product, items=3):
    """Suggest products to offer alongside `product`.

    Looks up the rows of the module-level `rules` table whose "antecedents"
    value equals `product`, orders them by "confidence" from highest to
    lowest, and returns the "consequents" of the first `items` rows.

    Parameters
    ----------
    product : value compared against the "antecedents" column.
    items : maximum number of recommendations to return (default 3).

    Returns
    -------
    list
        Consequent values, best first; empty when no rule matches.
    """
    matching = rules.loc[rules["antecedents"] == product]
    ranked = matching.sort_values(by="confidence", ascending=False)
    best_consequents = ranked["consequents"].head(items)
    return best_consequents.tolist()

In [33]:
# Example: up to five highest-confidence recommendations for "shrimp".
print(recommend("shrimp",5))

['mineral water', 'spaghetti', 'chocolate', 'milk', 'frozen vegetables']
