# Recommender System Using Apriori
## Import libraries

In [1]:
import numpy as np
import pandas as pd
import warnings
# Silences every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used
# below; consider narrowing the filter to a specific category if one is known.
warnings.filterwarnings("ignore")

%matplotlib inline

## Data preprocessing

In [31]:
# Read the raw purchase log (one row per purchased item) and preview the
# first five rows.
ds = pd.read_csv(filepath_or_buffer = 'Groceries_dataset.csv')
ds.head(n = 5)

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [32]:
# Print the frame's column names, non-null counts, dtypes and memory usage.
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38765 entries, 0 to 38764
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Member_number    38765 non-null  int64 
 1   Date             38765 non-null  object
 2   itemDescription  38765 non-null  object
dtypes: int64(1), object(2)
memory usage: 908.7+ KB


In [33]:
# Per-column flag: True if the column contains at least one missing value.
ds.isna().any()

Member_number      False
Date               False
itemDescription    False
dtype: bool

In [34]:
# Give every distinct (Member_number, Date) pair its own integer label, stored
# in the 'customerID' column, then order the rows by that label so that items
# sharing a label sit next to each other. Despite the column name, the label
# identifies a member's purchases on one date rather than the member.
ds = (
    ds
    .assign(customerID = ds.groupby(['Member_number', 'Date']).ngroup())
    .sort_values(by = 'customerID')
)
ds.head()

Unnamed: 0,Member_number,Date,itemDescription,customerID
20992,1000,15-03-2015,semi-finished bread,0
8395,1000,15-03-2015,whole milk,0
24544,1000,15-03-2015,yogurt,0
4843,1000,15-03-2015,sausage,0
32851,1000,24-06-2014,salty snack,1


In [70]:
# Build one basket (a list of item names) per 'customerID' value in a single
# groupby pass. groupby sorts the keys, so baskets come out in ascending
# 'customerID' order, and rows keep their existing relative order inside each
# basket. This replaces a comprehension that re-scanned the whole frame with a
# boolean mask once per ID (number of IDs x number of rows comparisons) and
# whose basket order depended on set iteration order.
transaction = ds.groupby('customerID')['itemDescription'].apply(list).tolist()
transaction[:10]

[['semi-finished bread', 'whole milk', 'yogurt', 'sausage'],
 ['salty snack', 'whole milk', 'pastry'],
 ['canned beer', 'misc. beverages'],
 ['sausage', 'hygiene articles'],
 ['pickled vegetables', 'soda'],
 ['curd', 'frankfurter'],
 ['sausage', 'rolls/buns', 'whole milk'],
 ['soda', 'whole milk'],
 ['white bread', 'beef'],
 ['frankfurter', 'soda', 'whipped/sour cream']]

## Create recommender system from the dataset

In [65]:
from apyori import apriori

# Mining thresholds, named so they can be tuned in one place.
MIN_SUPPORT = 0.001      # passed as apriori's min_support
MIN_CONFIDENCE = 0.01    # passed as apriori's min_confidence
MAX_LENGTH = 3           # passed as apriori's max_length

rules = apriori(
    transactions = transaction,
    min_support = MIN_SUPPORT,
    min_confidence = MIN_CONFIDENCE,
    max_length = MAX_LENGTH,
)
# Materialize the records into a list so they can be indexed and iterated
# more than once by the cells below.
results = list(rules)

# Preview only the first few records; echoing the whole list floods the
# notebook with hundreds of RelationRecord reprs.
results[:5]

[RelationRecord(items=frozenset({'UHT-milk'}), support=0.021386085678005748, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'UHT-milk'}), confidence=0.021386085678005748, lift=1.0)]),
 RelationRecord(items=frozenset({'beef'}), support=0.03395041101383412, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'beef'}), confidence=0.03395041101383412, lift=1.0)]),
 RelationRecord(items=frozenset({'berries'}), support=0.021787074784468355, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'berries'}), confidence=0.021787074784468355, lift=1.0)]),
 RelationRecord(items=frozenset({'beverages'}), support=0.016574216400454454, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'beverages'}), confidence=0.016574216400454454, lift=1.0)]),
 RelationRecord(items=frozenset({'bottled beer'}), support=0.04531176903027468, ordered_statistics=[OrderedStatistic(items_base=frozen

In [66]:
def inspect(results):
    """Flatten relation records into ``(items, support, confidence, lift)`` rows.

    Each record is read positionally: ``record[0]`` is the item collection,
    ``record[1]`` the support, and ``record[2]`` the sequence of ordered
    statistics. Only the FIRST ordered statistic of each record is used; its
    elements at positions 2 and 3 supply the confidence and lift. Any further
    statistics of the same record are ignored, and the rule direction
    (which items are the base and which are added) is not part of the row.

    Parameters
    ----------
    results : iterable
        Relation records. Any iterable is accepted, including a one-shot
        iterator or generator, because the input is traversed exactly once.

    Returns
    -------
    list of tuple
        One ``(tuple(items), support, confidence, lift)`` tuple per record,
        in input order.

    Raises
    ------
    IndexError
        If a record has no ordered statistics.
    """
    rows = []
    # Single pass: iterating the input once per output column would exhaust a
    # generator on the first column and silently yield an empty result.
    for record in results:
        first_stat = record[2][0]
        rows.append((tuple(record[0]), record[1], first_stat[2], first_stat[3]))
    return rows

# Tabulate the flattened records: one row per record, with the item
# combination followed by its support, confidence and lift.
resultsinDataFrame = pd.DataFrame(
    data = inspect(results),
    columns = ['Items combination', 'Support', 'Confidence', 'Lift'],
)

In [67]:
# Display the summary table of item combinations with their support,
# confidence and lift.
resultsinDataFrame

Unnamed: 0,Items combination,Support,Confidence,Lift
0,"(UHT-milk,)",0.021386,0.021386,1.000000
1,"(beef,)",0.033950,0.033950,1.000000
2,"(berries,)",0.021787,0.021787,1.000000
3,"(beverages,)",0.016574,0.016574,1.000000
4,"(bottled beer,)",0.045312,0.045312,1.000000
...,...,...,...,...
660,"(rolls/buns, whole milk, sausage)",0.001136,0.010328,1.153275
661,"(rolls/buns, whole milk, soda)",0.001002,0.010323,0.739091
662,"(rolls/buns, yogurt, whole milk)",0.001337,0.012151,1.088685
663,"(whole milk, soda, sausage)",0.001069,0.017719,1.523708


In [68]:
# Show the ten rows with the highest 'Lift' value. The lift stored per row is
# that of the record's first ordered statistic only (see inspect()).
resultsinDataFrame.nlargest(n = 10, columns = 'Lift')

Unnamed: 0,Items combination,Support,Confidence,Lift
664,"(yogurt, whole milk, sausage)",0.00147,0.024363,2.182917
301,"(citrus fruit, specialty chocolate)",0.001403,0.026415,1.653762
373,"(tropical fruit, flour)",0.001069,0.109589,1.617141
109,"(beverages, sausage)",0.001537,0.092742,1.536764
663,"(whole milk, soda, sausage)",0.001069,0.017719,1.523708
487,"(napkins, pastry)",0.001738,0.07855,1.518529
576,"(processed cheese, root vegetables)",0.001069,0.105263,1.513019
439,"(hard cheese, pip fruit)",0.001069,0.072727,1.482586
635,"(yogurt, soft cheese)",0.00127,0.126667,1.474952
339,"(curd, sausage)",0.002941,0.087302,1.446615
