In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [4]:
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth
from mlxtend.preprocessing import TransactionEncoder


In [2]:
df=pd.read_csv(r"/content/Groceries_dataset.csv")

In [3]:
df.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38765 entries, 0 to 38764
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Member_number    38765 non-null  int64 
 1   Date             38765 non-null  object
 2   itemDescription  38765 non-null  object
dtypes: int64(1), object(2)
memory usage: 908.7+ KB


In [6]:
df.isna().sum()

Unnamed: 0,0
Member_number,0
Date,0
itemDescription,0


In [7]:
df['itemDescription'].nunique()


167

In [8]:
df['itemDescription'].value_counts()


Unnamed: 0_level_0,count
itemDescription,Unnamed: 1_level_1
whole milk,2502
other vegetables,1898
rolls/buns,1716
soda,1514
yogurt,1334
...,...
rubbing alcohol,5
bags,4
baby cosmetics,3
kitchen utensil,1


In [9]:
transactions = df.groupby(['Member_number', 'Date'])['itemDescription'].apply(list)
print(transactions)


Member_number  Date      
1000           15-03-2015    [sausage, whole milk, semi-finished bread, yog...
               24-06-2014                    [whole milk, pastry, salty snack]
               24-07-2015                       [canned beer, misc. beverages]
               25-11-2015                          [sausage, hygiene articles]
               27-05-2015                           [soda, pickled vegetables]
                                                   ...                        
4999           24-01-2015    [tropical fruit, berries, other vegetables, yo...
               26-12-2015                               [bottled water, herbs]
5000           09-03-2014                      [fruit/vegetable juice, onions]
               10-02-2015         [soda, root vegetables, semi-finished bread]
               16-11-2014                     [bottled beer, other vegetables]
Name: itemDescription, Length: 14963, dtype: object


In [10]:
#Encode the Transactions
te = TransactionEncoder()
transactions_matrix = te.fit_transform(transactions)
encoded_df = pd.DataFrame(transactions_matrix, columns=te.columns_,dtype=int)

print(encoded_df.head())


   Instant food products  UHT-milk  abrasive cleaner  artif. sweetener  \
0                      0         0                 0                 0   
1                      0         0                 0                 0   
2                      0         0                 0                 0   
3                      0         0                 0                 0   
4                      0         0                 0                 0   

   baby cosmetics  bags  baking powder  bathroom cleaner  beef  berries  ...  \
0               0     0              0                 0     0        0  ...   
1               0     0              0                 0     0        0  ...   
2               0     0              0                 0     0        0  ...   
3               0     0              0                 0     0        0  ...   
4               0     0              0                 0     0        0  ...   

   turkey  vinegar  waffles  whipped/sour cream  whisky  white bread  \
0 

In [11]:
freqitems_apriori = apriori(encoded_df, min_support=0.001, use_colnames=True)




In [12]:
freqitems_apriori.sort_values(by= "support",ascending=False)


Unnamed: 0,support,itemsets
146,0.157923,(whole milk)
90,0.122101,(other vegetables)
109,0.110005,(rolls/buns)
123,0.097106,(soda)
147,0.085879,(yogurt)
...,...,...
162,0.001002,"(canned beer, beef)"
34,0.001002,(cooking chocolate)
105,0.001002,(ready soups)
725,0.001002,"(tropical fruit, sugar)"


In [13]:
#Generate association rules from frequent itemsets
rules = association_rules(freqitems_apriori, metric="confidence", min_threshold=0.1,num_itemsets=3)
rules.head(10)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(UHT-milk),(other vegetables),0.021386,0.122101,0.002139,0.1,0.818993,1.0,-0.000473,0.975443,-0.184234,0.01513,-0.025175,0.058758
1,(UHT-milk),(whole milk),0.021386,0.157923,0.00254,0.11875,0.751949,1.0,-0.000838,0.955549,-0.252105,0.014367,-0.046519,0.067416
2,(beef),(whole milk),0.03395,0.157923,0.004678,0.137795,0.872548,1.0,-0.000683,0.976656,-0.131343,0.024991,-0.023902,0.083709
3,(berries),(other vegetables),0.021787,0.122101,0.002673,0.122699,1.004899,1.0,1.3e-05,1.000682,0.004984,0.01893,0.000681,0.072297
4,(berries),(whole milk),0.021787,0.157923,0.002272,0.104294,0.660414,1.0,-0.001168,0.940127,-0.344543,0.012806,-0.063686,0.059341
5,(beverages),(other vegetables),0.016574,0.122101,0.001738,0.104839,0.858622,1.0,-0.000286,0.980716,-0.143419,0.012689,-0.019663,0.059535
6,(beverages),(soda),0.016574,0.097106,0.001871,0.112903,1.162678,1.0,0.000262,1.017808,0.142275,0.016736,0.017496,0.066087
7,(beverages),(whole milk),0.016574,0.157923,0.001938,0.116935,0.740459,1.0,-0.000679,0.953585,-0.262765,0.011232,-0.048674,0.064604
8,(bottled beer),(other vegetables),0.045312,0.122101,0.004678,0.103245,0.845568,1.0,-0.000854,0.978973,-0.160585,0.028747,-0.021479,0.07078
9,(bottled beer),(whole milk),0.045312,0.157923,0.007151,0.157817,0.99933,1.0,-5e-06,0.999874,-0.000702,0.036469,-0.000126,0.101549


In [14]:
rules = rules.sort_values(['confidence', 'lift'], ascending =[False, False])
rules


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
129,"(sausage, yogurt)",(whole milk),0.005748,0.157923,0.001470,0.255814,1.619866,1.0,0.000563,1.131541,0.384877,0.009065,0.116250,0.132562
121,"(rolls/buns, sausage)",(whole milk),0.005347,0.157923,0.001136,0.212500,1.345594,1.0,0.000292,1.069304,0.258214,0.007007,0.064813,0.109847
126,"(sausage, soda)",(whole milk),0.005948,0.157923,0.001069,0.179775,1.138374,1.0,0.000130,1.026642,0.122281,0.006568,0.025951,0.093273
99,(semi-finished bread),(whole milk),0.009490,0.157923,0.001671,0.176056,1.114825,1.0,0.000172,1.022008,0.103985,0.010081,0.021534,0.093318
124,"(rolls/buns, yogurt)",(whole milk),0.007819,0.157923,0.001337,0.170940,1.082428,1.0,0.000102,1.015701,0.076751,0.008130,0.015459,0.089702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47,(hamburger meat),(other vegetables),0.021854,0.122101,0.002205,0.100917,0.826507,1.0,-0.000463,0.976438,-0.176685,0.015559,-0.024130,0.059490
84,(pip fruit),(rolls/buns),0.049054,0.110005,0.004946,0.100817,0.916483,1.0,-0.000451,0.989783,-0.087448,0.032090,-0.010323,0.072887
68,(pip fruit),(other vegetables),0.049054,0.122101,0.004946,0.100817,0.825688,1.0,-0.001044,0.976330,-0.181671,0.029755,-0.024244,0.070660
94,(soft cheese),(rolls/buns),0.010025,0.110005,0.001002,0.100000,0.909052,1.0,-0.000100,0.988884,-0.091784,0.008422,-0.011241,0.054557


In [15]:
feqitems_fp = fpgrowth(encoded_df, min_support=0.001, use_colnames=True)
print(feqitems_fp.head())




    support               itemsets
0  0.157923           (whole milk)
1  0.085879               (yogurt)
2  0.060349              (sausage)
3  0.009490  (semi-finished bread)
4  0.051728               (pastry)


In [16]:
feqitems_fp.sort_values(by= "support",ascending=False)


Unnamed: 0,support,itemsets
0,0.157923,(whole milk)
17,0.122101,(other vegetables)
13,0.110005,(rolls/buns)
9,0.097106,(soda)
1,0.085879,(yogurt)
...,...,...
271,0.001002,"(canned beer, beef)"
742,0.001002,"(tropical fruit, cat food)"
278,0.001002,"(domestic eggs, white bread)"
167,0.001002,"(other vegetables, semi-finished bread)"


In [17]:
rules_fpgrowth = association_rules(feqitems_fp, metric='confidence', min_threshold=0.1,num_itemsets=3)
rules_fpgrowth.head(10)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(yogurt),(whole milk),0.085879,0.157923,0.011161,0.129961,0.82294,1.0,-0.002401,0.967861,-0.190525,0.047975,-0.033206,0.100317
1,"(whole milk, yogurt)",(other vegetables),0.011161,0.122101,0.001136,0.101796,0.833705,1.0,-0.000227,0.977394,-0.167857,0.008599,-0.023129,0.055551
2,"(other vegetables, yogurt)",(whole milk),0.008087,0.157923,0.001136,0.140496,0.889649,1.0,-0.000141,0.979724,-0.111151,0.006891,-0.020695,0.073845
3,"(whole milk, yogurt)",(rolls/buns),0.011161,0.110005,0.001337,0.11976,1.088685,1.0,0.000109,1.011083,0.08238,0.011154,0.010962,0.065956
4,"(rolls/buns, yogurt)",(whole milk),0.007819,0.157923,0.001337,0.17094,1.082428,1.0,0.000102,1.015701,0.076751,0.00813,0.015459,0.089702
5,(sausage),(whole milk),0.060349,0.157923,0.008955,0.148394,0.939663,1.0,-0.000575,0.988811,-0.063965,0.042784,-0.011316,0.102551
6,"(whole milk, sausage)",(yogurt),0.008955,0.085879,0.00147,0.164179,1.91176,1.0,0.000701,1.093681,0.481231,0.015748,0.085657,0.09065
7,"(whole milk, yogurt)",(sausage),0.011161,0.060349,0.00147,0.131737,2.182917,1.0,0.000797,1.082219,0.548014,0.020992,0.075973,0.07805
8,"(sausage, yogurt)",(whole milk),0.005748,0.157923,0.00147,0.255814,1.619866,1.0,0.000563,1.131541,0.384877,0.009065,0.11625,0.132562
9,"(whole milk, sausage)",(rolls/buns),0.008955,0.110005,0.001136,0.126866,1.153275,1.0,0.000151,1.019311,0.134105,0.009643,0.018945,0.068597


In [18]:
rules_fpgrowth = rules_fpgrowth.sort_values(['confidence', 'lift'], ascending =[False, False])
rules_fpgrowth


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
8,"(sausage, yogurt)",(whole milk),0.005748,0.157923,0.001470,0.255814,1.619866,1.0,0.000563,1.131541,0.384877,0.009065,0.116250,0.132562
10,"(rolls/buns, sausage)",(whole milk),0.005347,0.157923,0.001136,0.212500,1.345594,1.0,0.000292,1.069304,0.258214,0.007007,0.064813,0.109847
12,"(sausage, soda)",(whole milk),0.005948,0.157923,0.001069,0.179775,1.138374,1.0,0.000130,1.026642,0.122281,0.006568,0.025951,0.093273
13,(semi-finished bread),(whole milk),0.009490,0.157923,0.001671,0.176056,1.114825,1.0,0.000172,1.022008,0.103985,0.010081,0.021534,0.093318
4,"(rolls/buns, yogurt)",(whole milk),0.007819,0.157923,0.001337,0.170940,1.082428,1.0,0.000102,1.015701,0.076751,0.008130,0.015459,0.089702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,(hamburger meat),(other vegetables),0.021854,0.122101,0.002205,0.100917,0.826507,1.0,-0.000463,0.976438,-0.176685,0.015559,-0.024130,0.059490
54,(pip fruit),(rolls/buns),0.049054,0.110005,0.004946,0.100817,0.916483,1.0,-0.000451,0.989783,-0.087448,0.032090,-0.010323,0.072887
55,(pip fruit),(other vegetables),0.049054,0.122101,0.004946,0.100817,0.825688,1.0,-0.001044,0.976330,-0.181671,0.029755,-0.024244,0.070660
105,(soft cheese),(rolls/buns),0.010025,0.110005,0.001002,0.100000,0.909052,1.0,-0.000100,0.988884,-0.091784,0.008422,-0.011241,0.054557
