In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.api.types import CategoricalDtype
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import warnings
warnings.filterwarnings('ignore')

In [14]:
data = pd.read_csv('./data/Groceries_dataset.csv')

In [16]:
data

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,fruit/vegetable juice


In [26]:
# Transforming data for apriori implementation
basket = data.groupby(['Member_number', 'itemDescription'])['itemDescription'].count().unstack().reset_index().fillna(0).set_index('Member_number')

# Encoding the data: 0 if not purchased, 1 if purchased
def encode_units(x):
    return 1 if x >= 1 else 0

basket = basket.applymap(encode_units)

In [28]:
# Applying apriori conditions
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

# Generating the association rules accordingly
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# association rules of head 15
top_rules = rules.sort_values(by='lift', ascending=False).head(50)

# Converting antecedents and consequents to strings for plotting
top_rules['antecedents'] = top_rules['antecedents'].apply(lambda x: ', '.join(list(x)))
top_rules['consequents'] = top_rules['consequents'].apply(lambda x: ', '.join(list(x)))

# Creating a table displaying the top 15 rules
top_rules_table = top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].round(2)

# Displaying the top 15 association rules in table format
from IPython.display import display, HTML
display(HTML(top_rules_table.to_html(index=False)))
rules.head(50)

antecedents,consequents,support,confidence,lift
"whole milk, other vegetables, sausage","rolls/buns, yogurt",0.01,0.27,2.43
"rolls/buns, yogurt","whole milk, other vegetables, sausage",0.01,0.12,2.43
"rolls/buns, other vegetables, yogurt","whole milk, sausage",0.01,0.26,2.43
"whole milk, sausage","rolls/buns, other vegetables, yogurt",0.01,0.13,2.43
"whole milk, sausage","curd, yogurt",0.01,0.09,2.32
"curd, yogurt","whole milk, sausage",0.01,0.25,2.32
"whole milk, rolls/buns, sausage","other vegetables, yogurt",0.01,0.28,2.32
"other vegetables, yogurt","whole milk, rolls/buns, sausage",0.01,0.11,2.32
"rolls/buns, sausage","whole milk, other vegetables, yogurt",0.01,0.17,2.3
"whole milk, other vegetables, yogurt","rolls/buns, sausage",0.01,0.19,2.3


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(beef),(UHT-milk),0.119548,0.078502,0.010518,0.087983,1.120775,1.0,0.001133,1.010396,0.122392,0.056088,0.010289,0.110985
1,(UHT-milk),(beef),0.078502,0.119548,0.010518,0.133987,1.120775,1.0,0.001133,1.016672,0.11694,0.056088,0.016399,0.110985
2,(UHT-milk),(bottled beer),0.078502,0.158799,0.014879,0.189542,1.193597,1.0,0.002413,1.037933,0.176014,0.066897,0.036547,0.141621
3,(bottled beer),(UHT-milk),0.158799,0.078502,0.014879,0.0937,1.193597,1.0,0.002413,1.016769,0.192815,0.066897,0.016492,0.141621
4,(UHT-milk),(bottled water),0.078502,0.213699,0.021293,0.271242,1.269268,1.0,0.004517,1.07896,0.230217,0.078598,0.073181,0.185441
5,(bottled water),(UHT-milk),0.213699,0.078502,0.021293,0.09964,1.269268,1.0,0.004517,1.023477,0.269801,0.078598,0.022939,0.185441
6,(UHT-milk),(brown bread),0.078502,0.135967,0.012314,0.156863,1.153681,1.0,0.00164,1.024783,0.144557,0.060914,0.024184,0.123714
7,(brown bread),(UHT-milk),0.135967,0.078502,0.012314,0.090566,1.153681,1.0,0.00164,1.013266,0.154172,0.060914,0.013092,0.123714
8,(butter),(UHT-milk),0.126475,0.078502,0.010518,0.083164,1.059394,1.0,0.00059,1.005085,0.064181,0.05409,0.00506,0.108576
9,(UHT-milk),(butter),0.078502,0.126475,0.010518,0.133987,1.059394,1.0,0.00059,1.008674,0.06084,0.05409,0.008599,0.108576


In [32]:
min_confidence = 0.06

# Generating the association rules accordingly
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Filter rules based on the minimum confidence
rules = rules[rules['confidence'] >= min_confidence]

In [34]:
# Sort the rules table by confidence in descending order.
rules_sorted = rules.sort_values(by='confidence', ascending=False)
# Display the sorted rules table.
rules_sorted

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
7022,"(meat, domestic eggs)",(whole milk),0.013084,0.458184,0.010262,0.784314,1.711789,1.0,0.004267,2.512057,0.421328,0.022259,0.601920,0.403355
5710,"(chocolate, fruit/vegetable juice)",(whole milk),0.014366,0.458184,0.010775,0.750000,1.636898,1.0,0.004192,2.167265,0.394760,0.023333,0.538589,0.386758
15143,"(rolls/buns, other vegetables, bottled water, ...",(whole milk),0.014110,0.458184,0.010518,0.745455,1.626978,1.0,0.004053,2.128564,0.390879,0.022778,0.530200,0.384205
11633,"(pip fruit, bottled water, yogurt)",(whole milk),0.013853,0.458184,0.010262,0.740741,1.616689,1.0,0.003914,2.089863,0.386811,0.022222,0.521500,0.381569
11997,"(rolls/buns, yogurt, brown bread)",(whole milk),0.017445,0.458184,0.012827,0.735294,1.604802,1.0,0.004834,2.046862,0.383561,0.027716,0.511447,0.381645
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4111,(bottled water),"(soda, whipped/sour cream)",0.213699,0.048486,0.012827,0.060024,1.237955,1.0,0.002466,1.012274,0.244457,0.051440,0.012126,0.162287
10886,(bottled water),"(whole milk, other vegetables, bottled beer)",0.213699,0.038738,0.012827,0.060024,1.549494,1.0,0.004549,1.022645,0.451008,0.053533,0.022144,0.195575
3491,(bottled water),"(coffee, other vegetables)",0.213699,0.049256,0.012827,0.060024,1.218612,1.0,0.002301,1.011456,0.228150,0.051282,0.011326,0.160220
11684,(bottled water),"(whole milk, rolls/buns, sausage)",0.213699,0.048743,0.012827,0.060024,1.231440,1.0,0.002411,1.012001,0.239021,0.051387,0.011859,0.161591


In [36]:
# Import necessary libraries
import pandas as pd

# Assuming 'rules_sorted' is your DataFrame
# Replace 'your_keyword' with the actual keyword you want to search for
keyword = 'whole milk'

# Convert 'antecedents' and 'consequents' to strings in rules_sorted, if they are not already strings
rules_sorted['antecedents'] = rules_sorted['antecedents'].apply(lambda x: ', '.join(list(x)))
rules_sorted['consequents'] = rules_sorted['consequents'].apply(lambda x: ', '.join(list(x)))

# Use the .str.contains() method to filter the DataFrame
filtered_rules = rules_sorted[rules_sorted['antecedents'].str.contains(keyword)]

# Display the filtered DataFrame
filtered_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
13044,"whole milk, frozen meals, rolls/buns",other vegetables,0.015649,0.376603,0.010005,0.639344,1.697659,1.0,0.004112,1.728509,0.417487,0.026174,0.421467,0.332956
10790,"whole milk, soda, UHT-milk",other vegetables,0.015649,0.376603,0.010005,0.639344,1.697659,1.0,0.004112,1.728509,0.417487,0.026174,0.421467,0.332956
12975,"whole milk, soda, frankfurter",other vegetables,0.024885,0.376603,0.015136,0.608247,1.615088,1.0,0.005764,1.591302,0.390557,0.039177,0.371584,0.324219
12990,"whole milk, yogurt, frankfurter",other vegetables,0.025141,0.376603,0.015136,0.602041,1.598607,1.0,0.005668,1.566484,0.384112,0.039151,0.361628,0.321116
11268,"whole milk, bottled water, frankfurter",other vegetables,0.021036,0.376603,0.012571,0.597561,1.586712,1.0,0.004648,1.549046,0.377711,0.032645,0.354441,0.315470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14222,"whole milk, other vegetables","tropical fruit, shopping bags",0.191380,0.044638,0.011544,0.060322,1.351345,1.0,0.003001,1.016690,0.321531,0.051429,0.016416,0.159471
12460,"whole milk, other vegetables","sausage, citrus fruit",0.191380,0.039251,0.011544,0.060322,1.536824,1.0,0.004033,1.022423,0.431980,0.052693,0.021932,0.177220
14166,"whole milk, other vegetables","whipped/sour cream, sausage",0.191380,0.038994,0.011544,0.060322,1.546935,1.0,0.004082,1.022696,0.437239,0.052755,0.022193,0.178187
11259,"whole milk, other vegetables","curd, bottled water",0.191380,0.033094,0.011544,0.060322,1.822745,1.0,0.005211,1.028976,0.558206,0.054217,0.028160,0.204579
