In [None]:
# !pip install mlxtend

In [None]:
# !pip install pycaret==2.3

In [None]:
# Switch PyCaret into its Google Colab mode; the call prints "Colab mode enabled." as confirmation.
# NOTE(review): presumably only needed when the notebook runs on Colab — confirm before running elsewhere.
from pycaret.utils import enable_colab
enable_colab()

Colab mode enabled.


In [None]:
# Show the installed PyCaret version so the environment is recorded next to the results
# (the saved output reports 2.3.0, in line with the pin in the commented-out install cell).
import pycaret
pycaret.__version__

'2.3.0'

In [None]:
# Core data handling.
import pandas as pd
# None lifts the row limit, so every DataFrame/Series is rendered in full;
# keep displayed objects small (e.g. via .head()) to avoid flooding the output.
pd.set_option('display.max_rows', None)
import numpy as np
# NOTE(review): seaborn/matplotlib are not referenced by the cells visible here — confirm they are still needed.
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
# Silences every warning from this point on, including any raised while importing
# the libraries below; remove this line when debugging unexpected results.
warnings.filterwarnings('ignore')

# mlxtend supplies the frequent-itemset mining and the rule generation;
# in the cells visible here pycaret.arules is used only for plot_model.
from mlxtend.frequent_patterns import association_rules, apriori
from pycaret.arules import plot_model

In [None]:
# Location of the raw purchase log (one row per purchased item).
# The /content prefix is where this notebook expects the file to have been uploaded.
BASKET_CSV_PATH = "/content/bread_basket.csv"

# Read the CSV into the long-format frame the exploration cells below work on.
basket = pd.read_csv(BASKET_CSV_PATH)

In [None]:
# Preview the first five rows of the raw log to check the columns and value formats.
basket.head(n=5)

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
0,1,Bread,30-10-2016 09:58,morning,weekend
1,2,Scandinavian,30-10-2016 10:05,morning,weekend
2,2,Scandinavian,30-10-2016 10:05,morning,weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend


In [None]:
# Total number of transactions.
# Count the distinct IDs instead of taking the largest one: max() only equals the
# number of transactions when the IDs run 1..N without gaps, and the support values
# computed further down are fractions of the distinct transactions.
print(basket['Transaction'].nunique())

# Total number of unique items
print(basket['Item'].nunique())

9684
94


In [None]:
# Inspect every line item recorded for one transaction (ID 3) as a sanity check
# that a transaction spans several rows of the raw log.
is_transaction_3 = basket['Transaction'].eq(3)
basket[is_transaction_3]

Unnamed: 0,Transaction,Item,date_time,period_day,weekday_weekend
3,3,Hot chocolate,30-10-2016 10:07,morning,weekend
4,3,Jam,30-10-2016 10:07,morning,weekend
5,3,Cookies,30-10-2016 10:07,morning,weekend


In [None]:
# One-hot encode the long-format purchase log into a Transaction x Item boolean matrix
# (True when the item occurs at least once in the transaction).
#
# `basket` is rebound from the raw log to the encoded matrix because the following
# cells read the matrix under that name. Encoding `basket` in place made this cell
# fail when re-run (the encoded frame has no 'Item' column to group by), so the raw
# log is kept in `basket_raw` and the matrix is always derived from it.
if {'Transaction', 'Item'}.issubset(basket.columns):
    # `basket` still holds the raw log: first run, or the CSV was just reloaded.
    basket_raw = basket

basket = (
    basket_raw
    .groupby(['Transaction', 'Item'])
    .size()                   # purchases of each item within each transaction
    .unstack(fill_value=0)    # items become columns; absent pairs count as 0
    .astype(bool)             # any positive count -> True
)

In [None]:
# Preview the encoded matrix: one row per transaction, one boolean column per item.
basket.head(n=5)

Item,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,The BART,The Nomad,Tiffin,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Support of a single item = share of transactions that contain it.
# The mean of a boolean column is exactly that share.
item_share = basket.mean()
support = item_share.sort_values(ascending=False)

# Ten most frequently bought items.
support.head(10)

Item
Coffee           0.478394
Bread            0.327205
Tea              0.142631
Cake             0.103856
Pastry           0.086107
Sandwich         0.071844
Medialuna        0.061807
Hot chocolate    0.058320
Cookies          0.054411
Brownie          0.040042
dtype: float64

In [None]:
# Work on a copy so the one-hot matrix in `basket` stays untouched for apriori later on.
basket_expanded = basket.copy()

# One indicator column per item pair: True when both items occur in the same transaction.
for partner in ('Bread', 'Tea', 'Cake'):
    basket_expanded['Coffee & ' + partner] = basket_expanded['Coffee'] & basket_expanded[partner]

# Recompute support so the pair columns are ranked alongside the single items.
share_with_pairs = basket_expanded.mean()
support = share_with_pairs.sort_values(ascending=False)
support.head(10)

Item
Coffee            0.478394
Bread             0.327205
Tea               0.142631
Cake              0.103856
Coffee & Bread    0.090016
Pastry            0.086107
Sandwich          0.071844
Medialuna         0.061807
Hot chocolate     0.058320
Coffee & Cake     0.054728
dtype: float64

In [None]:
# Confidence(A -> B) = support(A and B) / support(A):
# of the transactions containing A, the share that also contains B.
coffee_support = support['Coffee']

# Confidence of "if Coffee then Bread"
coffee_then_bread = support['Coffee & Bread'] / coffee_support
print(coffee_then_bread)

# Confidence of "if Coffee then Cake"
coffee_then_cake = support['Coffee & Cake'] / coffee_support
print(coffee_then_cake)

0.1881625441696113
0.11439929328621908


In [None]:
# Lift(A -> B) = support(A and B) / (support(A) * support(B)).
# Above 1: the pair occurs more often than independence would predict; below 1: less often.
coffee_support = support['Coffee']

# Lift of "if Coffee then Bread"
expected_coffee_bread = coffee_support * support['Bread']
print(support['Coffee & Bread'] / expected_coffee_bread)

# Lift of "if Coffee then Cake"
expected_coffee_cake = coffee_support * support['Cake']
print(support['Coffee & Cake'] / expected_coffee_cake)

0.575059244612648
1.1015150670946732


In [None]:
# Leverage(A -> B) = support(A and B) - support(A) * support(B):
# the gap between the observed co-occurrence and the one expected under independence.
coffee_support = support['Coffee']

# Leverage of "if Coffee then Bread"
expected_coffee_bread = coffee_support * support['Bread']
print(support['Coffee & Bread'] - expected_coffee_bread)

# Leverage of "if Coffee then Cake"
expected_coffee_cake = coffee_support * support['Cake']
print(support['Coffee & Cake'] - expected_coffee_cake)

-0.06651732451054836
0.005043699519652715


In [None]:
# Conviction(A -> B) = (1 - support(B)) / (1 - confidence(A -> B)),
# computed here in the equivalent form
# support(A) * (1 - support(B)) / (support(A) - support(A and B)).
coffee_support = support['Coffee']

# Conviction of "if Coffee then Bread"
bread_numerator = coffee_support * (1 - support['Bread'])
bread_denominator = coffee_support - support['Coffee & Bread']
print(bread_numerator / bread_denominator)

# Conviction of "if Coffee then Cake"
cake_numerator = coffee_support * (1 - support['Cake'])
cake_denominator = coffee_support - support['Coffee & Cake']
print(cake_numerator / cake_denominator)

0.8287305559052395
1.0119048917589808


In [None]:
# Apply Apriori algorithm
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)
frequent_itemsets.sort_values(by='support', ascending=False).head(20)

Unnamed: 0,support,itemsets
6,0.478394,(Coffee)
2,0.327205,(Bread)
26,0.142631,(Tea)
4,0.103856,(Cake)
34,0.090016,"(Coffee, Bread)"
19,0.086107,(Pastry)
21,0.071844,(Sandwich)
16,0.061807,(Medialuna)
12,0.05832,(Hot chocolate)
42,0.054728,"(Coffee, Cake)"


In [None]:
# Compute association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

In [None]:
# Display the rules table: antecedents, consequents and the metrics computed for each rule.
# With display.max_rows set to None above, every rule is rendered.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Alfajores),(Coffee),0.036344,0.478394,0.019651,0.540698,1.130235,0.002264,1.135648
1,(Cake),(Coffee),0.103856,0.478394,0.054728,0.526958,1.101515,0.005044,1.102664
2,(Cookies),(Coffee),0.054411,0.478394,0.028209,0.518447,1.083723,0.002179,1.083174
3,(Hot chocolate),(Coffee),0.05832,0.478394,0.029583,0.507246,1.060311,0.001683,1.058553
4,(Juice),(Coffee),0.038563,0.478394,0.020602,0.534247,1.11675,0.002154,1.119919
5,(Medialuna),(Coffee),0.061807,0.478394,0.035182,0.569231,1.189878,0.005614,1.210871
6,(Pastry),(Coffee),0.086107,0.478394,0.047544,0.552147,1.154168,0.006351,1.164682
7,(Sandwich),(Coffee),0.071844,0.478394,0.038246,0.532353,1.112792,0.003877,1.115384
8,(Scone),(Coffee),0.034548,0.478394,0.018067,0.522936,1.093107,0.001539,1.093366
9,(Spanish Brunch),(Coffee),0.018172,0.478394,0.010882,0.598837,1.251766,0.002189,1.300235


In [None]:
# Plot rules
# Plot the rules with PyCaret's association-rule plotter in its '2d' mode.
# The input is the rules table produced by mlxtend above, not a model created by PyCaret.
# NOTE(review): this presumably relies on the mlxtend column names matching what
# plot_model expects — confirm when upgrading either library.
plot_model(rules, plot = '2d')