In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the order data from Order3.csv (path is relative to the notebook's working directory)
df = pd.read_csv("Order3.csv")

In [14]:
# Display the loaded frame to inspect its columns and a sample of its rows
df

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,10/30/2016 9:58,Morning,Weekend
1,2,Scandinavian,10/30/2016 10:05,Morning,Weekend
2,2,Scandinavian,10/30/2016 10:05,Morning,Weekend
3,3,Hot chocolate,10/30/2016 10:07,Morning,Weekend
4,3,Jam,10/30/2016 10:07,Morning,Weekend
...,...,...,...,...,...
20502,9682,Coffee,9/4/2017 14:32,Afternoon,Weekend
20503,9682,Tea,9/4/2017 14:32,Afternoon,Weekend
20504,9683,Coffee,9/4/2017 14:57,Afternoon,Weekend
20505,9683,Pastry,9/4/2017 14:57,Afternoon,Weekend


In [3]:
# Trim leading/trailing whitespace from every item name so that, for example,
# "Bread " and "Bread" are treated as the same item in the later cells.
df['Items'] = df['Items'].str.strip()

In [4]:
# Discard every row that has a missing value in any column
df = df.dropna()

In [16]:
# Build the transaction list: one list of item names per TransactionNo
num_unique_transactions = df['TransactionNo'].nunique()

# Group the rows by transaction and gather each group's items into a list
items_by_transaction = df.groupby('TransactionNo')['Items']
transaction_series = items_by_transaction.apply(list)

# Plain Python list of lists, ordered by TransactionNo
transactions = transaction_series.tolist()

# Preview the first five transactions and report how many there are
print(transactions[:5])
print(len(transactions))


[['Bread'], ['Scandinavian', 'Scandinavian'], ['Hot chocolate', 'Jam', 'Cookies'], ['Muffin'], ['Coffee', 'Pastry', 'Bread']]
9465


In [6]:
# Apriori works on a boolean item matrix, so one-hot encode the transactions:
# one row per transaction, one column per distinct item.
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
te.fit(transactions)                  # learn the set of distinct item names
te_ary = te.transform(transactions)   # boolean array: does the transaction contain the item?

# Wrap the array in a DataFrame whose columns are the learned item names
df_onehot = pd.DataFrame(data=te_ary, columns=te.columns_)

df_onehot.head()

Unnamed: 0,Adjustment,Afternoon with the baker,Alfajores,Argentina Night,Art Tray,Bacon,Baguette,Bakewell,Bare Popcorn,Basket,...,The BART,The Nomad,Tiffin,Toast,Truffles,Tshirt,Valentine's card,Vegan Feast,Vegan mincepie,Victorian Sponge
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [7]:
#Train Apriori
from mlxtend.frequent_patterns import apriori, association_rules

In [8]:
# Minimum support: an itemset must appear in at least 50 transactions.
# apriori takes the threshold as a fraction of all transactions, hence the division.
min_support = 50 / len(transactions)

# Mine the frequent itemsets from the one-hot encoded transactions
frequent_itemsets = apriori(
    df_onehot,
    min_support=min_support,
    use_colnames=True,
)

# Show the ten itemsets with the highest support
print(frequent_itemsets.sort_values(by='support', ascending=False).head(10))

     support         itemsets
6   0.478394         (Coffee)
2   0.327205          (Bread)
30  0.142631            (Tea)
4   0.103856           (Cake)
41  0.090016  (Bread, Coffee)
22  0.086107         (Pastry)
24  0.071844       (Sandwich)
19  0.061807      (Medialuna)
14  0.058320  (Hot chocolate)
56  0.054728   (Cake, Coffee)


In [9]:
# Derive association rules, keeping only those with lift >= 1.0
# (antecedent and consequent co-occur at least as often as expected under independence)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

# Rank the rules: highest confidence first, ties broken by highest lift.
# Sorting only reorders the rules; it does not filter any of them out.
rules_sorted = rules.sort_values(
    by=['confidence', 'lift'],
    ascending=[False, False],
)

print(rules_sorted.head(10))

              antecedents consequents  antecedent support  consequent support  \
30     (Keeping It Local)    (Coffee)            0.006656            0.478394   
49                (Toast)    (Coffee)            0.033597            0.478394   
38                (Salad)    (Coffee)            0.010460            0.478394   
89  (Cake, Hot chocolate)    (Coffee)            0.011410            0.478394   
45       (Spanish Brunch)    (Coffee)            0.018172            0.478394   
33            (Medialuna)    (Coffee)            0.061807            0.478394   
36               (Pastry)    (Coffee)            0.086107            0.478394   
46               (Tiffin)    (Coffee)            0.015425            0.478394   
0             (Alfajores)    (Coffee)            0.036344            0.478394   
22    (Hearty & Seasonal)    (Coffee)            0.010565            0.478394   

     support  confidence      lift  representativity  leverage  conviction  \
30  0.005388    0.809524  1.69

In [13]:
# Interactive lookup: print every mined rule whose antecedent is exactly the
# item(s) typed by the user.
item_in = input("Enter an item to find rules for (e.g., Bread,Coffee): ").strip()

# The prompt advertises comma-separated input ("Bread,Coffee"), so split the
# text into individual item names rather than treating the whole string as a
# single item (which could never equal a multi-item antecedent). Whitespace
# around each name is trimmed and empty fragments are dropped, so
# "Bread, Coffee" and "Bread,Coffee" are equivalent. A single item such as
# "Pastry" yields the same one-element set as before.
# NOTE(review): assumes no item name itself contains a comma — confirm against the data.
item_set = frozenset(part.strip() for part in item_in.split(",") if part.strip())

# Equality against the frozenset keeps only rules whose antecedent is exactly
# item_set, not rules whose antecedent merely contains those items.
matching_rules = rules_sorted[rules_sorted['antecedents'] == item_set]

if matching_rules.empty:
    print(f"\nNo rules found where '{item_in}' is the only item.")
else:
    print(f"\n--- Rules for 'IF customer buys {item_in}' ---")
    print(matching_rules)

Enter an item to find rules for (e.g., Bread,Coffee):  Pastry



--- Rules for 'IF customer buys Pastry' ---
   antecedents      consequents  antecedent support  consequent support  \
36    (Pastry)         (Coffee)            0.086107            0.478394   
5     (Pastry)          (Bread)            0.086107            0.327205   
85    (Pastry)  (Bread, Coffee)            0.086107            0.090016   
62    (Pastry)      (Medialuna)            0.086107            0.061807   
56    (Pastry)  (Hot chocolate)            0.086107            0.058320   

     support  confidence      lift  representativity  leverage  conviction  \
36  0.047544    0.552147  1.154168               1.0  0.006351    1.164682   
5   0.029160    0.338650  1.034977               1.0  0.000985    1.017305   
85  0.011199    0.130061  1.444872               1.0  0.003448    1.046033   
62  0.009192    0.106748  1.727135               1.0  0.003870    1.050313   
56  0.005705    0.066258  1.136103               1.0  0.000683    1.008501   

    zhangs_metric   jaccard  certai