In [2]:
pip install mlxtend


Collecting mlxtend
  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.4-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   --------------- ------------------------ 0.5/1.4 MB 4.2 MB/s eta 0:00:01
   ---------------------------------------- 1.4/1.4 MB 3.5 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.4
Note: you may need to restart the kernel to use updated packages.


In [6]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

df = pd.read_excel("Documents/Python/Online retail.xlsx" , header=None)
df.columns = ['Items']
df.dropna(inplace=True)


In [16]:
# Preprocessing
print("\n Preprocessing transactions")
transactions = df['Items'].apply(lambda x: x.split(','))
print(f"Total transactions: {len(transactions)}")
print(" Sample transaction:", transactions.iloc[0])


 Preprocessing transactions
Total transactions: 7501
 Sample transaction: ['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil']


In [22]:
# Transaction Encoding
print("\n🔹 Performing one-hot encoding...")
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

print(df_encoded.head())


🔹 Performing one-hot encoding...
    asparagus  almonds  antioxydant juice  asparagus  avocado  babies food  \
0       False     True               True      False     True        False   
1       False    False              False      False    False        False   
2       False    False              False      False    False        False   
3       False    False              False      False     True        False   
4       False    False              False      False    False        False   

   bacon  barbecue sauce  black tea  blueberries  ...  turkey  vegetables mix  \
0  False           False      False        False  ...   False            True   
1  False           False      False        False  ...   False           False   
2  False           False      False        False  ...   False           False   
3  False           False      False        False  ...    True           False   
4  False           False      False        False  ...   False           False   

   water s

In [24]:
# Run Apriori
frequent_itemsets = apriori(df_encoded, min_support=0.02, use_colnames=True)
print(f" Found {len(frequent_itemsets)} frequent itemsets.")
print(" Sample frequent itemsets:")
print(frequent_itemsets.head())

 Found 103 frequent itemsets.
 Sample frequent itemsets:
    support    itemsets
0  0.020397   (almonds)
1  0.033329   (avocado)
2  0.033729  (brownies)
3  0.087188   (burgers)
4  0.030129    (butter)


In [26]:
# Generate Association Rules

rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
print(f" Generated {len(rules)} rules.")
print("\n Top 10 rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10))

 Generated 94 rules.

 Top 10 rules:
       antecedents      consequents   support  confidence      lift
0           (eggs)        (burgers)  0.028796    0.160237  1.837830
1        (burgers)           (eggs)  0.028796    0.330275  1.837830
2   (french fries)        (burgers)  0.021997    0.128705  1.476173
3        (burgers)   (french fries)  0.021997    0.252294  1.476173
4  (mineral water)        (burgers)  0.024397    0.102349  1.173883
5        (burgers)  (mineral water)  0.024397    0.279817  1.173883
6        (burgers)      (spaghetti)  0.021464    0.246177  1.413918
7      (spaghetti)        (burgers)  0.021464    0.123277  1.413918
8  (mineral water)           (cake)  0.027463    0.115213  1.421397
9           (cake)  (mineral water)  0.027463    0.338816  1.421397


1. What is Lift and why is it important in Association Rules?
Lift = (Confidence of Rule) / (Support of Consequent)

It tells you how much more likely the consequent is, given the antecedent, compared to its general frequency.

Lift > 1: Positive association (useful)

Lift = 1: No association

Lift < 1: Negative association

2. What is Support and Confidence? How do you calculate them?
Support: Frequency of itemset appearing in the dataset.

Support (𝐴,𝐵) =Transactions containing A and B / Total transactions

Confidence: Likelihood that item B is purchased when A is.

Confidence(𝐴⇒𝐵)=Support(𝐴,𝐵)/ Support(𝐴)
 


3. What are some limitations or challenges of Association Rule Mining?

Too many rules: Difficult to interpret

Sparse data: Can lead to weak patterns

Requires threshold tuning: Bad thresholds → meaningless rules

Ignores sequence/timing: Doesn’t account for when items are bought

Binary format only: No support for quantity or price unless extended

