In [87]:
import numpy as np
import pandas as pd
import random

In [88]:
items = [
    {
        'transaction_id':1,
        'product_code': '4011',
        'product_description': 'Bananas',
        'qty': 5,
        'sales': 10.00
    },
    {
        'transaction_id':1,
        'product_code': '1',
        'product_description': 'Bread',
        'qty': 5,
        'sales': 10.00
    },
    {
        'transaction_id':1,
        'product_code': '2',
        'product_description': 'Milk',
        'qty': 5,
        'sales': 10.00
    },
    {
        'transaction_id':1,
        'product_code': '3',
        'product_description': 'Cookies',
        'qty': 5,
        'sales': 10.00
    },  
    {
        'transaction_id':1,
        'product_code': '4',
        'product_description': 'Hot Dogs',
        'qty': 5,
        'sales': 10.00
    }, 
    {
        'transaction_id':1,
        'product_code': '5',
        'product_description': 'HD Buns',
        'qty': 5,
        'sales': 10.00
    }, 
    {
        'transaction_id':1,
        'product_code': '6',
        'product_description': 'Ketchup',
        'qty': 5,
        'sales': 10.00
    }, 
    {
        'transaction_id':1,
        'product_code': '7',
        'product_description': 'Mustard',
        'qty': 5,
        'sales': 10.00
    },  
    {
        'transaction_id':1,
        'product_code': '8',
        'product_description': 'Pickles',
        'qty': 5,
        'sales': 10.00
    },    
    {
        'transaction_id':1,
        'product_code': '9',
        'product_description': 'Relish',
        'qty': 5,
        'sales': 10.00
    },  
    {
        'transaction_id':1,
        'product_code': '10',
        'product_description': 'HB Buns',
        'qty': 5,
        'sales': 10.00
    },    
    {
        'transaction_id':1,
        'product_code': '11',
        'product_description': 'Hamburgers',
        'qty': 5,
        'sales': 10.00
    },   
    {
        'transaction_id':1,
        'product_code': '12',
        'product_description': 'Cheese',
        'qty': 5,
        'sales': 10.00
    }, 
    {
        'transaction_id':1,
        'product_code': '13',
        'product_description': 'Beer',
        'qty': 5,
        'sales': 10.00
    },    
    {
        'transaction_id':1,
        'product_code': '14',
        'product_description': 'Wine',
        'qty': 5,
        'sales': 10.00
    },   
    {
        'transaction_id':1,
        'product_code': '15',
        'product_description': 'Diapers',
        'qty': 5,
        'sales': 10.00
    },                                    

]

In [89]:
items_ds = pd.DataFrame(items)
items_ds.head()

Unnamed: 0,transaction_id,product_code,product_description,qty,sales
0,1,4011,Bananas,5,10.0
1,1,1,Bread,5,10.0
2,1,2,Milk,5,10.0
3,1,3,Cookies,5,10.0
4,1,4,Hot Dogs,5,10.0


In [90]:
def generate_transactions(base_items, dtc=10, max_items=8, mqpi=10, minia=.99, maxia=5.99):
    """Generate a random set of transactions using a base dataset of items to choose from.
    
    Args:
        base_items (list): list containing the base dataset of items.
        dtc (int): Desired Transaction Count - Number of transactions to generate.
        max_items (int): Maximum number of items per transaction.
        mqpi (int): Maximum quantity per item.
        minia (float): Minimum item price for each item.
        maxia (float): Maximum item price for each item.

    """

    transaction_list = []
    for tx_id in range(1, dtc + 1):
        num_items = random.randint(1,max_items)

        for _ in range(num_items):
            item = random.choice(base_items)
            qty = random.randint(1, mqpi)
            price_per_unit = round(random.uniform(minia, maxia), 2)
            sales = round(qty * price_per_unit, 2)

            transaction_list.append({
                'transaction_id': tx_id,
                'product_code': item['product_code'],
                'product_description': item['product_description'],
                'qty': qty,
                'price_per_unit': price_per_unit,
                'sales': sales
            })
    return pd.DataFrame(transaction_list)

In [91]:
transactions = generate_transactions(items, 25, 8 )
transactions

Unnamed: 0,transaction_id,product_code,product_description,qty,price_per_unit,sales
0,1,5,HD Buns,8,3.38,27.04
1,1,9,Relish,8,5.79,46.32
2,1,13,Beer,3,5.51,16.53
3,2,7,Mustard,7,5.88,41.16
4,2,15,Diapers,7,1.35,9.45
...,...,...,...,...,...,...
86,24,1,Bread,7,2.72,19.04
87,24,12,Cheese,7,5.04,35.28
88,25,6,Ketchup,9,3.71,33.39
89,25,1,Bread,6,4.18,25.08


In [92]:
grouped = transactions.groupby('transaction_id')['product_description'].apply(list)

In [93]:
grouped

transaction_id
1                               [HD Buns, Relish, Beer]
2         [Mustard, Diapers, Mustard, HD Buns, Diapers]
3     [Bread, HD Buns, Hamburgers, Beer, Hamburgers,...
4                           [Diapers, HD Buns, Ketchup]
5                                    [Diapers, Bananas]
6                                [Beer, Bread, HD Buns]
7                                              [Cheese]
8     [Cookies, Cookies, Bread, Pickles, Relish, Hot...
9                      [Wine, Mustard, Cookies, Cheese]
10    [Ketchup, Hot Dogs, Beer, Cheese, Mustard, HB ...
11                            [Bread, HD Buns, Diapers]
12    [Cheese, HD Buns, Pickles, HD Buns, Cookies, B...
13    [Cookies, Wine, Mustard, Wine, Bananas, Cheese...
14                                    [Bananas, Relish]
15                                      [Bananas, Milk]
16                      [Hot Dogs, Beer, HB Buns, Wine]
17                                    [Relish, Diapers]
18                           [Pic

In [94]:
# get rid of transactions with only 1 item
filtered = grouped[grouped.apply(lambda x: len(x) > 1)]
item_breakdown = filtered.tolist()

In [95]:
item_breakdown

[['HD Buns', 'Relish', 'Beer'],
 ['Mustard', 'Diapers', 'Mustard', 'HD Buns', 'Diapers'],
 ['Bread', 'HD Buns', 'Hamburgers', 'Beer', 'Hamburgers', 'Pickles'],
 ['Diapers', 'HD Buns', 'Ketchup'],
 ['Diapers', 'Bananas'],
 ['Beer', 'Bread', 'HD Buns'],
 ['Cookies',
  'Cookies',
  'Bread',
  'Pickles',
  'Relish',
  'Hot Dogs',
  'HD Buns',
  'Cheese'],
 ['Wine', 'Mustard', 'Cookies', 'Cheese'],
 ['Ketchup', 'Hot Dogs', 'Beer', 'Cheese', 'Mustard', 'HB Buns'],
 ['Bread', 'HD Buns', 'Diapers'],
 ['Cheese', 'HD Buns', 'Pickles', 'HD Buns', 'Cookies', 'Bread'],
 ['Cookies',
  'Wine',
  'Mustard',
  'Wine',
  'Bananas',
  'Cheese',
  'Relish',
  'HB Buns'],
 ['Bananas', 'Relish'],
 ['Bananas', 'Milk'],
 ['Hot Dogs', 'Beer', 'HB Buns', 'Wine'],
 ['Relish', 'Diapers'],
 ['Pickles', 'Relish', 'Ketchup'],
 ['Milk', 'Hot Dogs', 'HB Buns', 'Ketchup', 'Wine', 'Hamburgers'],
 ['Wine', 'Hot Dogs', 'HD Buns', 'Hamburgers', 'Mustard', 'Bread', 'Cheese'],
 ['Ketchup', 'Bread', 'HB Buns']]

## Preprocessing

In [36]:
from collections import defaultdict, Counter

In [69]:
def preprocess(transactions, min_support=2):
    item_counts = Counter()
    for transaction in transactions:
        item_counts.update(transaction)
    
    # Filter out low support items
    items_above_support = {item for item, count in item_counts.items() if count >= min_support}
    
    # Reorder each transaction
    reordered = []
    for transaction in transactions:
        filtered = [item for item in transaction if item in items_above_support]
        sorted_items = sorted(filtered, key=lambda item: (-item_counts[item], item))  # descending freq
        if sorted_items:
            reordered.append(sorted_items)
    return reordered, item_counts

## Build the FP Tree

In [77]:
class FPNode:
    def __init__(self, item, parent):
        self.item = item
        self.count = 1
        self.parent = parent
        self.children = {}
        self.link = None  # link to next node with same item

    def increment(self, count=1):
        self.count += count

    


In [78]:
def build_tree(transactions):
    root = FPNode(None, None)
    header_table = {}

    for transaction in transactions:
        current_node = root
        for item in transaction:
            if item in current_node.children:
                current_node.children[item].increment()
            else:
                new_node = FPNode(item, current_node)
                current_node.children[item] = new_node

                # Add to header table
                if item in header_table:
                    last_node = header_table[item]
                    while last_node.link:
                        last_node = last_node.link
                    last_node.link = new_node
                else:
                    header_table[item] = new_node
            current_node = current_node.children[item]
    return root, header_table

## Mine the tree

In [84]:
def ascend_fp_tree(node):
    path = []
    while node.parent and node.parent.item is not None:
        node = node.parent
        path.append(node.item)
    return path[::-1]  # reverse to preserve top-down order

In [80]:
def find_prefix_paths(base_item, node):
    paths = []
    while node:
        path = ascend_fp_tree(node)
        if path:
            paths.append((path, node.count))
        node = node.link
    return paths

## recursively mine

In [81]:
def mine_tree(header_table, prefix, frequent_patterns, min_support):
    items = sorted(header_table.items(), key=lambda x: x[1].count)  # sort by frequency

    for item, node in items:
        new_pattern = prefix + [item]
        support = 0
        temp_node = node
        while temp_node:
            support += temp_node.count
            temp_node = temp_node.link
        if support >= min_support:
            frequent_patterns[tuple(new_pattern)] = support

            # Build conditional tree
            conditional_patterns = find_prefix_paths(item, node)
            conditional_transactions = []
            for path, count in conditional_patterns:
                conditional_transactions.extend([path] * count)
            if conditional_transactions:
                cond_tree, cond_header = build_tree(conditional_transactions)
                mine_tree(cond_header, new_pattern, frequent_patterns, min_support)

## main function

In [None]:
def fpgrowth(transactions, min_support=2):
    transactions, item_counts = preprocess(transactions, min_support)
    tree, header = build_tree(transactions)
    patterns = {}
    mine_tree(header, [], patterns, min_support)
    return patterns

In [97]:
frequent_patterns = fpgrowth(item_breakdown, min_support=2)
for pattern, support in frequent_patterns.items():
    print(f"{pattern}: {support}")

('Relish',): 6
('Relish', 'Cheese'): 2
('Relish', 'HD Buns'): 2
('Beer',): 5
('Beer', 'Bread'): 2
('Beer', 'Bread', 'HD Buns'): 2
('Beer', 'HD Buns'): 3
('Mustard',): 6
('Mustard', 'Diapers'): 4
('Mustard', 'Diapers', 'Diapers'): 2
('Mustard', 'Diapers', 'Diapers', 'HD Buns'): 2
('Mustard', 'Diapers', 'HD Buns'): 4
('Mustard', 'HD Buns'): 3
('Mustard', 'Cheese'): 4
('Hamburgers',): 4
('Hamburgers', 'Wine'): 2
('Hamburgers', 'Hot Dogs'): 2
('Hamburgers', 'Hot Dogs', 'Wine'): 2
('Hamburgers', 'Beer'): 2
('Hamburgers', 'Beer', 'HD Buns'): 2
('Hamburgers', 'Beer', 'Bread'): 2
('Hamburgers', 'Beer', 'Bread', 'HD Buns'): 2
('Hamburgers', 'HD Buns'): 3
('Hamburgers', 'Bread'): 3
('Hamburgers', 'Bread', 'HD Buns'): 3
('Pickles',): 4
('Pickles', 'Hamburgers'): 2
('Pickles', 'Hamburgers', 'HD Buns'): 2
('Pickles', 'Hamburgers', 'Bread'): 2
('Pickles', 'Hamburgers', 'Bread', 'HD Buns'): 2
('Pickles', 'Hamburgers', 'Beer'): 2
('Pickles', 'Hamburgers', 'Beer', 'HD Buns'): 2
('Pickles', 'Hamburgers'

## association rules

In [98]:
from itertools import combinations

In [99]:
def generate_association_rules(frequent_itemsets, min_confidence=0.5, min_lift=1.0):
    rules = []

    for itemset in frequent_itemsets:
        if len(itemset) < 2:
            continue  # Skip single items

        itemset_support = frequent_itemsets[itemset]

        for i in range(1, len(itemset)):
            for antecedent in combinations(itemset, i):
                antecedent = tuple(sorted(antecedent))
                consequent = tuple(sorted(set(itemset) - set(antecedent)))

                if not consequent:
                    continue

                antecedent_support = frequent_itemsets.get(antecedent)
                consequent_support = frequent_itemsets.get(consequent)

                if not antecedent_support or not consequent_support:
                    continue

                confidence = itemset_support / antecedent_support
                lift = confidence / (consequent_support / sum(frequent_itemsets.values()))

                if confidence >= min_confidence and lift >= min_lift:
                    rules.append({
                        'antecedent': antecedent,
                        'consequent': consequent,
                        'support': itemset_support,
                        'confidence': round(confidence, 3),
                        'lift': round(lift, 3)
                    })
    return rules

In [100]:
def print_rules(rules):
    for rule in rules:
        a = ', '.join(rule['antecedent'])
        c = ', '.join(rule['consequent'])
        print(f"If a customer buys [{a}], they also buy [{c}] "
              f"(Support: {rule['support']}, Confidence: {rule['confidence']*100:.1f}%, Lift: {rule['lift']})")

In [101]:
rules = generate_association_rules(frequent_patterns, min_confidence=0.6, min_lift=1.0)
print_rules(rules)

If a customer buys [Beer, Bread], they also buy [HD Buns] (Support: 2, Confidence: 100.0%, Lift: 44.7)
If a customer buys [Beer, HD Buns], they also buy [Bread] (Support: 2, Confidence: 66.7%, Lift: 42.571)
If a customer buys [Beer], they also buy [HD Buns] (Support: 3, Confidence: 60.0%, Lift: 26.82)
If a customer buys [Mustard], they also buy [Diapers] (Support: 4, Confidence: 66.7%, Lift: 49.667)
If a customer buys [Diapers], they also buy [Mustard] (Support: 4, Confidence: 66.7%, Lift: 49.667)
If a customer buys [Mustard], they also buy [Diapers, HD Buns] (Support: 4, Confidence: 66.7%, Lift: 74.5)
If a customer buys [Diapers, HD Buns], they also buy [Mustard] (Support: 4, Confidence: 100.0%, Lift: 74.5)
If a customer buys [Mustard], they also buy [Cheese] (Support: 4, Confidence: 66.7%, Lift: 49.667)
If a customer buys [Cheese], they also buy [Mustard] (Support: 4, Confidence: 66.7%, Lift: 49.667)
If a customer buys [Hamburgers, Hot Dogs], they also buy [Wine] (Support: 2, Confide

In [104]:
rules_ds = pd.DataFrame(rules)
rules_ds.head()

Unnamed: 0,antecedent,consequent,support,confidence,lift
0,"(Beer, Bread)","(HD Buns,)",2,1.0,44.7
1,"(Beer, HD Buns)","(Bread,)",2,0.667,42.571
2,"(Beer,)","(HD Buns,)",3,0.6,26.82
3,"(Mustard,)","(Diapers,)",4,0.667,49.667
4,"(Diapers,)","(Mustard,)",4,0.667,49.667


1️⃣ Antecedent
The "if" part of the rule.

"Bread and Milk"

These are the items that appear first in the rule (aka LHS = left-hand side).

2️⃣ Consequent
The "then" part of the rule.

"Diapers"

These are the items that follow (aka RHS = right-hand side). The rule implies that if someone buys the antecedent, they also tend to buy the consequent.

3️⃣ Support

 - 📊 How frequent the whole rule is across all transactions.

$
\Large
    \text{Support}=\frac{\text{transactions with Break, Milk, and Diapers}}{\text{total transactions}} = \frac{20}{100} = 0.20
$


Support tells us: How often this combo actually happens.

4️⃣ Confidence

 - 🔐 How likely the consequent is given the antecedent.

$
\Large
\text{Confidence} = \frac{\text{transactions with Break, Milk, and Diapers}}{\text{transactions with Bread and Milk}} = \frac{20}{30} = 0.67
$


Confidence tells us: “When Bread and Milk are bought, how often is Diapers also bought?”

5️⃣ Lift
 - 🚀 How much stronger this rule is than random chance.

$
\Large
\text{Lift} = \frac{\text{Confidence}}{\text{Support of Consequent}} = \frac{0.67}{0.40} = 1.675
$

Lift Tells us:

 - $>$ 1.0: The rule is better than random
 - = 1.0: The rule is no better then chance
 - $<$ 1.0: The rule is worse than random

🧠 Summary Table:
Term	Meaning	Example Value
Antecedent	Items on the left of the rule	Bread + Milk
Consequent	Items on the right of the rule	Diapers
Support	% of transactions that have all items	20%
Confidence	% of antecedent transactions that include consequent	67%
Lift	Strength compared to random chance	1.675