In [3]:
# Question 3: Implement Apriori algorithm on a simple transaction dataset.

In [4]:
from collections import defaultdict
from itertools import combinations

def generate_frequent_itemsets(transactions, min_support):
    item_counts = defaultdict(int)
    for transaction in transactions:
        for item in transaction:
            item_counts[frozenset([item])] += 1
    
    L1 = {itemset: count for itemset, count in item_counts.items() if count >= min_support}
    
    frequent_itemsets = {1: L1}
    k = 2
    while True:
        Ck = generate_candidate_itemsets(frequent_itemsets[k-1], k)
        Lk = defaultdict(int)
        for transaction in transactions:
            for candidate in Ck:
                if candidate.issubset(transaction):
                    Lk[candidate] += 1
        
        Lk = {itemset: count for itemset, count in Lk.items() if count >= min_support}
        if not Lk:
            break
        frequent_itemsets[k] = Lk
        k += 1
    return frequent_itemsets

def generate_candidate_itemsets(Lk_minus_1, k):
    candidates = set()
    items = sorted(list(Lk_minus_1.keys()))
    for i in range(len(items)):
        for j in range(i + 1, len(items)):
            itemset1 = list(items[i])
            itemset2 = list(items[j])
            
            itemset1.sort()
            itemset2.sort()

            if k == 2 or itemset1[:-1] == itemset2[:-1]:
                new_candidate = frozenset(sorted(list(itemset1) + list(itemset2)))
                candidates.add(new_candidate)
    return candidates

def generate_association_rules(frequent_itemsets, min_confidence):
    rules = []
    for k, Lk in frequent_itemsets.items():
        if k > 1:
            for itemset in Lk:
                for antecedent in combinations(itemset, 1):
                    antecedent = frozenset(antecedent)
                    consequent = itemset - antecedent
                    if consequent:
                        confidence = Lk[itemset] / frequent_itemsets[len(antecedent)][antecedent]
                        if confidence >= min_confidence:
                            rules.append((antecedent, consequent, confidence, Lk[itemset]))
    return rules
transactions = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']
]
min_support = 3
min_confidence = 0.7
frequent_itemsets = generate_frequent_itemsets(transactions, min_support)
for k, itemsets in frequent_itemsets.items():
    print(f"Frequent {k}-itemsets:")
    for itemset, count in itemsets.items():
        print(f"  {list(itemset)}: {count}")

association_rules = generate_association_rules(frequent_itemsets, min_confidence)
print("\nAssociation Rules:")
for antecedent, consequent, confidence, support in association_rules:
    print(f"  {list(antecedent)} -> {list(consequent)} (Support: {support}, Confidence: {confidence:.2f})")

Frequent 1-itemsets:
  ['Milk']: 3
  ['Onion']: 4
  ['Kidney Beans']: 5
  ['Eggs']: 4
  ['Yogurt']: 3
Frequent 2-itemsets:
  ['Eggs', 'Onion']: 3
  ['Kidney Beans', 'Yogurt']: 3
  ['Kidney Beans', 'Eggs']: 4
  ['Kidney Beans', 'Milk']: 3
  ['Kidney Beans', 'Onion']: 3
Frequent 3-itemsets:
  ['Kidney Beans', 'Eggs', 'Onion']: 3

Association Rules:
  ['Eggs'] -> ['Onion'] (Support: 3, Confidence: 0.75)
  ['Onion'] -> ['Eggs'] (Support: 3, Confidence: 0.75)
  ['Yogurt'] -> ['Kidney Beans'] (Support: 3, Confidence: 1.00)
  ['Kidney Beans'] -> ['Eggs'] (Support: 4, Confidence: 0.80)
  ['Eggs'] -> ['Kidney Beans'] (Support: 4, Confidence: 1.00)
  ['Milk'] -> ['Kidney Beans'] (Support: 3, Confidence: 1.00)
  ['Onion'] -> ['Kidney Beans'] (Support: 3, Confidence: 0.75)
  ['Eggs'] -> ['Kidney Beans', 'Onion'] (Support: 3, Confidence: 0.75)
  ['Onion'] -> ['Kidney Beans', 'Eggs'] (Support: 3, Confidence: 0.75)
