In [71]:
# Function to calculate the support of an itemset
supports = {}
def get_support(transactions, itemset):
    itemset_key = tuple(itemset)
    if itemset_key in supports:
        return supports[itemset_key]
    count = 0
    for transaction in transactions:
        if all(item in transaction for item in itemset):
            count += 1
    support = count / len(transactions)
    supports[itemset_key] = support
    return support

# Function to calculate confidence of a rule
def get_confidence(left, right, transactions):
    left_support = get_support(transactions, left)
    combined_support = get_support(transactions, left + right)
    return combined_support / left_support if left_support != 0 else 0

# Function to find frequent itemsets
def find_frequent_itemsets(transactions, min_support):
    # Create a unique list of all items in transactions
    unique_items = []
    for transaction in transactions:
        for item in transaction:
            if [item] not in unique_items:
                unique_items.append([item])

    # Filter itemsets by support
    frequent_itemsets = []
    for itemset in unique_items:
        if get_support(transactions, itemset) >= min_support:
            frequent_itemsets.append(itemset)

    return frequent_itemsets


# Function to generate candidate itemsets of length k
def generate_candidates(prev_itemsets, k, transactions, min_support):
    candidates = []
    n = len(prev_itemsets)
    for i in range(n):
        for j in range(i + 1, n):
            itemset1 = prev_itemsets[i]
            itemset2 = prev_itemsets[j]
            if len(itemset1) == k - 1 and itemset1[:k - 2] == itemset2[:k - 2]:
                candidate = itemset1 + [itemset2[k - 2]]
                if candidate not in candidates:
                    if get_support(transactions, candidate) >= min_support:
                        candidates.append(candidate)
    return candidates

# Function to generate association rules
def generate_association_rules(frequent_itemsets, transactions, min_confidence):
    rules = []
    for itemset in frequent_itemsets:
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                left = itemset[:i]
                right = itemset[i:]
                confidence = get_confidence(left, right, transactions)
                if confidence >= min_confidence:
                    rules.append((left, right, confidence))
    return rules

# Main function
def apriori(transactions, min_support, min_confidence):
    frequent_itemsets = []
    k = 1
    prev_itemsets = find_frequent_itemsets(transactions, min_support)

    while len(prev_itemsets) > 0:
        frequent_itemsets.extend(prev_itemsets)
        prev_itemsets = generate_candidates(prev_itemsets, k + 1, transactions, min_support)
        k += 1

    association_rules = generate_association_rules(frequent_itemsets, transactions, min_confidence)

    return frequent_itemsets, association_rules

# Sample data
transactions = [
    ['bread', 'milk'],
    ['bread', 'diaper', 'beer', 'egg'],
    ['milk', 'diaper', 'beer', 'cola'],
    ['bread', 'milk', 'diaper', 'beer'],
    ['bread', 'milk', 'diaper', 'cola']
]

# Run Apriori algorithm
min_support = 0.3
min_confidence = 0.6
frequent_itemsets, association_rules = apriori(transactions, min_support, min_confidence)

# Print the results
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset)

print("\nAssociation Rules:")
for rule in association_rules:
    print(f"Rule: {rule[0]} -> {rule[1]} with confidence: {rule[2]:.2f}")

Frequent Itemsets:
['bread']
['milk']
['diaper']
['beer']
['cola']
['bread', 'milk']
['bread', 'diaper']
['bread', 'beer']
['milk', 'diaper']
['milk', 'beer']
['milk', 'cola']
['diaper', 'beer']
['diaper', 'cola']
['bread', 'milk', 'diaper']
['bread', 'diaper', 'beer']
['milk', 'diaper', 'beer']
['milk', 'diaper', 'cola']

Association Rules:
Rule: ['bread'] -> ['milk'] with confidence: 0.75
Rule: ['bread'] -> ['diaper'] with confidence: 0.75
Rule: ['milk'] -> ['diaper'] with confidence: 0.75
Rule: ['diaper'] -> ['beer'] with confidence: 0.75
Rule: ['bread', 'milk'] -> ['diaper'] with confidence: 0.67
Rule: ['bread', 'diaper'] -> ['beer'] with confidence: 0.67
Rule: ['milk', 'diaper'] -> ['beer'] with confidence: 0.67
Rule: ['milk', 'diaper'] -> ['cola'] with confidence: 0.67


# Transactions Reduction

In [64]:
# Function to generate candidate itemsets of length k
def generate_candidates(prev_itemsets, k, transactions, min_support):
    # Step 1: Filter transactions based on the database reduction method
    filtered_transactions = []
    for t in transactions:
        if len(t) >= k:  # Condition 1: Transaction must have at least k items
            for itemset in prev_itemsets:
                if set(itemset).issubset(set(t)):  # Condition 2: Transaction must contain at least one subset of C_k
                    filtered_transactions.append(t)
                    break
    transactions.clear()
    transactions.extend(filtered_transactions)
    # Step 2: Generate candidate itemsets
    candidates = []
    n = len(prev_itemsets)
    for i in range(n):
        for j in range(i + 1, n):
            itemset1 = prev_itemsets[i]
            itemset2 = prev_itemsets[j]

            # Check if the first k-2 items are identical to ensure join compatibility
            if len(itemset1) == k - 1 and itemset1[:k - 2] == itemset2[:k - 2]:
                candidate = itemset1 + [itemset2[k - 2]]
                if candidate not in candidates:
                    if get_support(filtered_transactions, candidate) >= min_support:
                        candidates.append(candidate)
    return candidates