In [None]:
import random

# Catalogue of 30 supermarket products, named Item_1 through Item_30
items = ["Item_" + str(number) for number in range(1, 31)]

# Generate 5 different transaction databases, each with 20 transactions
def generate_transactions(num_transactions=20, max_items=10, item_pool=None):
    """Build one synthetic transaction database.

    Each transaction holds between 1 and ``max_items`` *distinct* items drawn
    from the pool. Items are sampled without replacement because a basket
    either contains a product or it does not; drawing with replacement
    produced transactions with repeated items such as ``['Item_14', 'Item_14']``.

    Args:
        num_transactions: Number of transactions to generate (default 20).
        max_items: Upper bound on the size of one transaction (default 10).
            It is capped at the size of the pool.
        item_pool: Items to draw from. When ``None``, the module-level
            ``items`` list is used.

    Returns:
        A list of ``num_transactions`` lists of items.

    Raises:
        ValueError: If the pool is empty or ``max_items`` is below 1.
    """
    pool = list(items if item_pool is None else item_pool)
    # A basket cannot contain more distinct items than the pool offers
    largest_basket = min(max_items, len(pool))
    if largest_basket < 1:
        raise ValueError("item_pool must be non-empty and max_items must be at least 1")
    return [
        random.sample(pool, random.randint(1, largest_basket))
        for _ in range(num_transactions)
    ]

# Seed the generator so that "Restart Kernel -> Run All" reproduces the same
# databases (and therefore the same mining results) on every run
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

databases = [generate_transactions() for _ in range(5)]

# Print every transaction of each generated database
for i, db in enumerate(databases):
    print(f"\nDatabase {i+1}:")
    for t in db:
        print(t)


Database 1:
['Item_25', 'Item_20', 'Item_9', 'Item_13', 'Item_1', 'Item_23', 'Item_19', 'Item_7', 'Item_15', 'Item_22']
['Item_14', 'Item_14']
['Item_16', 'Item_27', 'Item_19', 'Item_30', 'Item_14', 'Item_29', 'Item_28']
['Item_30', 'Item_15']
['Item_13', 'Item_12', 'Item_14', 'Item_1', 'Item_5', 'Item_21', 'Item_9', 'Item_2']
['Item_25', 'Item_19']
['Item_28']
['Item_11', 'Item_21', 'Item_9', 'Item_4', 'Item_10', 'Item_4', 'Item_16', 'Item_17', 'Item_2']
['Item_22', 'Item_15', 'Item_29', 'Item_24', 'Item_9', 'Item_26', 'Item_5', 'Item_6', 'Item_14']
['Item_2', 'Item_13', 'Item_24', 'Item_6', 'Item_23', 'Item_29', 'Item_24', 'Item_15', 'Item_6']
['Item_28', 'Item_4', 'Item_22', 'Item_17', 'Item_4', 'Item_29', 'Item_13', 'Item_30']
['Item_1', 'Item_25', 'Item_12', 'Item_13']
['Item_29', 'Item_28', 'Item_28', 'Item_6', 'Item_23', 'Item_2', 'Item_10', 'Item_26', 'Item_26', 'Item_8']
['Item_6', 'Item_3', 'Item_21', 'Item_17', 'Item_16', 'Item_21', 'Item_29', 'Item_2']
['Item_17', 'Item_21

In [None]:
from itertools import combinations
import time

def get_frequent_itemsets(transactions, min_support):
    """Return the support of every single item that reaches ``min_support``.

    An item is counted at most once per transaction, and its support is the
    fraction of transactions that contain it.

    Args:
        transactions: Sized collection of item sequences.
        min_support: Minimum fraction of transactions an item must appear in.

    Returns:
        Dict mapping each qualifying item to its support.
    """
    total = len(transactions)
    occurrences = {}

    # Tally in how many transactions each distinct item shows up
    for basket in transactions:
        for product in set(basket):
            if product in occurrences:
                occurrences[product] += 1
            else:
                occurrences[product] = 1

    # Keep only the items whose relative frequency reaches the threshold
    frequent = {}
    for product, hits in occurrences.items():
        support = hits / total
        if support >= min_support:
            frequent[product] = support

    return frequent

def apriori(transactions, min_support=0.3, min_confidence=0.6):
    """Mine frequent single items and pairwise association rules.

    A rule ``A -> B`` is reported when the pair ``{A, B}`` is itself frequent
    and ``confidence(A -> B) = support({A, B}) / support(A)`` reaches
    ``min_confidence``. The previous version divided the support of one item
    by the support of the other, which ignores co-occurrence entirely and can
    exceed 1; a confidence is a conditional frequency and always lies in
    ``[0, 1]``.

    Args:
        transactions: Sized collection of item sequences.
        min_support: Minimum fraction of transactions that must contain an
            item (or an item pair) for it to count as frequent.
        min_confidence: Minimum confidence a rule needs to be reported.

    Returns:
        A tuple ``(frequent_itemsets, rules, elapsed_seconds)`` where
        ``frequent_itemsets`` maps each frequent single item to its support,
        ``rules`` is a list of ``(antecedent, consequent, confidence)`` tuples
        and ``elapsed_seconds`` is the wall-clock duration of the call.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time()
    start_time = time.perf_counter()
    frequent_itemsets = get_frequent_itemsets(transactions, min_support)

    # Convert each transaction to a set once so membership tests are cheap
    baskets = [set(transaction) for transaction in transactions]
    num_transactions = len(baskets)

    rules = []
    # Only pairs of frequent items can be frequent themselves (Apriori property)
    for first, second in combinations(frequent_itemsets, 2):
        joint_count = sum(
            1 for basket in baskets if first in basket and second in basket
        )
        if joint_count == 0:
            continue
        joint_support = joint_count / num_transactions
        if joint_support < min_support:
            continue

        # Confidence is directional, so evaluate both A -> B and B -> A
        for antecedent, consequent in ((first, second), (second, first)):
            confidence = joint_support / frequent_itemsets[antecedent]
            if confidence >= min_confidence:
                rules.append((antecedent, consequent, confidence))

    elapsed = time.perf_counter() - start_time
    return frequent_itemsets, rules, elapsed

# Mine the first database with the default thresholds and report the outcome
freq_items, rules, apriori_time = apriori(databases[0])
print(
    "\nApriori Algorithm Results:",
    f"Frequent Itemsets: {freq_items}",
    f"Association Rules: {rules}",
    f"Execution Time: {apriori_time}",
    sep="\n",
)


Apriori Algorithm Results:
Frequent Itemsets: {'Item_13': 0.35, 'Item_23': 0.3, 'Item_14': 0.35, 'Item_29': 0.35, 'Item_21': 0.3}
Association Rules: [('Item_13', 'Item_23', 1.1666666666666667), ('Item_13', 'Item_14', 1.0), ('Item_13', 'Item_29', 1.0), ('Item_13', 'Item_21', 1.1666666666666667), ('Item_23', 'Item_14', 0.8571428571428572), ('Item_23', 'Item_29', 0.8571428571428572), ('Item_23', 'Item_21', 1.0), ('Item_14', 'Item_29', 1.0), ('Item_14', 'Item_21', 1.1666666666666667), ('Item_29', 'Item_21', 1.1666666666666667)]
Execution Time: 5.435943603515625e-05


In [None]:
def brute_force_frequent_itemsets(transactions, min_support):
    """Find every frequent itemset by exhaustive, size-by-size enumeration.

    All combinations of ``k`` distinct items are tested for ``k = 1, 2, ...``.
    Enumeration stops at the first size that yields no frequent itemset:
    support can only shrink when an itemset grows, so no larger itemset can be
    frequent either. The result is the same as enumerating all ``2**n``
    subsets, but the search no longer runs through every subset when the
    frequent itemsets are small.

    Args:
        transactions: Sized collection of item sequences.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to be reported.

    Returns:
        A tuple ``(frequent_itemsets, elapsed_seconds)`` where
        ``frequent_itemsets`` maps each frequent itemset (a tuple of items) to
        its support and ``elapsed_seconds`` is the wall-clock duration.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time()
    start_time = time.perf_counter()

    # Build each transaction's set once instead of once per candidate itemset
    baskets = [set(transaction) for transaction in transactions]
    num_transactions = len(baskets)

    # Fix the item order so the tuple keys are the same on every run; iterating
    # a raw set would make the order depend on string hash randomization
    distinct = dict.fromkeys(
        item for transaction in transactions for item in transaction
    )
    try:
        all_items = sorted(distinct)
    except TypeError:
        # Items that cannot be ordered keep their first-seen order
        all_items = list(distinct)

    frequent_itemsets = {}

    for k in range(1, len(all_items) + 1):
        found_at_this_size = False
        for itemset in combinations(all_items, k):
            count = sum(1 for basket in baskets if basket.issuperset(itemset))
            support = count / num_transactions

            if support >= min_support:
                frequent_itemsets[itemset] = support
                found_at_this_size = True

        # Anti-monotonicity: every superset of an infrequent itemset is infrequent
        if not found_at_this_size:
            break

    elapsed = time.perf_counter() - start_time
    return frequent_itemsets, elapsed

# Exhaustively mine the first database at 30% minimum support and report the outcome
bf_freq_items, bf_time = brute_force_frequent_itemsets(databases[0], min_support=0.3)

print(
    "\nBrute Force Results:",
    f"Frequent Itemsets: {bf_freq_items}",
    f"Execution Time: {bf_time}",
    sep="\n",
)