In [16]:
df = pd.read_csv("transactions.csv")
df.head()

Unnamed: 0,Transaction
0,"milk, bread, butter"
1,"milk, bread"
2,"bread, butter"
3,"milk, bread, butter, eggs"
4,"milk, bread, eggs"


In [22]:
transactions = [
    ['milk', 'bread', 'butter'],
    ['milk', 'bread'],
    ['bread', 'butter'],
    ['milk', 'bread', 'butter', 'eggs'],
    ['milk', 'bread', 'eggs'],
    ['butter', 'eggs']
]

In [23]:
# Function to split a line into a list of items
def read_transactions_from_csv(file_path):
    transactions = []
    with open(file_path, 'r') as file:
        reader = csv.reader(file)
        next(reader)  # Skip header
        for row in reader:
            transactions.append([item.strip() for item in row[0].split(',')])
    return transactions

In [24]:
from itertools import combinations
from collections import defaultdict

def generate_candidates(itemsets, size):
    """Generate candidate itemsets of a given size."""
    candidates = set()
    for itemset in itemsets:
        for combo in combinations(itemset, size):
            candidates.add(frozenset(combo))
    return candidates

In [25]:
def count_support(transactions, candidates):
    """Count the support of each candidate itemset."""
    support_count = defaultdict(int)
    for transaction in transactions:
        transaction_set = set(transaction)
        for candidate in candidates:
            if candidate.issubset(transaction_set):
                support_count[candidate] += 1
    return support_count

In [26]:
def filter_frequent_itemsets(support_count, min_support):
    """Filter itemsets that meet the minimum support threshold."""
    total_transactions = len(transactions)
    frequent_itemsets = {itemset: count / total_transactions
                         for itemset, count in support_count.items()
                         if count / total_transactions >= min_support}
    return frequent_itemsets


In [27]:
def apriori(transactions, min_support):
    """Apriori algorithm to find frequent itemsets."""
    transactions = [set(transaction) for transaction in transactions]

    frequent_itemsets = {}
    k = 1
    current_itemsets = set(frozenset([item]) for transaction in transactions for item in transaction)
    
    while current_itemsets:
        candidates = generate_candidates(current_itemsets, k)
        
        support_count = count_support(transactions, candidates)
        
        frequent_k_itemsets = filter_frequent_itemsets(support_count, min_support)
        
        if not frequent_k_itemsets:
            break
        
        frequent_itemsets.update(frequent_k_itemsets)
        
        current_itemsets = generate_candidates(frequent_k_itemsets.keys(), k + 1)
        k += 1
    
    return frequent_itemsets


In [28]:
min_support = 0.5  # Minimum support threshold
frequent_itemsets = apriori(transactions, min_support)

# Print the frequent itemsets
for itemset, support in frequent_itemsets.items():
    print(f"Itemset: {set(itemset)}, Support: {support:.2f}")


Itemset: {'bread'}, Support: 0.83
Itemset: {'butter'}, Support: 0.67
Itemset: {'milk'}, Support: 0.67
Itemset: {'eggs'}, Support: 0.50
