In [6]:
import pandas as pd
from itertools import combinations

# Load the raw table; the code below reads its 'X', 'Y' and 'Z' columns.
df = pd.read_csv('Apriori Dataset.csv')

# (column, value, item label) rules, listed in the order in which labels are
# appended to a transaction. Values not listed here produce no item.
_ITEM_RULES = (
    ('X', 1, 'X1'),
    ('X', 2, 'X2'),
    ('Y', 1, 'Y1'),
    ('Y', 2, 'Y2'),
    ('Y', 4, 'Y4'),
    ('Z', 1, 'Z1'),
    ('Z', 2, 'Z2'),
)

# One transaction per row: the labels of every rule that the row satisfies.
transactions = [
    [label for column, value, label in _ITEM_RULES if row[column] == value]
    for _, row in df.iterrows()
]

def get_frequent_items(transactions, min_support):
    """Return the single items whose support reaches ``min_support``.

    Support of an item is the fraction of transactions that contain it, so
    an item repeated inside one transaction is counted only once for that
    transaction.

    Args:
        transactions: Sized collection of transactions, each an iterable of
            hashable items.
        min_support: Minimum fraction of transactions (compared with ``>=``)
            an item must appear in to be kept.

    Returns:
        Dict mapping each frequent item to the number of transactions that
        contain it, in first-seen order. Empty when there are no
        transactions.
    """
    items = {}
    n_transactions = len(transactions)

    for transaction in transactions:
        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # repeated item cannot inflate its own support.
        for item in dict.fromkeys(transaction):
            items[item] = items.get(item, 0) + 1

    # With zero transactions `items` is empty, so the division never runs.
    return {item: count for item, count in items.items()
            if count / n_transactions >= min_support}

def generate_candidates(freq_items, k):
    """Return every ``k``-item combination of ``freq_items`` as a list of tuples.

    The items are sorted first, so each tuple is internally sorted and the
    tuples come out in lexicographic order.
    """
    ordered_items = list(freq_items)
    ordered_items.sort()
    return [*combinations(ordered_items, k)]

def apriori(transactions, min_support=0.07):
    """Find all frequent itemsets, level by level.

    Level 1 comes from ``get_frequent_items``. Each later level ``k`` counts
    candidate ``k``-itemsets against the transactions and keeps those whose
    support (count / number of transactions) is at least ``min_support``.
    From level 3 on, a candidate is only counted if every one of its
    ``(k-1)``-item subsets was frequent at the previous level; an itemset
    cannot be frequent otherwise, so the result is unaffected while far
    fewer candidates are scanned.

    Args:
        transactions: Sized collection of transactions, each an iterable of
            hashable, mutually sortable items.
        min_support: Minimum fraction of transactions an itemset must occur
            in to be kept.

    Returns:
        List of dicts, one per level. The first maps single items to their
        counts; each following dict maps sorted item tuples to their counts.
        The list stops before the first level that has no frequent itemset.
    """
    n_transactions = len(transactions)
    freq_items = get_frequent_items(transactions, min_support)

    # Build sets once so each containment test is a hash lookup instead of
    # a list scan repeated for every candidate.
    transaction_sets = [set(transaction) for transaction in transactions]

    k = 2
    freq_itemsets = [freq_items]
    previous_level = freq_items

    while True:
        if k == 2:
            # Every pair of frequent single items is a valid candidate.
            candidates = generate_candidates(freq_items.keys(), k)
        else:
            # Only items that survived the previous level can appear in a
            # frequent k-itemset, and every (k-1)-subset must be frequent.
            surviving_items = {item
                               for itemset in previous_level
                               for item in itemset}
            candidates = [
                candidate
                for candidate in generate_candidates(surviving_items, k)
                if all(subset in previous_level
                       for subset in combinations(candidate, k - 1))
            ]

        item_count = {}
        for transaction in transaction_sets:
            for candidate in candidates:
                if transaction.issuperset(candidate):
                    item_count[candidate] = item_count.get(candidate, 0) + 1

        freq_k_items = {itemset: count for itemset, count in item_count.items()
                        if count / n_transactions >= min_support}

        if not freq_k_items:
            break

        freq_itemsets.append(freq_k_items)
        previous_level = freq_k_items
        k += 1

    return freq_itemsets

frequent_itemsets = apriori(transactions, min_support=0.07)

# Report each level: one line per itemset with its count, then the number
# of itemsets found at that level.
for k, itemsets in enumerate(frequent_itemsets, 1):
    print(f"\nFrequent {k}-itemsets:")
    for itemset, count in itemsets.items():
        if isinstance(itemset, str):
            print(f"{{{itemset}}}: {count}")
        else:
            # Print the tuple's items in their stored order instead of going
            # through set(): the display order of a set of strings can
            # change from run to run (string hash randomization), which
            # made the report non-reproducible.
            members = ", ".join(repr(item) for item in itemset)
            print(f"{{{members}}}: {count}")
    print(f"Number of {k}-itemsets: {len(itemsets)}")


Frequent 1-itemsets:
{X1}: 5
{Y4}: 3
{Z1}: 3
{Y2}: 1
{Z2}: 5
{X2}: 3
{Y1}: 4
Number of 1-itemsets: 7

Frequent 2-itemsets:
{'Y4', 'X1'}: 2
{'Z1', 'X1'}: 2
{'Y4', 'Z1'}: 1
{'Y2', 'X1'}: 1
{'Z2', 'X1'}: 3
{'Y2', 'Z2'}: 1
{'Y4', 'Z2'}: 2
{'X2', 'Y1'}: 2
{'X2', 'Z2'}: 2
{'Y1', 'Z2'}: 2
{'Y1', 'X1'}: 2
{'Y1', 'Z1'}: 2
{'X2', 'Y4'}: 1
{'X2', 'Z1'}: 1
Number of 2-itemsets: 14

Frequent 3-itemsets:
{'Y4', 'Z1', 'X1'}: 1
{'Z2', 'Y2', 'X1'}: 1
{'Z2', 'Y4', 'X1'}: 1
{'X2', 'Y1', 'Z2'}: 1
{'Y1', 'Z1', 'X1'}: 1
{'X2', 'Y4', 'Z2'}: 1
{'Z2', 'Y1', 'X1'}: 1
{'X2', 'Y1', 'Z1'}: 1
Number of 3-itemsets: 8
