In [3]:
import pandas as pd
from itertools import combinations

# Sample transaction data
dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diapers', 'Beer', 'Eggs'],
    ['Milk', 'Bread', 'Butter', 'Diapers'],
    ['Bread', 'Milk', 'Butter', 'Diapers', 'Beer'],
    ['Milk', 'Bread', 'Diapers', 'Beer'],
    ['Bread', 'Milk', 'Diapers', 'Butter', 'Beer'],
    ['Milk', 'Bread', 'Butter', 'Diapers'],
]

# Convert the dataset into a DataFrame for convenience
df = pd.DataFrame(dataset, columns=['Item 1', 'Item 2', 'Item 3', 'Item 4', 'Item 5'])

# Function to one-hot encode the dataset
def encode_transactions(df):
    items = set()
    for transaction in df.values.flatten():
        if isinstance(transaction, str):
            items.add(transaction)
    
    # Create one-hot encoded dataframe
    one_hot = pd.DataFrame(columns=items, index=df.index).fillna(0)
    
    for idx, row in df.iterrows():
        for item in row:
            if pd.notna(item):
                one_hot.at[idx, item] = 1
    return one_hot

# One-hot encode the transaction data
one_hot_df = encode_transactions(df)

# Calculate support for itemsets
def get_support(itemset, transactions):
    itemset_len = len(itemset)
    count = 0
    for transaction in transactions:
        if all(item in transaction for item in itemset):
            count += 1
    return count / len(transactions)

# Find frequent itemsets using the Apriori algorithm
def apriori(transactions, min_support):
    itemsets = []
    unique_items = set([item for transaction in transactions for item in transaction])
    
    # Generate frequent itemsets with 1 item
    single_itemsets = [[item] for item in unique_items]
    itemsets.extend(single_itemsets)
    
    # Filter itemsets based on support
    frequent_itemsets = []
    for itemset in itemsets:
        support = get_support(itemset, transactions)
        if support >= min_support:
            frequent_itemsets.append((itemset, support))
    
    # Generate itemsets with more than 1 item
    k = 2
    while True:
        candidate_itemsets = []
        for itemset1 in frequent_itemsets:
            for itemset2 in frequent_itemsets:
                combined = list(set(itemset1[0] + itemset2[0]))
                if len(combined) == k and combined not in candidate_itemsets:
                    candidate_itemsets.append(combined)
        
        new_frequent_itemsets = []
        for itemset in candidate_itemsets:
            support = get_support(itemset, transactions)
            if support >= min_support:
                new_frequent_itemsets.append((itemset, support))
        
        if not new_frequent_itemsets:
            break
        
        frequent_itemsets.extend(new_frequent_itemsets)
        k += 1
    
    return frequent_itemsets

# Function to generate association rules from frequent itemsets
def generate_rules(frequent_itemsets, min_confidence):
    rules = []
    for itemset, support in frequent_itemsets:
        if len(itemset) > 1:
            subsets = [list(combinations(itemset, i)) for i in range(1, len(itemset))]
            for subset_list in subsets:
                for subset in subset_list:
                    antecedent = list(subset)
                    consequent = list(set(itemset) - set(antecedent))
                    confidence = get_support(antecedent + consequent, transactions) / get_support(antecedent, transactions)
                    if confidence >= min_confidence:
                        rules.append((antecedent, consequent, confidence, support))
    return rules

# Run Apriori Algorithm on the dataset
min_support = 0.4
min_confidence = 0.6
frequent_itemsets = apriori(dataset, min_support)

# Generate Association Rules
rules = generate_rules(frequent_itemsets, min_confidence)

# Display the rules
for rule in rules:
    print(f"Rule: {rule[0]} -> {rule[1]} | Confidence: {rule[2]:.2f} | Support: {rule[3]:.2f}")


ValueError: columns cannot be a set