In [6]:
# Title: Apriori Algorithm

# Task 1: Grocery Store Transactions
# Dataset: {Milk, Bread}, {Milk, Diaper, Beer, Bread}, {Milk, Diaper, Beer, Coke}, {Bread, Egg, Milk}, {Bread, Egg, Diaper, Milk, Beer}
# Task: Identify frequent item sets using the Apriori Algorithm with a minimum support threshold of 50%.

# Task 2: Retail Store Data
# Dataset: {Shirt, Tie}, {Shirt, Belt, Tie}, {Shirt, Belt}, {Tie, Belt}, {Shirt, Tie, Belt}
# Task: Generate association rules after identifying frequent itemsets with a confidence threshold of 60%.

# Task 3: Bookstore Purchases
# Dataset: {Book A, Book B}, {Book A, Book C}, {Book B, Book C, Book A}, {Book B, Book D}
# Task: Use the Apriori algorithm to find rules with a support threshold of 40% and confidence threshold of 70%.

In [7]:
import itertools
from collections import defaultdict

def apriori_task1(transactions, min_support):
    """Find all frequent itemsets with a level-wise Apriori search.

    Args:
        transactions: Iterable of transactions, each itself an iterable of
            hashable items (frozenset, set, list, tuple, ...). It is read
            exactly once, so a generator is accepted. An item repeated inside
            a single transaction is counted once for that transaction.
        min_support: Minimum fraction of transactions (0.0-1.0) that must
            contain an itemset for the itemset to be kept.

    Returns:
        A dict mapping the itemset size ``k`` to a dict of
        ``{frozenset(items): count}``, where ``count`` is the number of
        transactions containing every item of the itemset. Key ``1`` is always
        present (its dict may be empty); a larger ``k`` is present only when
        at least one itemset of that size meets ``min_support``.
    """
    # Normalise each transaction to a frozenset while counting single items.
    # Counting from the de-duplicated items keeps the level-1 counts consistent
    # with the per-transaction subset counts used for the larger itemsets.
    baskets = []
    item_counts = defaultdict(int)
    for transaction in transactions:
        unique_items = dict.fromkeys(transaction)  # de-duplicate, keep first-seen order
        for item in unique_items:
            item_counts[item] += 1
        baskets.append(frozenset(unique_items))

    num_transactions = len(baskets)

    # Level 1: single items that meet the support threshold.
    frequent_itemsets = {
        1: {
            frozenset([item]): count
            for item, count in item_counts.items()
            if count / num_transactions >= min_support
        }
    }

    k = 2
    while True:
        prev_frequent_itemsets = frequent_itemsets[k - 1]

        # Join step: merge pairs of frequent (k-1)-itemsets whose union has
        # exactly k items. Unordered pairs are enough because union is symmetric
        # and an itemset joined with itself can never reach size k.
        candidate_itemsets = set()
        for itemset1, itemset2 in itertools.combinations(prev_frequent_itemsets, 2):
            union = itemset1 | itemset2
            if len(union) != k or union in candidate_itemsets:
                continue
            # Prune step: every (k-1)-subset of a candidate must be frequent.
            if all(
                frozenset(subset) in prev_frequent_itemsets
                for subset in itertools.combinations(union, k - 1)
            ):
                candidate_itemsets.add(union)

        # Count step: keep the candidates that meet the support threshold.
        current_frequent_itemsets = {}
        for candidate in candidate_itemsets:
            count = sum(1 for basket in baskets if candidate <= basket)
            if count / num_transactions >= min_support:
                current_frequent_itemsets[candidate] = count

        # Stop at the first level that yields no frequent itemset.
        if not current_frequent_itemsets:
            break
        frequent_itemsets[k] = current_frequent_itemsets
        k += 1

    return frequent_itemsets

import itertools # Added for combinations

# Grocery baskets for Task 1, each stored as an immutable set of item names.
transactions_task1 = [
    frozenset(basket)
    for basket in (
        ['Milk', 'Bread'],
        ['Milk', 'Diaper', 'Beer', 'Bread'],
        ['Milk', 'Diaper', 'Beer', 'Coke'],
        ['Bread', 'Egg', 'Milk'],
        ['Bread', 'Egg', 'Diaper', 'Milk', 'Beer'],
    )
]
# An itemset must appear in at least half of the baskets to be kept.
min_support_task1 = 0.5

frequent_itemsets_task1 = apriori_task1(transactions_task1, min_support_task1)

# Print one line per itemset size: L1 holds single items, L2 pairs, and so on.
print("Frequent Itemsets (Task 1 - Grocery Store Transactions):")
for k, itemsets in frequent_itemsets_task1.items():
    print(f"L{k}: {itemsets}")

Frequent Itemsets (Task 1 - Grocery Store Transactions):
L1: {frozenset({'Milk'}): 5, frozenset({'Bread'}): 4, frozenset({'Diaper'}): 3, frozenset({'Beer'}): 3}
L2: {frozenset({'Milk', 'Beer'}): 3, frozenset({'Milk', 'Diaper'}): 3, frozenset({'Milk', 'Bread'}): 4, frozenset({'Diaper', 'Beer'}): 3}
L3: {frozenset({'Milk', 'Diaper', 'Beer'}): 3}


In [8]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Retail baskets for Task 2, one inner list per transaction.
transactions_task2 = [
    ['Shirt', 'Tie'],
    ['Shirt', 'Belt', 'Tie'],
    ['Shirt', 'Belt'],
    ['Tie', 'Belt'],
    ['Shirt', 'Tie', 'Belt'],
]

# Encode the baskets as an array whose columns are named by the encoder's
# `columns_` attribute, then wrap it in a DataFrame for mlxtend.
te_task2 = TransactionEncoder()
te_ary_task2 = te_task2.fit_transform(transactions_task2)
df_task2 = pd.DataFrame(data=te_ary_task2, columns=te_task2.columns_)

# Frequent itemsets: minimum support of 0.6 (60%), labelled with item names
# instead of column indices.
frequent_itemsets_task2 = apriori(df_task2, use_colnames=True, min_support=0.6)

# Association rules: keep those whose confidence is at least 0.6 (60%).
rules_task2 = association_rules(
    frequent_itemsets_task2,
    min_threshold=0.6,
    metric="confidence",
)

print("\nAssociation Rules (Task 2 - Retail Store Data):")
print(rules_task2)


Association Rules (Task 2 - Retail Store Data):
  antecedents consequents  antecedent support  consequent support  support  \
0     (Shirt)      (Belt)                 0.8                 0.8      0.6   
1      (Belt)     (Shirt)                 0.8                 0.8      0.6   
2       (Tie)      (Belt)                 0.8                 0.8      0.6   
3      (Belt)       (Tie)                 0.8                 0.8      0.6   
4       (Tie)     (Shirt)                 0.8                 0.8      0.6   
5     (Shirt)       (Tie)                 0.8                 0.8      0.6   

   confidence    lift  representativity  leverage  conviction  zhangs_metric  \
0        0.75  0.9375               1.0     -0.04         0.8          -0.25   
1        0.75  0.9375               1.0     -0.04         0.8          -0.25   
2        0.75  0.9375               1.0     -0.04         0.8          -0.25   
3        0.75  0.9375               1.0     -0.04         0.8          -0.25   
4   

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Bookstore purchases for Task 3, one inner list per transaction.
transactions_task3 = [
    ['Book A', 'Book B'],
    ['Book A', 'Book C'],
    ['Book B', 'Book C', 'Book A'],
    ['Book B', 'Book D'],
]

# Encode the purchases as an array whose columns are named by the encoder's
# `columns_` attribute, then wrap it in a DataFrame for mlxtend.
te_task3 = TransactionEncoder()
te_ary_task3 = te_task3.fit_transform(transactions_task3)
df_task3 = pd.DataFrame(data=te_ary_task3, columns=te_task3.columns_)

# Frequent itemsets: minimum support of 0.4 (40%), labelled with item names
# instead of column indices.
frequent_itemsets_task3 = apriori(df_task3, use_colnames=True, min_support=0.4)

# Association rules: keep those whose confidence is at least 0.7 (70%).
rules_task3 = association_rules(
    frequent_itemsets_task3,
    min_threshold=0.7,
    metric="confidence",
)

print("\nAssociation Rules (Task 3 - Bookstore Purchases):")
print(rules_task3)