In [8]:
# Title: Apriori Algorithm

# Task 1: Grocery Store Transactions
# Dataset: {Milk, Bread}, {Milk, Diaper, Beer, Bread}, {Milk, Diaper, Beer, Coke}, {Bread, Egg, Milk}, {Bread, Egg, Diaper, Milk, Beer}
# Task: Identify frequent item sets using the Apriori Algorithm with a minimum support threshold of 50%.

# Task 2: Retail Store Data
# Dataset: {Shirt, Tie}, {Shirt, Belt, Tie}, {Shirt, Belt}, {Tie, Belt}, {Shirt, Tie, Belt}
# Task: Generate association rules after identifying frequent itemsets with a confidence threshold of 60%.

# Task 3: Bookstore Purchases
# Dataset: {Book A, Book B}, {Book A, Book C}, {Book B, Book C, Book A}, {Book B, Book D}
# Task: Use the Apriori algorithm to find rules with a support threshold of 40% and confidence threshold of

In [9]:
from collections import defaultdict

def apriori_task1(transactions, min_support):
    # Count occurrences of individual items
    item_counts = defaultdict(int)
    for transaction in transactions:
        for item in transaction:
            item_counts[item] += 1

    num_transactions = len(transactions)

    # Generate initial frequent 1-itemsets
    frequent_items = {frozenset([item]): count for item, count in item_counts.items() if count / num_transactions >= min_support}
    frequent_itemsets = {1: frequent_items}

    k = 2
    while True:
        candidate_itemsets = set()
        prev_frequent_itemsets = frequent_itemsets[k - 1]

        # Generate candidates for k-itemsets from (k-1)-itemsets
        # This part of candidate generation in your original code is a bit off
        # It should combine itemsets from the previous level.
        # A common way is to join itemsets that share k-2 items.
        for itemset1 in prev_frequent_itemsets:
            for itemset2 in prev_frequent_itemsets:
                union = itemset1.union(itemset2)
                if len(union) == k:
                    # Pruning step: Ensure all (k-1)-subsets are frequent
                    is_valid_candidate = True
                    for subset in itertools.combinations(union, k - 1):
                        if frozenset(subset) not in prev_frequent_itemsets:
                            is_valid_candidate = False
                            break
                    if is_valid_candidate:
                        candidate_itemsets.add(union)

        current_frequent_itemsets = {}
        for candidate in candidate_itemsets:
            count = 0
            for transaction in transactions:
                if candidate.issubset(transaction):
                    count += 1
            if count / num_transactions >= min_support:
                current_frequent_itemsets[candidate] = count

        if not current_frequent_itemsets:
            break
        frequent_itemsets[k] = current_frequent_itemsets
        k += 1

    return frequent_itemsets

import itertools # Added for combinations

transactions_task1 = [
    frozenset(['Milk', 'Bread']),
    frozenset(['Milk', 'Diaper', 'Beer', 'Bread']),
    frozenset(['Milk', 'Diaper', 'Beer', 'Coke']),
    frozenset(['Bread', 'Egg', 'Milk']),
    frozenset(['Bread', 'Egg', 'Diaper', 'Milk', 'Beer'])
]
min_support_task1 = 0.5

frequent_itemsets_task1 = apriori_task1(transactions_task1, min_support_task1)

print("Frequent Itemsets (Task 1 - Grocery Store Transactions):")
for k, itemsets in frequent_itemsets_task1.items():
    print(f"L{k}: {itemsets}")

Frequent Itemsets (Task 1 - Grocery Store Transactions):
L1: {frozenset({'Bread'}): 4, frozenset({'Milk'}): 5, frozenset({'Beer'}): 3, frozenset({'Diaper'}): 3}
L2: {frozenset({'Bread', 'Milk'}): 4, frozenset({'Diaper', 'Milk'}): 3, frozenset({'Beer', 'Diaper'}): 3, frozenset({'Beer', 'Milk'}): 3}
L3: {frozenset({'Beer', 'Diaper', 'Milk'}): 3}


In [10]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Your transaction data
transactions_task2 = [
    ['Shirt', 'Tie'],
    ['Shirt', 'Belt', 'Tie'],
    ['Shirt', 'Shoes', 'Tie'],
    ['Tie', 'Belt', 'Shirt'],
    ['Shirt', 'Tie', 'Belt']
]

# --- 1. Encode the transactions into a one-hot encoded DataFrame ---
te = TransactionEncoder()
te_ary = te.fit(transactions_task2).transform(transactions_task2)
df = pd.DataFrame(te_ary, columns=te.columns_)

print("--- One-Hot Encoded DataFrame ---")
print(df)
print("\n" + "-"*40 + "\n")

# --- 2. Find frequent itemsets using the Apriori algorithm ---
# You can adjust the 'min_support' threshold.
# 'min_support=0.6' means an itemset must appear in at least 60% of transactions.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

print("--- Frequent Itemsets (min_support=0.6) ---")
print(frequent_itemsets)
print("\n" + "-"*40 + "\n")

# --- 3. Generate association rules from the frequent itemsets ---
# You can adjust the 'min_threshold' and 'metric'.
# 'metric="confidence"' and 'min_threshold=0.7' means
# A -> B is a rule if confidence(A -> B) >= 70%.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

print("--- Association Rules (min_threshold=0.7, metric='confidence') ---")
print(rules)
print("\n" + "-"*40 + "\n")

# --- Optional: Filter and sort rules for better insights ---
print("--- Sorted Rules by Lift (descending) ---")
rules_sorted_by_lift = rules.sort_values(by="lift", ascending=False)
print(rules_sorted_by_lift)
print("\n" + "-"*40 + "\n")

print("--- Rules with High Confidence and Lift ---")
# Example: Rules where confidence is greater than 0.8 and lift is greater than 1.2
high_confidence_lift_rules = rules[(rules['confidence'] > 0.8) & (rules['lift'] > 1.2)]
print(high_confidence_lift_rules)

--- One-Hot Encoded DataFrame ---
    Belt  Shirt  Shoes   Tie
0  False   True  False  True
1   True   True  False  True
2  False   True   True  True
3   True   True  False  True
4   True   True  False  True

----------------------------------------

--- Frequent Itemsets (min_support=0.6) ---
   support            itemsets
0      0.6              (Belt)
1      1.0             (Shirt)
2      1.0               (Tie)
3      0.6       (Shirt, Belt)
4      0.6         (Tie, Belt)
5      1.0        (Shirt, Tie)
6      0.6  (Shirt, Tie, Belt)

----------------------------------------

--- Association Rules (min_threshold=0.7, metric='confidence') ---
     antecedents   consequents  antecedent support  consequent support  \
0         (Belt)       (Shirt)                 0.6                 1.0   
1         (Belt)         (Tie)                 0.6                 1.0   
2        (Shirt)         (Tie)                 1.0                 1.0   
3          (Tie)       (Shirt)                 1.0 

  cert_metric = np.where(certainty_denom == 0, 0, certainty_num / certainty_denom)


In [11]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

transactions_task3 = [
    ['Book A', 'Book B'],
    ['Book A', 'Book C'],
    ['Book B', 'Book C', 'Book A'],
    ['Book B', 'Book D']
]

te_task3 = TransactionEncoder()
te_ary_task3 = te_task3.fit(transactions_task3).transform(transactions_task3)
df_task3 = pd.DataFrame(te_ary_task3, columns=te_task3.columns_)

# Identify frequent itemsets with a minimum support threshold of 40% (0.4)
frequent_itemsets_task3 = apriori(df_task3, min_support=0.4, use_colnames=True)

# Generate association rules with a confidence threshold of 70% (0.7)
rules_task3 = association_rules(frequent_itemsets_task3, metric="confidence", min_threshold=0.7)

print("\nAssociation Rules (Task 3 - Bookstore Purchases):")
print(rules_task3)


Association Rules (Task 3 - Bookstore Purchases):
  antecedents consequents  antecedent support  consequent support  support  \
0    (Book C)    (Book A)                 0.5                0.75      0.5   

   confidence      lift  representativity  leverage  conviction  \
0         1.0  1.333333               1.0     0.125         inf   

   zhangs_metric   jaccard  certainty  kulczynski  
0            0.5  0.666667        1.0    0.833333  
