In [4]:
# Title: Implement Apriori for Association Rule Mining

# Task 1: Market Basket Analysis with Simple Transactions
# Step 1: Define a simple dataset of transactions.
# Step 2: Implement the Apriori algorithm using the apyori library.

# Task 2: Apriori Implementation with a Groceries Dataset
# Step 1: Load a sample dataset of grocery transactions. (Consider creating or downloading a CSV file of transactions)
# Step 2: Convert transactions for Apriori and execute the algorithm.
    
# Task 3: Finding Frequent Itemsets in Large Dataset
# Step 1: Use a pre-existing large dataset or generate a synthetic dataset.
# Step 2: Run Apriori and identify frequent itemsets.


from apyori import apriori

def print_rules(results):
    for relation in results:
        items = list(relation.items)
        support = relation.support
        for ordered_stat in relation.ordered_statistics:
            base = list(ordered_stat.items_base)
            add = list(ordered_stat.items_add)
            confidence = ordered_stat.confidence
            lift = ordered_stat.lift
            if base:
                print(f"Rule: {base} -> {add} | Support: {support:.2f}, Confidence: {confidence:.2f}, Lift: {lift:.2f}")
            else:
                print(f"Itemset: {items} | Support: {support:.2f}")

# Task 1: Market Basket Analysis with Simple Transactions
dataset1 = [
    ['milk', 'bread', 'butter'],
    ['bread', 'butter'],
    ['milk', 'bread'],
    ['milk', 'bread', 'butter', 'eggs'],
    ['bread', 'butter']
]
results1 = list(apriori(dataset1, min_support=0.6, min_confidence=0.7))
print("Task 1 - Rules:")
print_rules(results1)
print("\n" + "-"*50 + "\n")

# Task 2: Apriori Implementation with a Groceries Dataset
dataset2 = [
    ['milk', 'bread', 'eggs', 'beer'],
    ['milk', 'bread', 'diaper', 'beer'],
    ['milk', 'diaper', 'beer', 'coke'],
    ['bread', 'egg', 'milk'],
    ['bread', 'egg', 'diaper', 'milk', 'beer']
]
results2 = list(apriori(dataset2, min_support=0.4, min_confidence=0.6))
print("Task 2 - Rules:")
print_rules(results2)
print("\n" + "-"*50 + "\n")

# Task 3: Finding Frequent Itemsets in Large Dataset (Synthetic example)
dataset3 = [
    ['Book A', 'Book B'],
    ['Book A', 'Book C'],
    ['Book B', 'Book C', 'Book A'],
    ['Book B', 'Book D']
]
results3 = list(apriori(dataset3, min_support=0.4, min_confidence=0.7))
print("Task 3 - Rules:")
print_rules(results3)


Task 1 - Rules:
Itemset: ['bread'] | Support: 1.00
Itemset: ['butter'] | Support: 0.80
Itemset: ['butter', 'bread'] | Support: 0.80
Rule: ['bread'] -> ['butter'] | Support: 0.80, Confidence: 0.80, Lift: 1.00
Rule: ['butter'] -> ['bread'] | Support: 0.80, Confidence: 1.00, Lift: 1.00
Rule: ['milk'] -> ['bread'] | Support: 0.60, Confidence: 1.00, Lift: 1.00

--------------------------------------------------

Task 2 - Rules:
Itemset: ['beer'] | Support: 0.80
Itemset: ['bread'] | Support: 0.80
Itemset: ['diaper'] | Support: 0.60
Itemset: ['milk'] | Support: 1.00
Itemset: ['beer', 'bread'] | Support: 0.60
Rule: ['beer'] -> ['bread'] | Support: 0.60, Confidence: 0.75, Lift: 0.94
Rule: ['bread'] -> ['beer'] | Support: 0.60, Confidence: 0.75, Lift: 0.94
Itemset: ['diaper', 'beer'] | Support: 0.60
Rule: ['beer'] -> ['diaper'] | Support: 0.60, Confidence: 0.75, Lift: 1.25
Rule: ['diaper'] -> ['beer'] | Support: 0.60, Confidence: 1.00, Lift: 1.25
Itemset: ['milk', 'beer'] | Support: 0.80
Rule: [