In [2]:
# Title: Apriori Algorithm

# Task 1: Grocery Store Transactions
# Dataset: {Milk, Bread}, {Milk, Diaper, Beer, Bread}, {Milk, Diaper, Beer, Coke}, {Bread, Egg, Milk}, {Bread, Egg, Diaper, Milk, Beer}
# Task: Identify frequent itemsets using the Apriori algorithm with a minimum support threshold of 50%.

# Task 2: Retail Store Data
# Dataset: {Shirt, Tie}, {Shirt, Belt, Tie}, {Shirt, Belt}, {Tie, Belt}, {Shirt, Tie, Belt}
# Task: Generate association rules after identifying frequent itemsets with a confidence threshold of 60%.

# Task 3: Bookstore Purchases
# Dataset: {Book A, Book B}, {Book A, Book C}, {Book B, Book C, Book A}, {Book B, Book D}
# Task: Use the Apriori algorithm to find rules with a support threshold of 40% and confidence threshold of 70%.


from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd
import numpy as np
import warnings

# Ignore every warning of category RuntimeWarning from this point on; the
# filter is installed globally, so it also affects code outside this cell.
# NOTE(review): presumably meant to hide NumPy divide/invalid-value warnings
# emitted while rule metrics are computed - confirm, because unrelated
# RuntimeWarnings are silenced as well.
warnings.filterwarnings("ignore", category=RuntimeWarning)

def apriori_analysis(dataset, min_support, min_confidence):
    """Mine frequent itemsets and association rules from transactions.

    Args:
        dataset: Sequence of transactions, each a list of item labels.
        min_support: Minimum support, as a fraction, an itemset needs in
            order to be reported as frequent.
        min_confidence: Minimum confidence, as a fraction, a rule needs in
            order to be kept.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple of DataFrames. ``rules`` is
        empty when no itemset reaches ``min_support`` or no rule reaches
        ``min_confidence``. In the "no frequent itemsets" case the empty
        table carries only the core rule columns (antecedents, consequents,
        the support columns, confidence and lift).
    """
    # Encode the transactions as a table with one column per distinct item.
    te = TransactionEncoder()
    te_array = te.fit(dataset).transform(dataset)
    df = pd.DataFrame(te_array, columns=te.columns_)

    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

    # association_rules() rejects an empty itemset table with a ValueError.
    # A threshold that is simply too strict should yield "no rules", not a
    # crash, so hand back an empty rules table instead.
    if frequent_itemsets.empty:
        empty_rules = pd.DataFrame(
            columns=[
                'antecedents',
                'consequents',
                'antecedent support',
                'consequent support',
                'support',
                'confidence',
                'lift',
            ]
        )
        return frequent_itemsets, empty_rules

    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
    # Treat infinite metric values as missing, then drop any rule whose
    # lift, confidence or support is undefined.
    rules = rules.replace([np.inf, -np.inf], np.nan).dropna(subset=['lift', 'confidence', 'support'])

    return frequent_itemsets, rules

# Task 1: grocery baskets, one inner list per transaction.
dataset1 = [
    ["Milk", "Bread"],
    ["Milk", "Diaper", "Beer", "Bread"],
    ["Milk", "Diaper", "Beer", "Coke"],
    ["Bread", "Egg", "Milk"],
    ["Bread", "Egg", "Diaper", "Milk", "Beer"],
]
# Only the itemsets are reported in this section; the confidence floor of 0
# applies to rules1, which is computed but not printed here.
frequent_itemsets1, rules1 = apriori_analysis(
    dataset1,
    min_support=0.5,
    min_confidence=0,
)

# A single print with a newline separator emits the heading, the table and
# the trailing blank lines exactly as three consecutive prints would.
print(
    "Task 1 - Frequent Itemsets (Support >= 50%):",
    frequent_itemsets1,
    "\n",
    sep="\n",
)

# Task 2: retail purchases, one inner list per transaction.
dataset2 = [
    ["Shirt", "Tie"],
    ["Shirt", "Belt", "Tie"],
    ["Shirt", "Belt"],
    ["Tie", "Belt"],
    ["Shirt", "Tie", "Belt"],
]
# With five transactions the smallest possible non-zero support is 0.2, so a
# 1% support floor keeps every itemset that occurs at all; only the 60%
# confidence threshold actually filters the rules.
frequent_itemsets2, rules2 = apriori_analysis(
    dataset2,
    min_support=0.01,
    min_confidence=0.6,
)

# Heading, the selected rule columns, then the trailing blank lines.
print(
    "Task 2 - Association Rules (Confidence >= 60%):",
    rules2.loc[:, ["antecedents", "consequents", "support", "confidence", "lift"]],
    "\n",
    sep="\n",
)

# Task 3: bookstore purchases, one inner list per transaction.
dataset3 = [
    ["Book A", "Book B"],
    ["Book A", "Book C"],
    ["Book B", "Book C", "Book A"],
    ["Book B", "Book D"],
]
# Itemsets need 40% support and rules need 70% confidence to be kept.
frequent_itemsets3, rules3 = apriori_analysis(
    dataset3,
    min_support=0.4,
    min_confidence=0.7,
)

# Heading followed by the selected rule columns.
print(
    "Task 3 - Association Rules (Support >= 40%, Confidence >= 70%):",
    rules3.loc[:, ["antecedents", "consequents", "support", "confidence", "lift"]],
    sep="\n",
)



Task 1 - Frequent Itemsets (Support >= 50%):
   support              itemsets
0      0.6                (Beer)
1      0.8               (Bread)
2      0.6              (Diaper)
3      1.0                (Milk)
4      0.6        (Beer, Diaper)
5      0.6          (Beer, Milk)
6      0.8         (Bread, Milk)
7      0.6        (Diaper, Milk)
8      0.6  (Beer, Milk, Diaper)


Task 2 - Association Rules (Confidence >= 60%):
     antecedents consequents  support  confidence      lift
0         (Belt)     (Shirt)      0.6    0.750000  0.937500
1        (Shirt)      (Belt)      0.6    0.750000  0.937500
2         (Belt)       (Tie)      0.6    0.750000  0.937500
3          (Tie)      (Belt)      0.6    0.750000  0.937500
4          (Tie)     (Shirt)      0.6    0.750000  0.937500
5        (Shirt)       (Tie)      0.6    0.750000  0.937500
6    (Belt, Tie)     (Shirt)      0.4    0.666667  0.833333
7  (Belt, Shirt)       (Tie)      0.4    0.666667  0.833333
8   (Tie, Shirt)      (Belt)      0