In [1]:
import pandas as pd
import itertools
from collections import defaultdict

# Sample transaction data: each inner list is one transaction and holds the
# names of the items purchased together in that transaction.
dataset = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diapers', 'Beer', 'Eggs'],
    ['Milk', 'Bread', 'Butter', 'Diapers'],
    ['Bread', 'Milk', 'Diapers', 'Beer'],
    ['Bread', 'Butter', 'Diapers', 'Eggs'],
    ['Milk', 'Bread', 'Butter', 'Beer']
]

# Step 1: Convert the dataset into a DataFrame (one-hot encoded)
def encode_transactions(dataset):
    """One-hot encode a collection of transactions.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually sortable items. It may be a one-shot iterable such as
            a generator; it is consumed exactly once.

    Returns:
        A ``pandas.DataFrame`` with one row per transaction (in input
        order) and one boolean column per distinct item (in sorted order).
        A cell is ``True`` when the item occurs in that transaction.
    """
    # Materialise every transaction once as a set: this makes the function
    # safe for single-pass iterables (the input is needed both to collect
    # the item universe and to build the rows) and gives O(1) membership
    # tests instead of a linear scan per item.
    transactions = [set(transaction) for transaction in dataset]
    all_items = sorted(set().union(*transactions))  # All unique items
    encoded_data = [
        [item in transaction for item in all_items]
        for transaction in transactions
    ]
    return pd.DataFrame(encoded_data, columns=all_items)

# One-hot encode the sample transactions: one row per transaction and one
# boolean column per distinct item.
df = encode_transactions(dataset)

# Step 2: Generate frequent itemsets (using a simple support threshold)
def get_frequent_itemsets(df, min_support):
    """Find every itemset whose support is at least ``min_support``.

    The search proceeds level by level (itemsets of size 1, 2, ...) and
    uses the Apriori downward-closure property: an itemset can only be
    frequent if all of its subsets are frequent. Candidates with an
    infrequent subset are skipped without touching the DataFrame, and the
    search stops as soon as a level yields no frequent itemset. The result
    is the same as exhaustively testing every column combination.

    Args:
        df: One-hot encoded transactions: one row per transaction, one
            column per item, truthy where the item is present.
        min_support: Minimum fraction of transactions that must contain
            all items of an itemset for it to be kept.

    Returns:
        A ``defaultdict(int)`` mapping each frequent itemset (a tuple of
        column labels, in column order) to its support (fraction of
        transactions). Entries are ordered by itemset size, then by
        column order. Empty when ``df`` has no rows.
    """
    itemsets_support = defaultdict(int)
    num_transactions = len(df)
    if num_transactions == 0:
        # Support is undefined without transactions; avoid dividing by 0.
        return itemsets_support

    candidate_items = list(df.columns)
    previous_level = None  # Frequent itemsets found at size - 1.
    size = 1
    while size <= len(candidate_items):
        current_level = set()
        for itemset in itertools.combinations(candidate_items, size):
            # Downward closure: skip candidates with an infrequent subset.
            if previous_level is not None and any(
                subset not in previous_level
                for subset in itertools.combinations(itemset, size - 1)
            ):
                continue
            itemset_support = df[list(itemset)].all(axis=1).sum()
            support = itemset_support / num_transactions
            if support >= min_support:
                itemsets_support[itemset] = support
                current_level.add(itemset)

        if not current_level:
            # No frequent itemset of this size, so none of a larger size.
            break

        # Only items that still occur in a frequent itemset can be part of
        # a larger one; keep them in their original column order.
        surviving = set(itertools.chain.from_iterable(current_level))
        candidate_items = [i for i in candidate_items if i in surviving]
        previous_level = current_level
        size += 1

    return itemsets_support

# Minimum support threshold: an itemset is kept only when the fraction of
# transactions containing all of its items is at least this value
# (e.g., 0.3 means 30% of transactions).
min_support = 0.3
frequent_itemsets = get_frequent_itemsets(df, min_support)

# Step 3: Generate association rules from frequent itemsets
def generate_association_rules(frequent_itemsets, min_lift):
    """Derive association rules ``antecedent -> consequent`` from itemsets.

    Every frequent itemset with at least two items is split into each
    possible non-empty antecedent and the complementary consequent. For
    each split:

        confidence = support(itemset) / support(antecedent)
        lift       = confidence / support(consequent)

    Args:
        frequent_itemsets: Mapping from itemset (tuple of items) to its
            support. The antecedent and consequent are looked up in this
            same mapping, as tuples that keep the item order of the
            itemset they were taken from; a split whose antecedent or
            consequent is missing (or has non-positive support) is skipped.
        min_lift: Minimum lift a rule must reach to be reported.

    Returns:
        A list of dicts with keys ``'Antecedent'``, ``'Consequent'``,
        ``'Support'``, ``'Confidence'`` and ``'Lift'``, in the iteration
        order of ``frequent_itemsets`` and, within an itemset, by
        increasing antecedent size.
    """
    rules = []
    for itemset, support in frequent_itemsets.items():
        if len(itemset) < 2:  # A rule needs both sides to be non-empty
            continue

        for subset_size in range(1, len(itemset)):
            for antecedent in itertools.combinations(itemset, subset_size):
                # Build the consequent as an ordered tuple taken from the
                # itemset. Going through a set would scramble the item
                # order, so the lookup below could miss the key and the
                # rule would be silently dropped.
                chosen = set(antecedent)
                consequent = tuple(
                    item for item in itemset if item not in chosen
                )
                antecedent_support = frequent_itemsets.get(antecedent, 0)
                consequent_support = frequent_itemsets.get(consequent, 0)

                if not (antecedent_support > 0 and consequent_support > 0):
                    continue

                confidence = support / antecedent_support
                lift = confidence / consequent_support

                if lift >= min_lift:
                    rules.append({
                        'Antecedent': antecedent,
                        'Consequent': consequent,
                        'Support': support,
                        'Confidence': confidence,
                        'Lift': lift
                    })
    return rules

# Minimum lift threshold: only rules whose lift (confidence divided by the
# consequent's support) is at least this value are kept.
min_lift = 1.2
rules = generate_association_rules(frequent_itemsets, min_lift)

# Step 4: Display the results
# Each frequent itemset is a tuple of item names; its support is the
# fraction of transactions that contain every item in the tuple.
print("\nFrequent Itemsets:")
for itemset, support in frequent_itemsets.items():
    print(f"Itemset: {itemset}, Support: {support}")

# Each rule is a dict holding the antecedent and consequent tuples together
# with the rule's support, confidence and lift.
print("\nAssociation Rules:")
for rule in rules:
    print(f"Rule: {rule['Antecedent']} -> {rule['Consequent']}, "
          f"Support: {rule['Support']}, Confidence: {rule['Confidence']}, Lift: {rule['Lift']}")



Frequent Itemsets:
Itemset: ('Beer',), Support: 0.5
Itemset: ('Bread',), Support: 1.0
Itemset: ('Butter',), Support: 0.6666666666666666
Itemset: ('Diapers',), Support: 0.6666666666666666
Itemset: ('Eggs',), Support: 0.3333333333333333
Itemset: ('Milk',), Support: 0.6666666666666666
Itemset: ('Beer', 'Bread'), Support: 0.5
Itemset: ('Beer', 'Diapers'), Support: 0.3333333333333333
Itemset: ('Beer', 'Milk'), Support: 0.3333333333333333
Itemset: ('Bread', 'Butter'), Support: 0.6666666666666666
Itemset: ('Bread', 'Diapers'), Support: 0.6666666666666666
Itemset: ('Bread', 'Eggs'), Support: 0.3333333333333333
Itemset: ('Bread', 'Milk'), Support: 0.6666666666666666
Itemset: ('Butter', 'Diapers'), Support: 0.3333333333333333
Itemset: ('Butter', 'Milk'), Support: 0.5
Itemset: ('Diapers', 'Eggs'), Support: 0.3333333333333333
Itemset: ('Diapers', 'Milk'), Support: 0.3333333333333333
Itemset: ('Beer', 'Bread', 'Diapers'), Support: 0.3333333333333333
Itemset: ('Beer', 'Bread', 'Milk'), Support: 0.3