In [4]:
from collections import defaultdict
import itertools

def calculate_itemsets(data, min_support):
    """
    Returns a dictionary containing all 1-itemsets whose support is greater than or equal to min_support.

    data is an iterable of transactions, each an iterable of hashable items.
    min_support is the minimum number of transactions an item must appear in.

    The result maps each frequent item, wrapped in a one-element tuple, to its
    support. Keys appear in the order the items are first seen in data.
    """
    counts = defaultdict(int)
    for transaction in data:
        # Support counts transactions, not occurrences: an item repeated inside
        # one transaction must contribute only once. dict.fromkeys removes the
        # duplicates while keeping first-seen order, so the key order of the
        # result stays deterministic.
        for item in dict.fromkeys(transaction):
            counts[item] += 1
    return {
        (item,): count
        for item, count in counts.items()
        if count >= min_support
    }

def join_sets(itemsets, length):
    """
    Build the candidate itemsets of size `length` from a collection of itemsets.

    Every ordered pair of itemsets (an itemset is also paired with itself) is
    merged; a merge is kept only when it holds exactly `length` distinct items.
    Each candidate is returned as a sorted tuple, collected in a set.
    """
    merged_pairs = (
        set(left).union(right)
        for left in itemsets
        for right in itemsets
    )
    return {
        tuple(sorted(merged))
        for merged in merged_pairs
        if len(merged) == length
    }

def calculate_itemset_support(itemset, data):
    """
    Count the transactions in `data` that contain every item of `itemset`.

    The returned integer is the support of the itemset; an empty itemset is
    contained in every transaction.
    """
    return sum(
        1
        for transaction in data
        if set(itemset) <= set(transaction)
    )

def calculate_frequent_itemsets(data, min_support):
    """
    Generate the frequent itemsets of `data`, one itemset size at a time.

    This is a generator: it yields one dictionary per itemset size (1, then 2,
    then 3, ...), each mapping an itemset tuple to its support. Only itemsets
    whose support is greater than or equal to min_support are included, and
    iteration stops at the first size that has no frequent itemset.
    """
    current_level = calculate_itemsets(data, min_support)
    size = 2
    while current_level:
        yield current_level
        # Candidates of the next size are built from the level just yielded,
        # then filtered by their actual support in the data.
        scored_candidates = (
            (candidate, calculate_itemset_support(candidate, data))
            for candidate in join_sets(current_level, size)
        )
        current_level = {
            candidate: support
            for candidate, support in scored_candidates
            if support >= min_support
        }
        size += 1

def calculate_confidence(itemset, subset, data):
    """
    Given an itemset and a subset of that itemset, calculates the confidence of the rule subset -> (itemset - subset).

    The confidence is support(itemset) / support(subset), with both supports
    counted over the transactions in data.

    Returns a float. When subset appears in no transaction the ratio is
    undefined; 0.0 is returned instead of raising ZeroDivisionError, so such a
    rule is simply rejected by any positive confidence threshold.
    """
    support_subset = calculate_itemset_support(subset, data)
    if support_subset == 0:
        # The antecedent never occurs, so there is nothing to divide by.
        return 0.0
    support_itemset = calculate_itemset_support(itemset, data)
    return support_itemset / support_subset

def generate_rules(frequent_itemsets, min_confidence, data=None):
    """
    Given an iterable of frequent itemsets and a minimum confidence threshold, generates all
    association rules with confidence greater than or equal to min_confidence.

    frequent_itemsets is any iterable of itemset tuples (iterating a dictionary
    keyed by itemsets works as well). data is the list of transactions used to
    compute confidences. It is optional for backward compatibility: when it is
    omitted, the module-level variable named ``data`` is used, which is what
    earlier versions of this function always read.

    Returns a list of (antecedent, consequent, confidence) tuples. The items of
    the consequent keep the order they have in the itemset.

    Raises ValueError if a rule has to be evaluated but no transactions are
    available (neither passed in nor defined at module level).
    """
    if data is None:
        # Legacy behaviour: fall back to the module-level dataset.
        data = globals().get("data")

    rules = []
    for itemset in frequent_itemsets:
        # A rule needs a non-empty antecedent and a non-empty consequent.
        if len(itemset) < 2:
            continue
        if data is None:
            raise ValueError(
                "generate_rules needs the transactions: pass them as `data`"
            )
        for size in range(1, len(itemset)):
            for subset in itertools.combinations(itemset, size):
                confidence = calculate_confidence(itemset, subset, data)
                if confidence >= min_confidence:
                    antecedent_items = set(subset)
                    # Filter the itemset instead of subtracting sets: the
                    # order of a set difference varies between runs.
                    consequent = tuple(
                        item for item in itemset if item not in antecedent_items
                    )
                    rules.append((subset, consequent, confidence))
    return rules


# Example usage: mine a small market-basket dataset and print the results.
data = [
    ("bread", "milk"),
    ("bread", "diapers", "beer", "eggs"),
    ("milk", "diapers", "beer", "cocacola"),
    ("bread", "milk", "diapers", "beer"),
    ("bread", "milk", "diapers", "cocacola"),
]
min_support = 2
min_confidence = 0.7

# Flatten the per-size dictionaries into a single list of itemset tuples.
frequent_itemsets = []
for itemsets in calculate_frequent_itemsets(data, min_support):
    frequent_itemsets.extend(itemsets)

association_rules = generate_rules(set(frequent_itemsets), min_confidence)

print('Frequent itemsets:')
for itemset in frequent_itemsets:
    print(itemset)

print('\nAssociation rules:')
for rule in association_rules:
    # Each rule is an (antecedent, consequent, confidence) triple.
    print('{} -> {}: {}'.format(*rule))


Frequent itemsets:
('bread',)
('milk',)
('diapers',)
('beer',)
('cocacola',)
('diapers', 'milk')
('bread', 'milk')
('beer', 'milk')
('beer', 'diapers')
('cocacola', 'diapers')
('cocacola', 'milk')
('beer', 'bread')
('bread', 'diapers')
('beer', 'diapers', 'milk')
('bread', 'diapers', 'milk')
('cocacola', 'diapers', 'milk')
('beer', 'bread', 'diapers')

Association rules:
('diapers',) -> ('milk',): 0.75
('milk',) -> ('diapers',): 0.75
('beer', 'milk') -> ('diapers',): 1.0
('bread',) -> ('milk',): 0.75
('milk',) -> ('bread',): 0.75
('cocacola',) -> ('diapers',): 1.0
('cocacola',) -> ('milk',): 1.0
('bread',) -> ('diapers',): 0.75
('diapers',) -> ('bread',): 0.75
('beer', 'bread') -> ('diapers',): 1.0
('cocacola',) -> ('diapers', 'milk'): 1.0
('cocacola', 'diapers') -> ('milk',): 1.0
('cocacola', 'milk') -> ('diapers',): 1.0
('beer',) -> ('diapers',): 1.0
('diapers',) -> ('beer',): 0.75
