In [2]:
from itertools import combinations

# Transactions to mine; each inner tuple is treated as one transaction of item ids.
ORIGINAL_DATASET = (
    (1, 2),
    (1, 3, 4),
    (2, 5, 7),
    (1, 2, 3),
    (2, 3, 5, 6),
)
MIN_SUPPORT = 2  # threshold compared against raw occurrence counts
MIN_CONFIDENCE = 50.0  # threshold compared against confidences expressed in percent

# C1: how many times each individual item occurs across all transactions.
c1 = {}

for transaction in ORIGINAL_DATASET:
    for element in transaction:
        if element in c1:
            c1[element] += 1
        else:
            c1[element] = 1

print("C1:", c1)

C1: {1: 3, 2: 4, 3: 3, 4: 1, 5: 2, 7: 1, 6: 1}


In [3]:
# Remove every item whose count is below MIN_SUPPORT. The keys to drop are
# collected first so the dict is never resized while it is being iterated,
# and c1 is modified in place rather than rebound.
below_threshold = [key for key, count in c1.items() if count < MIN_SUPPORT]
for key in below_threshold:
    del c1[key]
print(f"C1: after Min Support Filter {MIN_SUPPORT}", c1)

C1: after Min Support Filter 2 {1: 3, 2: 4, 3: 3, 5: 2}


In [4]:

# Frequent single items, in the key order of the filtered c1.
items = [*c1]
# List of per-size support dicts; it starts with the single-item counts and
# later cells append one dict per larger itemset size.
support = [c1]

items

[1, 2, 3, 5]

In [5]:

# Preview the candidate itemsets for each size. Note that range(2, len(items))
# stops at len(items) - 1, so the one combination containing every item is
# not listed here.
for size in range(2, len(items)):
    s = {}  # reset on every pass; nothing is stored in it in this cell
    print(f"Combinations of {size} items:")
    candidates = combinations(items, size)
    for candidate in candidates:
        print(candidate)


Combinations of 2 items:
(1, 2)
(1, 3)
(1, 5)
(2, 3)
(2, 5)
(3, 5)
Combinations of 3 items:
(1, 2, 3)
(1, 2, 5)
(1, 3, 5)
(2, 3, 5)


In [6]:
# Level-wise frequent-itemset search (Apriori-style): for each itemset size,
# count the support of every combination of the frequent single items, keep
# the combinations that reach MIN_SUPPORT, and stop at the first size that
# yields no frequent itemset.

# Keep only the single-item level so that re-running this cell does not append
# duplicate levels to the list created in the earlier cell.
del support[1:]

# Sizes run up to and including len(items); range(2, len(items)) would never
# test the itemset made of all frequent items.
for i in range(2, len(items) + 1):
    s = dict()  # frequent itemsets of size i -> support count

    # Generate all possible combinations of size i from the frequent items
    for combo in combinations(items, i):
        combo_set = set(combo)  # built once per candidate, not once per transaction

        # Support = number of transactions that contain every item of the combination
        count = sum(1 for itemset in ORIGINAL_DATASET if combo_set.issubset(itemset))

        # Combinations that never occur are not recorded, whatever the threshold is
        if count and count >= MIN_SUPPORT:
            s[combo] = count

    # Any superset of an infrequent itemset is infrequent as well, so an empty
    # level means no larger size can succeed.
    if not s:
        break

    # Add this level's frequent itemsets to our support list
    support.append(s)

# Print all discovered frequent itemsets with their support counts
print(support)


[{1: 3, 2: 4, 3: 3, 5: 2}, {(1, 2): 2, (1, 3): 2, (2, 3): 2, (2, 5): 2}]


In [None]:
# Generate association rules from the largest frequent itemsets
rules = dict()

# support[-1] holds tuple itemsets only when at least one level beyond the
# single-item counts exists. With only the single-item level there is nothing
# to split into a rule, and iterating its integer keys would raise TypeError.
largest_itemsets = support[-1] if len(support) > 1 else {}

for combo, combo_count in largest_itemsets.items():
    for item in combo:
        # The remaining members of the itemset once `item` is taken out
        rest = list(combo)
        rest.remove(item)
        rest_size = len(rest)

        # The single-item level is keyed by the bare item, larger levels by tuples
        rest_key = rest[0] if rest_size == 1 else tuple(rest)

        # Confidence of item -> rest: count of combo relative to count of item, in percent
        item_to_rest = combo_count / support[0][item] * 100
        # Confidence of rest -> item: count of combo relative to count of rest, in percent
        rest_to_item = combo_count / support[rest_size - 1][rest_key] * 100

        # Add rules that meet minimum confidence threshold to results
        if item_to_rest >= MIN_CONFIDENCE:
            rules[f"{item}->{rest_key}"] = item_to_rest
        if rest_to_item >= MIN_CONFIDENCE:
            rules[f"{rest_key}->{item}"] = rest_to_item

# Print all generated association rules with their confidence values
print(rules)

{'1->2': 66.66666666666666, '2->1': 50.0, '1->3': 66.66666666666666, '3->1': 66.66666666666666, '2->3': 50.0, '3->2': 66.66666666666666, '2->5': 50.0, '5->2': 100.0}
