## Utils

In [1]:
def read_input(inputf, data=None):
    """Read whitespace-separated integer transactions from a text file.

    Every non-blank line of the file becomes one transaction: a list with the
    integers found on that line, in file order.

    Args:
        inputf: Path of the file to read.
        data: List the transactions are appended to (mutated in place). When
            omitted, a new list is created.

    Returns:
        The list holding the transactions; this is the very same object as
        ``data`` when one was passed in.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on a line is not a valid integer literal.
    """
    if data is None:
        data = []
    with open(inputf) as f:
        for row in f:
            # split() with no argument already discards all surrounding
            # whitespace, so the tokens need no further stripping.
            tokens = row.split()
            if not tokens:
                # A blank line (e.g. a trailing newline at the end of the file)
                # is not a transaction; appending [] would inflate the
                # transaction count that support values are divided by.
                continue
            data.append([int(token) for token in tokens])
    return data

In [2]:
# Load the transactions from the test file; read_input appends one list of
# ints per line to `data` in place.
data = []
read_input("datasets/ml_01/test.dat", data)

## 1. Generate all combinations of length 3, without repetition, from 6 possible items

In [3]:
from itertools import combinations

# Every 3-element combination (no repetition) of the six items 0..5.
print([*combinations(range(6), 3)])

[(0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5), (0, 2, 3), (0, 2, 4), (0, 2, 5), (0, 3, 4), (0, 3, 5), (0, 4, 5), (1, 2, 3), (1, 2, 4), (1, 2, 5), (1, 3, 4), (1, 3, 5), (1, 4, 5), (2, 3, 4), (2, 3, 5), (2, 4, 5), (3, 4, 5)]


## 2. On one of the test files (chess, connect), generate a number of patterns and calculate their support.

In [4]:
# Reset `data` to an empty list and read the same test file again, so the
# list holds exactly one copy of the transactions.
data = []
read_input("datasets/ml_01/test.dat", data)

In [5]:
def calculate_support(data, pattern):
    """Return the fraction of transactions that contain every item of a pattern.

    Args:
        data: Sized collection of transactions; each transaction must support
            the ``in`` operator (e.g. a list or a set of items).
        pattern: Iterable of items that must all be present in a transaction
            for it to count. An empty pattern is contained in every
            transaction.

    Returns:
        A float in ``[0.0, 1.0]``: matching transactions divided by the total
        number of transactions. ``0.0`` when ``data`` is empty.
    """
    n = len(data)
    if n == 0:
        # With no transactions there is nothing to divide by; report zero
        # support instead of raising ZeroDivisionError.
        return 0.0
    count = sum(1 for trans in data if all(x in trans for x in pattern))
    return count / n


def generate_patterns_and_support(data, max_len=3, min_support=0.0):
    """Enumerate item patterns up to ``max_len`` items and keep the frequent ones.

    Args:
        data: Collection of transactions, each an iterable of hashable,
            mutually orderable items.
        max_len: Largest pattern size to generate (sizes 1..max_len).
        min_support: Minimum support (fraction of transactions) a pattern
            needs in order to be kept.

    Returns:
        Dict mapping each kept pattern (a tuple of items in sorted order) to
        its support. Entries are inserted by increasing pattern size and, for
        equal size, in lexicographic order.
    """
    # Build the sets once so every membership test during support counting is
    # a hash lookup instead of a scan over a list.
    transactions = [frozenset(trans) for trans in data]
    items = sorted({x for trans in transactions for x in trans})
    pattern_support = {}
    candidate_items = items
    for k in range(1, max_len + 1):
        for pattern in combinations(candidate_items, k):
            sup = calculate_support(transactions, pattern)
            if sup >= min_support:
                pattern_support[pattern] = sup
        if k == 1:
            # Support can only shrink when items are added to a pattern, so a
            # pattern containing an item that is infrequent on its own can
            # never reach min_support. Dropping those items here leaves the
            # result unchanged and avoids generating hopeless candidates.
            candidate_items = [x for x in items if (x,) in pattern_support]
    return pattern_support

In [6]:
# Patterns of at most 2 items whose support is at least 0.3.
res = generate_patterns_and_support(data, max_len=2, min_support=0.3)
print(res)

{(1,): 0.7, (2,): 0.4, (3,): 0.5, (4,): 0.4, (5,): 0.7, (1, 3): 0.3, (1, 5): 0.5, (2, 5): 0.3, (3, 5): 0.3, (4, 5): 0.3}


## 3. From the generated frequent patterns, write down the rules and their Confidence.

In [7]:
def generate_rules(pattern_support, min_conf=0.5, only_from_len=None):
    """Derive association rules A -> B from a table of pattern supports.

    Every itemset with at least two items is split into each possible
    non-empty antecedent A and the remaining consequent B. The confidence of
    the rule is support(itemset) / support(A).

    Args:
        pattern_support: Dict mapping item tuples to their support. The
            antecedent is looked up as a sorted tuple; antecedents that are
            missing or have zero support are skipped.
        min_conf: Minimum confidence a rule needs in order to be returned.
        only_from_len: When given, only itemsets of exactly this size are
            turned into rules.

    Returns:
        List of dicts with keys "A", "B", "support_AB", "support_A" and
        "confidence", ordered by descending confidence (ties keep their
        generation order).
    """
    found = []
    for itemset, supp_ab in pattern_support.items():
        size = len(itemset)
        wrong_size = only_from_len is not None and size != only_from_len
        if size < 2 or wrong_size:
            continue
        all_items = set(itemset)
        for antecedent_len in range(1, size):
            for chosen in combinations(itemset, antecedent_len):
                antecedent = tuple(sorted(chosen))
                consequent = tuple(sorted(all_items - set(chosen)))
                supp_A = pattern_support.get(antecedent)
                if supp_A:
                    # Only reached for a known, non-zero antecedent support,
                    # so the division is safe.
                    conf = supp_ab / supp_A
                    if conf >= min_conf:
                        found.append({
                            "A": antecedent,
                            "B": consequent,
                            "support_AB": supp_ab,
                            "support_A": supp_A,
                            "confidence": conf,
                        })
    # sorted() is stable, so rules with equal confidence stay in the order in
    # which they were generated.
    return sorted(found, key=lambda rule: rule["confidence"], reverse=True)

In [8]:
# Supports of all patterns with at most 2 items and support >= 0.25, then the
# rules with confidence >= 0.5 derived from them.
ps = generate_patterns_and_support(data, max_len=2, min_support=0.25)
rules = generate_rules(ps, min_conf=0.5, only_from_len=2)  # rules from pairs only



In [9]:
# Print one line per rule: comma-separated antecedent items, an arrow, the
# comma-separated consequent items, and the confidence to three decimals.
for r in rules:
    A = ",".join(str(item) for item in r["A"])
    B = ",".join(str(item) for item in r["B"])
    print(f"{A} -> {B} | conf={r['confidence']:.3f}")

2 -> 5 | conf=0.750
4 -> 5 | conf=0.750
1 -> 5 | conf=0.714
5 -> 1 | conf=0.714
3 -> 1 | conf=0.600
3 -> 5 | conf=0.600


In [10]:
# Rebinds `ps`: supports of all patterns with at most 3 items and
# support >= 0.15. Rules are then built from the 3-item patterns only,
# keeping those with confidence >= 0.5.
ps = generate_patterns_and_support(data, max_len=3, min_support=0.15)
rules_from_triples = generate_rules(ps, min_conf=0.5, only_from_len =3)


In [11]:
# Print the rules derived from 3-item patterns, one per line, as
# "antecedent -> consequent | conf=..." with comma-separated items.
for r in rules_from_triples:
    A = ",".join(str(item) for item in r["A"])
    B = ",".join(str(item) for item in r["B"])
    print(f"{A} -> {B} | conf={r['confidence']:.3f}")

1,2 -> 5 | conf=1.000
1,4 -> 5 | conf=1.000
2,5 -> 1 | conf=0.667
1,3 -> 5 | conf=0.667
3,5 -> 1 | conf=0.667
4,5 -> 1 | conf=0.667
2 -> 1,5 | conf=0.500
4 -> 1,5 | conf=0.500
