## Utils

In [8]:
pip install numpy pandas scikit-learn matplotlib seaborn jupyter


Collecting pandas
  Downloading pandas-2.3.2-cp312-cp312-macosx_10_13_x86_64.whl.metadata (91 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m[31m1.5 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting scikit-learn
  Downloading scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl.metadata (11 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.6-cp312-cp312-macosx_10_13_x86_64.whl.metadata (11 kB)
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting jupyter
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Downloading scipy-1.16.2-cp312-cp312-macosx_10_14_x86_64.whl.metadata (62 kB)
[2K  

In [1]:
def read_input(inputf, data=None):
  """Read whitespace-separated integer transactions from a text file.

  Every non-blank line of ``inputf`` is split on whitespace, each token is
  converted to ``int``, and the resulting list is appended to ``data``.
  Blank (or whitespace-only) lines are skipped so that they do not show up
  as empty transactions and distort counts derived from ``len(data)``.

  Args:
    inputf: Path of the file to read.
    data: List that receives one list of ints per transaction; it is
      mutated in place. When omitted, a new list is created.

  Returns:
    The list the transactions were appended to (``data`` itself when it
    was supplied).

  Raises:
    OSError: If the file cannot be opened.
    ValueError: If a token cannot be parsed as an integer.
  """
  if data is None:
    data = []
  with open(inputf) as f:
    for row in f:
      # split() without arguments already discards surrounding whitespace,
      # so no per-token strip() is needed.
      items = row.split()
      if not items:
        continue
      data.append([int(item) for item in items])
  return data

In [3]:
# Read datasets/ml_01/test.dat into a fresh list (read_input appends one
# list of ints per line of the file) and print the parsed transactions.
data = list()
read_input(inputf="datasets/ml_01/test.dat", data=data)
print(data)

[[1, 2, 5], [1, 4, 5], [1, 3], [2, 3, 4], [2, 3, 5], [4, 5], [1, 3, 4, 5], [1, 2, 5], [1, 3, 5], [1]]


## 1. Generate all combinations without repetition of length 3 from 6 possible elements

In [16]:
from itertools import combinations
from math import factorial

numbers = [0, 1, 2, 3, 4, 5]
max_length = 3

# Expected number of combinations from the closed form n! / (k! * (n - k)!).
# Integer division keeps the result an exact int instead of a float.
unrepeated_combinations_length = factorial(len(numbers)) // (
  factorial(max_length) * factorial(len(numbers) - max_length)
)

# Generate every combination of `max_length` distinct elements. Using
# itertools.combinations makes the result follow `max_length`, whereas a fixed
# number of nested loops would always produce triples.
unrepeated_combinations = [
  list(combination) for combination in combinations(numbers, max_length)
]

print("Count:", unrepeated_combinations_length)
print(unrepeated_combinations)
print("Actual count:", len(unrepeated_combinations))

# The generated list must agree with the closed-form count.
assert len(unrepeated_combinations) == unrepeated_combinations_length


Count: 20
[[0, 1, 2], [0, 1, 3], [0, 1, 4], [0, 1, 5], [0, 2, 3], [0, 2, 4], [0, 2, 5], [0, 3, 4], [0, 3, 5], [0, 4, 5], [1, 2, 3], [1, 2, 4], [1, 2, 5], [1, 3, 4], [1, 3, 5], [1, 4, 5], [2, 3, 4], [2, 3, 5], [2, 4, 5], [3, 4, 5]]
Actual count: 20


[(0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5), (0, 2, 3), (0, 2, 4), (0, 2, 5), (0, 3, 4), (0, 3, 5), (0, 4, 5), (1, 2, 3), (1, 2, 4), (1, 2, 5), (1, 3, 4), (1, 3, 5), (1, 4, 5), (2, 3, 4), (2, 3, 5), (2, 4, 5), (3, 4, 5)]


## 2. On one of the test files (chess, connect), generate frequent patterns (itemsets) and calculate their Support.

In [29]:
from itertools import combinations

# Minimum relative support an itemset needs in order to be kept in final_supp.
MIN_SUPPORT = 0.25

# Collect every distinct item that occurs in any transaction.
numbers_in_data = set()
for transaction in data:
  numbers_in_data.update(transaction)

# Generate all unrepeated combinations up to the number of distinct items.
# The items are sorted first so that every combination is an increasing
# sequence and the generation order does not depend on set iteration order.
# NOTE: this enumerates 2**n - 1 itemsets, so it is only feasible for a small
# number of distinct items.
sorted_numbers = sorted(numbers_in_data)
max_length = len(sorted_numbers)
unrepeated_combinations = [
  list(combination)
  for length in range(1, max_length + 1)
  for combination in combinations(sorted_numbers, length)
]

# Get support (absolute count of containing transactions) for each combination.
# Transactions are converted to sets once so the subset test is not a repeated
# linear scan of a list.
transaction_sets = [set(transaction) for transaction in data]
combination_support = {}
for combination in unrepeated_combinations:
  combination_support[tuple(combination)] = sum(
    1 for items in transaction_sets if items.issuperset(combination)
  )

# The empty itemset is contained in every transaction.
combination_support[()] = len(data)

# Keep only itemsets whose relative support reaches MIN_SUPPORT.
# final_supp is rebuilt from scratch here so that the cell runs on a fresh
# kernel and does not carry over entries from earlier executions.
total_transactions = len(data)
final_supp = {}
if total_transactions:  # no transactions -> no relative support to compute
  for combination, support in combination_support.items():
    act_sup = support / total_transactions
    if act_sup >= MIN_SUPPORT:
      final_supp[combination] = act_sup

for combination, support in final_supp.items():
  print(f"Combination: {combination}, Support: {support:.2f}")

Combination: (), Support: 1.00
Combination: (1,), Support: 0.70
Combination: (2,), Support: 0.40
Combination: (3,), Support: 0.50
Combination: (4,), Support: 0.40
Combination: (5,), Support: 0.70
Combination: (4, 5), Support: 0.30
Combination: (3, 5), Support: 0.30
Combination: (2, 5), Support: 0.30
Combination: (1, 3), Support: 0.30
Combination: (1, 5), Support: 0.50


{(): 1.0,
 (1,): 0.7,
 (2,): 0.4,
 (3,): 0.5,
 (4,): 0.4,
 (5,): 0.7,
 (1, 3): 0.3,
 (1, 5): 0.5,
 (2, 5): 0.3,
 (3, 5): 0.3,
 (4, 5): 0.3}

## 3. From the generated frequent patterns, write down the rules and their Confidence.

In [34]:
# Build rules of the form (itemset minus one item) -> (that item) from every
# frequent itemset with at least two items. The confidence of a rule is the
# support of the whole itemset divided by the support of its antecedent.
confidence = {}
for itemset, itemset_support in final_supp.items():
  if len(itemset) < 2:
    continue
  for position, item in enumerate(itemset):
    antecedent = itemset[:position] + itemset[position + 1:]
    if antecedent not in final_supp:
      continue
    confidence[(antecedent, (item,))] = itemset_support / final_supp[antecedent]

# Keep only the rules whose confidence is at least 0.5.
real_confidence = {
  rule: rule_confidence
  for rule, rule_confidence in confidence.items()
  if rule_confidence >= 0.5
}

for (antecedent, consequent), conf in real_confidence.items():
  print(f"Rule: {antecedent} -> {consequent}, Confidence: {conf:.2f}")

Rule: (4,) -> (5,), Confidence: 0.75
Rule: (3,) -> (5,), Confidence: 0.60
Rule: (2,) -> (5,), Confidence: 0.75
Rule: (3,) -> (1,), Confidence: 0.60
Rule: (5,) -> (1,), Confidence: 0.71
Rule: (1,) -> (5,), Confidence: 0.71


{((3,), (1,)): 0.7142857142857143,
 ((1,), (3,)): 0.7142857142857143,
 ((5,), (1,)): 1.0,
 ((1,), (5,)): 1.0,
 ((5,), (2,)): 0.5714285714285715,
 ((2,), (5,)): 0.5714285714285715,
 ((5,), (3,)): 0.7142857142857143,
 ((3,), (5,)): 0.7142857142857143,
 ((5,), (4,)): 0.5714285714285715,
 ((4,), (5,)): 0.5714285714285715}