In [8]:
import numpy as np
import pandas as pd
import pyECLAT as eclat
from template import *
from itertools import combinations
from collections import defaultdict

In [9]:
### HINT ON UNIT TESTS ###
### All unit tests assume that itemsets are ordered lexicographically.
### An itemset [B, A] is NOT valid! It must be [A, B]! This convention allows extreme speed up.

In [11]:
## Unit test
dbExample = read_database("example.dat")

def testSyntax(F):
    found_problem = False
    for t in F:
        if type(t) != tuple:
            print("Entry " + str(t) + " in frequent itemset collection F is not a tuple.")
            found_problem = True
        elif len(t) != 2:
            print("Entry " + str(t) + " in frequent itemset collection F is a tuple but of length " + str(len(t)) + " instead of 2.")
            found_problem = True
        elif type(t[0]) != list:
            print("Itemset type in entry " + str(t) + " is " + str(type(t[0])) + " but should be list!")
            found_problem = True
        elif type(t[1]) != int:
            print("Support in entry " + str(t) + " is of type " + str(type(t[1])) + " but should be int!")
            found_problem = True
    print("Syntax check on frequent itemset collection: " + ("FAILED" if found_problem else "OK"))

def testFrequentItemsetCollection(F_exp, F_act):
    testSyntax(F_act)
    length_exp = len(F_exp)
    length_act = len(F_act)
    print("Length of collection: " + str("OK" if length_exp == length_act else "FAILED, expected length " + str(length_exp) + " but saw " + str(length_act)))
    obsolete = [f for f in F_act if not f in F_exp]
    missing = [f for f in F_exp if not f in F_act]
    if obsolete:
        print("Found unexpected entries in F: " + str(obsolete))
    if missing:
        print("Missing entries in F: " + str(missing))
    if not missing and not obsolete:
        print("F seems to be correct.")

F_exp = [(['A'], 4), (['A', 'B'], 4), (['A', 'B', 'D'], 3), (['A', 'B', 'D', 'E'], 3), (['A', 'B', 'E'], 4), (['A', 'D'], 3), (['A', 'D', 'E'], 3), (['A', 'E'], 4), (['B'], 6), (['B', 'C'], 4), (['B', 'C', 'E'], 3), (['B', 'D'], 4), (['B', 'D', 'E'], 3), (['B', 'E'], 5), (['C'], 4), (['C', 'E'], 3), (['D'], 4), (['D', 'E'], 3), (['E'], 5)]

# Test ECLAT
F_act = eclat(dbExample, 3)
testFrequentItemsetCollection(F_exp, F_act)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\juanc\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\juanc\AppData\Local\Temp\ipykernel_24016\1803632442.py", line 38, in <module>
    F_act = eclat(dbExample, 3)
            ^^^^^^^^^^^^^^^^^^^
TypeError: 'module' object is not callable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\juanc\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 2168, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\juanc\AppData\Roaming\Python\Python312\site-packages\IPython\core\ultratb.py", line 1454, in structured_traceback
    return FormattedTB.structured_traceback(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\jua

In [None]:
def testRuleGeneration(R_exp, R_act):

    # test syntax
    found_problem = False
    for r in R_act:
        if type(r) != tuple:
            print("Rule " + str(r) + " is not represented as a tuple!")
            found_problem = True
        if len(r) != 4:
            print("Rule " + str(r) + " does not consist of 4 entries. Should be (premise, conclusion, (absolute) support, confidence)")
            found_problem = True
        if type(r[0]) != list:
            print("Premise of rule " + str(r) + " is of type " + str(type(r[0])) + " but should be list!")
            found_problem = True
        if type(r[1]) != list:
            print("Conclusion of rule " + str(r) + " is of type " + str(type(r[0])) + " but should be list!")
            found_problem = True
    print("Syntax check " + ("FAILED. Skipping rest of the test." if found_problem else "OK"))
    if found_problem:
        return
    
    # semantics test
    length_exp = len(R_exp)
    length_act = len(R_act)
    print("Length of rule set: " + str("OK" if length_exp == length_act else "FAILED, expected length " + str(length_exp) + " but saw " + str(length_act)))
    obsolete = [f for f in R_act if not f in R_exp]
    missing = [f for f in R_exp if not f in R_act]
    if obsolete:
        print("Found unexpected entries in R: " + str(obsolete))
        found_problem = True
    if missing:
        print("Missing entries in R: " + str(missing))
        found_problem = True
    print("Rule set test: " + ("FAILED" if found_problem else "OK"))

R_exp = [(['A'], ['B'], 4, 1.0), (['A', 'D'], ['B'], 3, 1.0), (['B', 'D', 'E'], ['A'], 3, 1.0), (['A', 'D', 'E'], ['B'], 3, 1.0), (['A', 'B', 'D'], ['E'], 3, 1.0), (['D', 'E'], ['A', 'B'], 3, 1.0), (['A', 'D'], ['B', 'E'], 3, 1.0), (['A', 'E'], ['B'], 4, 1.0), (['A', 'B'], ['E'], 4, 1.0), (['A'], ['B', 'E'], 4, 1.0), (['D', 'E'], ['A'], 3, 1.0), (['A', 'D'], ['E'], 3, 1.0), (['A'], ['E'], 4, 1.0), (['C'], ['B'], 4, 1.0), (['C', 'E'], ['B'], 3, 1.0), (['D'], ['B'], 4, 1.0), (['D', 'E'], ['B'], 3, 1.0), (['E'], ['B'], 5, 1.0)]
R_act = getStrongRulesForDatabase(dbExample, 3, 1.0)
testRuleGeneration(R_exp, R_act)

In [None]:
def read_transaction_data(file_path):
    transactions = []
    with open(file_path, 'r') as file:
        for line in file:
            transactions.append(line.strip().split())
    return transactions
transactions = read_transaction_data('shop.dat')
#generate itemsets
def generate_itemsets(transactions, size):
    itemsets = defaultdict(int)
    for transaction in transactions:
        for itemset in combinations(transaction, size):
            itemsets[itemset] += 1
    return itemsets
#generate association rules
def generate_association_rules(transactions, min_support, min_confidence):
    # Calculate support for itemsets of size 1
    itemsets = generate_itemsets(transactions, 1)
    frequent_itemsets = {itemset: support for itemset, support in itemsets.items() if support >= min_support}
    # Generate rules
    rules = []
    for size in range(2, len(frequent_itemsets.keys()) + 1):
        itemsets = generate_itemsets(transactions, size)
        for itemset, support in itemsets.items():
            for i in range(1, size):
                for antecedent in combinations(itemset, i):
                    antecedent = tuple(sorted(antecedent))  # Ensure antecedent is a tuple
                    if antecedent not in frequent_itemsets:
                        # Skip antecedents not found in frequent_itemsets
                        continue
                    consequent = tuple(item for item in itemset if item not in antecedent)
                    confidence = support / frequent_itemsets[antecedent]
                    if confidence >= min_confidence and support >= min_support:
                        rules.append((antecedent, consequent, support, confidence))
    return rules
rules = generate_association_rules(transactions, min_support, min_confidence)
print("Generated association rules:", rules)
def filter_rules(rules):
    return [rule for rule in rules if len(rule[1]) >= 2]
def sort_rules_by_support(rules):
    return sorted(rules, key=lambda x: x[2], reverse=True)
transactions = read_transaction_data('shop.dat')
min_support = 500
min_confidence = 0.75
#association rules
rules = generate_association_rules(transactions, min_support, min_confidence)
filtered_rules = filter_rules(rules)
#Sort rules
sorted_rules = sort_rules_by_support(rules)
sorted_filtered_rules = sort_rules_by_support(filtered_rules)
#strong rules
print("Strong Rules:")
for idx, rule in enumerate(sorted_rules, start=1):
    antecedent = ", ".join(rule[0])
    consequent = ", ".join(rule[1])
    support = rule[2]
    confidence = rule[3]
    print(f"{idx}. If {{{antecedent}}} then {{{consequent}}}")
    print(f"   - Support: {support}")
    print(f"   - Confidence: {confidence:.2%}")
    print()
#sub-list of strong rules
print("Sub-list:")
for idx, rule in enumerate(sorted_filtered_rules, start=1):
    antecedent = ", ".join(rule[0])
    consequent = ", ".join(rule[1])
    support = rule[2]
    confidence = rule[3]
    print(f"{idx}. If {{{antecedent}}} then {{{consequent}}}")
    print(f"   - Support: {support}")
    print(f"   - Confidence: {confidence:.2%}")
    print()

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\juanc\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\juanc\AppData\Local\Temp\ipykernel_24016\2804586820.py", line 36, in <module>
    rules = generate_association_rules(transactions, min_support, min_confidence)
                                                     ^^^^^^^^^^^
NameError: name 'min_support' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\juanc\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 2168, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\juanc\AppData\Roaming\Python\Python312\site-packages\IPython\core\ultratb.py", line 1454, in structured_traceback
    return FormattedTB.st