In [61]:
def read_dataset(filepath):
    with open(filepath, 'r') as file:
        # Read the file and split each transaction by spaces
        transactions = [[int(item) for item in line.strip().split()] for line in file.readlines()]
    return transactions

In [62]:
from collections import defaultdict

def find_frequent_1_itemsets_in_desc(transactions, min_sup):
    item_count = defaultdict(int)
    for transaction in transactions:
        for item in set(transaction):  
            item_count[item] += 1
    

    # print(item_count)

    F = set()
    for item, count in item_count.items():
        if count >= min_sup:
            F.add(item)  
    

    return sorted(list(F), key=lambda x: (-item_count[x], x))


In [63]:
from itertools import combinations

class Node:
    def __init__(self, root=False):
        self.children = dict() 
        self.count = 0
        self.root = root

class Tree:
    def __init__(self):
        self.root = Node(root=True)

    def insert_helper(self, node, itemset, idx):
        if idx == len(itemset):
            return
        
        item = itemset[idx]
        if item not in node.children:
            node.children[item] = Node()
        node.children[item].count += 1
        self.insert_helper(node.children[item], itemset, idx + 1)

    def insert(self, itemset):
        self.insert_helper(self.root, itemset, 0)

    def print_tree(self):
        def _print(node, depth):
            for item, child in node.children.items():
                print("  " * depth + f"{item} ({child.count})")
                _print(child, depth + 1)

        _print(self.root, 0)

    def get_patterns(self, min_sup):
        L = []
        def _get_patterns(node, path):

            for item, child in node.children.items():
                if child.count >= min_sup:
                    new_path = path + [item]  
                    L.append(new_path)
                    _get_patterns(child, new_path)

        _get_patterns(self.root, [])

        new_L = []

        # if len(node.children) == 0:
        # Add all non-empty combinations of the path
        for item in L:
            for i in range(1, len(item) + 1):
                for combo in combinations(item, i):
                    new_L.append(list(combo))
        
        new_L = [list(pat) for pat in set(tuple(pat) for pat in new_L)]

        return new_L

    
    
def clean_up(transactions, F):
    new_transactions = []
    F_set = set(F)
    for transaction in transactions:
        new_transaction = [item for item in F if item in transaction]
        if new_transaction:
            new_transactions.append(new_transaction)
    return new_transactions




In [64]:
import math

filepath = 'Datasets/sample.txt'
Transactions = read_dataset(filepath) 
min_sup =  2 #math.ceil(0.20 * len(Transactions))

L = []

F = find_frequent_1_itemsets_in_desc(Transactions, min_sup)
# print(F)

# print(Transactions)

Transactions = clean_up(Transactions, F)

# print(Transactions)

tree = Tree()

for itemset in Transactions:
    tree.insert(itemset)


tree.print_tree()

L = tree.get_patterns(min_sup)

# L = sorted(L)

print(L)




2 (7)
  1 (4)
    5 (1)
    4 (1)
    3 (2)
      5 (1)
  4 (1)
  3 (2)
1 (2)
  3 (2)
[[1, 3], [2], [2, 1], [2, 1, 3], [2, 3], [1], [3]]
