In [17]:
def read_dataset(filepath):
    """Load a transaction database from a whitespace-separated text file.

    Each non-blank line is one transaction whose tokens are parsed as
    integers. Blank or whitespace-only lines are skipped so that they do not
    show up as empty transactions (which would inflate the transaction count
    used for a relative minimum support).

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of transactions in file order, each a list of ``int`` items.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token cannot be parsed as an integer.
    """
    transactions = []
    with open(filepath, 'r') as file:
        # Stream the file line by line instead of materialising it with
        # readlines(); split() with no argument already ignores surrounding
        # whitespace, so no explicit strip() is required.
        for line in file:
            tokens = line.split()
            if tokens:
                transactions.append([int(token) for token in tokens])
    return transactions

In [18]:
from collections import defaultdict

def find_frequent_1_itemsets_in_desc(transactions, min_sup):
    """Return the frequent single items, most frequent first.

    The support of an item is the number of transactions containing it; an
    item repeated inside one transaction is counted once for that transaction.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.
        min_sup: Minimum number of transactions an item must appear in.

    Returns:
        A list of the items whose support is at least ``min_sup``, ordered by
        descending support; ties are broken by ascending item value.
    """
    support = defaultdict(int)
    for basket in transactions:
        # set() collapses duplicates so each transaction contributes at most 1.
        for element in set(basket):
            support[element] += 1

    frequent = [element for element, seen in support.items() if seen >= min_sup]
    frequent.sort(key=lambda element: (-support[element], element))
    return frequent


In [19]:
from itertools import combinations


class Node:
    """A single vertex of the prefix trees defined in this file.

    Args:
        root: ``True`` for the sentinel root node of a tree.
        item: The item stored in this node (``None`` by default).
    """

    def __init__(self, root=False, item=None):
        # Child nodes; the trees in this file key this mapping by item.
        self.children = {}
        # Starts at zero and is increased by the trees on every insertion.
        self.count = 0
        self.root, self.item = root, item
        # Both links are unset until a tree wires the node in: `parent` is the
        # node one level up, `node_linker_next` the next node in a linked chain.
        self.parent = self.node_linker_next = None


class Conditional_Tree:
    """Weighted prefix tree of itemsets.

    Every inserted itemset becomes a path starting at the root, and the
    weight supplied with the insertion is added to the count of each node on
    that path. ``Node_Linker_Table`` maps an item to the first node created
    for it; later nodes for the same item are chained in creation order via
    ``node_linker_next``.
    """

    def __init__(self):
        self.root = Node(root=True)
        self.Node_Linker_Table = {}  # {int : Node}

    def __add_to_Node_Linker_Table(self, item, node:Node):
        """Register ``node`` as the newest node holding ``item``."""
        if item in self.Node_Linker_Table:
            # Walk to the last node of the chain and hook the new one behind it.
            tail = self.Node_Linker_Table[item]
            while tail.node_linker_next is not None:
                tail = tail.node_linker_next
            tail.node_linker_next = node
        else:
            self.Node_Linker_Table[item] = node

    def __insert_helper(self, node, itemset, idx, item_count):
        """Insert ``itemset[idx:]`` below ``node``, adding ``item_count`` per node."""
        cursor = node
        for position in range(idx, len(itemset)):
            key = itemset[position]
            if key not in cursor.children:
                fresh = Node(item=key)
                cursor.children[key] = fresh
                self.__add_to_Node_Linker_Table(item=key, node=fresh)

            successor = cursor.children[key]
            successor.count += item_count
            successor.parent = cursor
            cursor = successor

    def insert(self, itemset, item_count):
        """Add ``itemset`` as a root path weighted by ``item_count``."""
        self.__insert_helper(self.root, itemset, 0, item_count)

    def print_tree(self):
        """Print the tree depth-first, one ``item (count)`` line per node.

        Each level is indented by two spaces; children are visited in the
        order in which they were created.
        """
        # Explicit stack; entries are pushed in reverse so that popping
        # yields the children in their creation order.
        pending = [(key, child, 0)
                   for key, child in reversed(list(self.root.children.items()))]
        while pending:
            item, child, depth = pending.pop()
            print("  " * depth + f"{item} ({child.count})")
            pending.extend(
                (key, grandchild, depth + 1)
                for key, grandchild in reversed(list(child.children.items()))
            )
        
    


class Tree:
    """Prefix tree (FP-tree style) built from ordered transactions.

    Every inserted itemset becomes a path starting at the root, and each
    node's ``count`` is the number of inserted itemsets passing through it.
    ``Node_Linker_Table`` maps an item to the first node created for it;
    later nodes for the same item are chained in creation order through
    ``node_linker_next``.
    """

    def __init__(self):
        self.root = Node(root=True)
        self.Node_Linker_Table = dict()  # {item: first Node created for it}

    def get_upward(self, node):
        """Return the items on the path from below the root down to ``node``.

        The root itself contributes no item, so passing the root gives ``[]``.
        """
        path = []
        while not node.root:
            path.append(node.item)
            node = node.parent
        # Items were gathered bottom-up; callers expect top-down order.
        path.reverse()
        return path

    def __add_to_Node_Linker_Table(self, item, node: "Node"):
        """Append ``node`` to the end of the chain of nodes holding ``item``."""
        if item not in self.Node_Linker_Table:
            self.Node_Linker_Table[item] = node
            return

        tail = self.Node_Linker_Table[item]
        while tail.node_linker_next is not None:
            tail = tail.node_linker_next
        tail.node_linker_next = node

    def insert_helper(self, node, itemset, idx):
        """Insert ``itemset[idx:]`` as a path below ``node``.

        Missing nodes are created and registered in the node-link table; the
        count of every node on the path is increased by one. Implemented
        iteratively so long itemsets cannot exhaust the recursion limit.
        """
        for position in range(idx, len(itemset)):
            item = itemset[position]
            child = node.children.get(item)
            if child is None:
                child = Node(item=item)
                child.parent = node
                node.children[item] = child
                self.__add_to_Node_Linker_Table(item=item, node=child)
            child.count += 1
            node = child

    def insert(self, itemset):
        """Add one transaction (an ordered list of items) to the tree."""
        self.insert_helper(self.root, itemset, 0)

    def print_tree(self):
        """Print the tree depth-first, one ``item (count)`` line per node.

        Each level is indented by two spaces.
        """
        def _print(node, depth):
            for item, child in node.children.items():
                print("  " * depth + f"{item} ({child.count})")
                _print(child, depth + 1)

        _print(self.root, 0)

    def get_patterns(self, min_sup):
        """Return the distinct non-empty sub-itemsets of every frequent path.

        A path is followed from the root only while each node on it has
        ``count >= min_sup``. Every non-empty combination (item order kept)
        of each such path is reported exactly once.

        NOTE(review): only itemsets lying on a single root path are found
        here; an itemset whose occurrences are split over several branches is
        not reported by this method.

        Args:
            min_sup: Minimum node count for a node to extend a path.

        Returns:
            A list of patterns (each a list of items). The order of the list
            is unspecified because duplicates are removed through a ``set``.
        """
        frequent_paths = []

        def _collect(node, path):
            for item, child in node.children.items():
                if child.count >= min_sup:
                    new_path = path + [item]
                    frequent_paths.append(new_path)
                    _collect(child, new_path)

        _collect(self.root, [])

        unique_patterns = {
            combo
            for path in frequent_paths
            for size in range(1, len(path) + 1)
            for combo in combinations(path, size)
        }
        return [list(pattern) for pattern in unique_patterns]

    def create_conditional_tree_for_item(self, item):
        """Build, print and return the conditional tree for ``item``.

        For every node holding ``item`` (reached through the node-link chain)
        the path of its ancestors is inserted into a fresh
        ``Conditional_Tree``, weighted by that node's count. Each prefix path
        and the finished tree are printed. An item that is not in the tree
        yields an empty conditional tree.
        """
        cond_tree = Conditional_Tree()

        curr = self.Node_Linker_Table.get(item)
        while curr is not None:
            item_count = curr.count
            # The prefix path excludes the item itself, hence curr.parent.
            itemset = self.get_upward(curr.parent)

            print(f"itemset : {itemset} item_count : {item_count}")

            cond_tree.insert(itemset=itemset, item_count=item_count)
            curr = curr.node_linker_next

        cond_tree.print_tree()

        return cond_tree

    @staticmethod
    def generate_frequent_patterns(cond_tree: "Conditional_Tree", item, min_sup):
        """Placeholder: mining patterns from a conditional tree is not written yet.

        The original definition had no body (a syntax error) and no ``self``
        parameter, so it is kept as a static method with the same parameters.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("generate_frequent_patterns is not implemented yet")

    @staticmethod
    def get_all_combinations_from_cond_tree():
        """Placeholder: enumerating combinations from a conditional tree is not written yet.

        The original definition had no body (a syntax error) and no
        parameters, so it is kept as a parameterless static method.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "get_all_combinations_from_cond_tree is not implemented yet"
        )
        


    
def clean_up(transactions, F):
    """Restrict every transaction to the frequent items, ordered as in ``F``.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.
        F: Frequent items in the order they should appear in each cleaned
            transaction.

    Returns:
        A new list with one entry per transaction that contains at least one
        item of ``F``; each entry lists the items of ``F`` present in that
        transaction, in ``F`` order. Transactions left empty are dropped.
    """
    new_transactions = []
    for transaction in transactions:
        # Hash the transaction once so each membership test is O(1) instead
        # of a linear scan of the transaction list per frequent item.
        present = set(transaction)
        new_transaction = [item for item in F if item in present]
        if new_transaction:
            new_transactions.append(new_transaction)
    return new_transactions




In [20]:
import math

# Driver cell: load the sample dataset, build the prefix tree from the
# frequent items, print it, then build the conditional tree for item 3.
filepath = 'Datasets/sample.txt'
Transactions = read_dataset(filepath) 
min_sup =  2  # absolute support count; relative alternative: math.ceil(0.20 * len(Transactions))

# NOTE(review): L is not used anywhere in this cell; presumably reserved for
# the mined patterns - confirm before removing.
L = []

# Frequent single items, ordered by descending support.
F = find_frequent_1_itemsets_in_desc(Transactions, min_sup)
# print(F)

# print(Transactions)

# Keep only the frequent items in each transaction, ordered as in F, so that
# transactions sharing frequent items share a prefix in the tree.
Transactions = clean_up(Transactions, F)

# print(Transactions)

tree = Tree()

# One root path per cleaned transaction.
for itemset in Transactions:
    tree.insert(itemset)


tree.print_tree()

print("create cond tree for item : 3")

# Prints the prefix paths of item 3 and the resulting conditional tree; the
# returned tree object is not kept.
tree.create_conditional_tree_for_item(3)






2 (7)
  1 (4)
    5 (1)
    4 (1)
    3 (2)
      5 (1)
  4 (1)
  3 (2)
1 (2)
  3 (2)
create cond tree for item : 3
itemset : [2] item_count : 2
itemset : [1] item_count : 2
itemset : [2, 1] item_count : 2
2 (4)
  1 (2)
1 (2)
