In [22]:
def read_dataset(filepath):
    """Read a transaction database from a whitespace-separated text file.

    Every non-blank line is one transaction; its whitespace-separated tokens
    are parsed as integer item ids.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of transactions, each a list of ints in file order.

    Raises:
        ValueError: If a token cannot be parsed as an integer.
        OSError: If the file cannot be opened.
    """
    transactions = []
    with open(filepath, 'r') as file:
        # Iterate lazily instead of loading every line with readlines().
        for line in file:
            tokens = line.split()
            # Blank / whitespace-only lines (e.g. a trailing newline) are not
            # transactions; keeping them would inflate len(transactions) and
            # therefore any support threshold derived from it.
            if not tokens:
                continue
            transactions.append([int(token) for token in tokens])
    return transactions

In [23]:
from collections import defaultdict

class Node:
    """One node of the FP-tree.

    Attributes are plain public fields: ``item`` and ``count`` are the
    payload, ``parent`` points one level up, ``children`` maps an item to the
    child node for it, and ``link_next`` chains to another node (initially
    ``None``).
    """

    def __init__(self, item, count, parent):
        """Create a node with no children and no node-link successor."""
        self.item, self.count = item, count
        self.parent = parent
        self.children = {}
        self.link_next = None

    def display(self, ind=1):
        """Print this node and, recursively, its subtree.

        Each level is indented by two more spaces than its parent; ``ind`` is
        the indentation level of this node.
        """
        indent = '  ' * ind
        print(indent, f'{self.item}: {self.count}')
        deeper = ind + 1
        for key in self.children:
            self.children[key].display(deeper)


def BUILD_TREE(transactions, min_support):
    """Build an FP-tree and its header table from a list of transactions.

    Args:
        transactions: Sequence of transactions, each an iterable of hashable
            items. It is traversed twice, so it must be re-iterable.
        min_support: Minimum number of transactions an item must occur in to
            be kept.

    Returns:
        ``(root, header_table)``. ``root`` is a ``Node`` with item ``None``;
        ``header_table`` maps each frequent item to a two-element list
        ``[support, first_node]`` where ``first_node`` starts the chain of
        tree nodes for that item. Returns ``(None, None)`` when no item
        reaches ``min_support``.
    """
    # Pass 1: support of every item. Support counts transactions, so an item
    # repeated inside one transaction must only be counted once;
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    support = defaultdict(int)
    for transaction in transactions:
        for item in dict.fromkeys(transaction):
            support[item] += 1

    header_table = {
        item: [count, None]
        for item, count in support.items()
        if count >= min_support
    }
    if not header_table:
        return None, None

    # One global rank per frequent item: descending support, ties broken by
    # header-table order. Sorting each transaction by support alone would
    # leave equally frequent items in per-transaction order, so the same
    # itemset could be spread over different branches and its support split
    # (or lost) during mining.
    ranked_items = sorted(header_table, key=lambda item: -header_table[item][0])
    rank = {item: position for position, item in enumerate(ranked_items)}

    # Pass 2: insert every transaction, restricted to frequent items and
    # arranged in the global order.
    root = Node(None, 1, None)
    for transaction in transactions:
        filtered_transaction = [
            item for item in dict.fromkeys(transaction) if item in rank
        ]
        filtered_transaction.sort(key=rank.__getitem__)
        insert_tree(filtered_transaction, root, header_table)
    return root, header_table

def insert_tree(items, node, header_table):
    """Insert an ordered list of items as a path below ``node``.

    For each item, the matching child's count is incremented if it exists;
    otherwise a new child with count 1 is created and appended to the end of
    that item's node-link chain in ``header_table``.

    Implemented as a loop rather than one recursive call per item, so long
    transactions neither hit Python's recursion limit nor pay for an
    ``items[1:]`` copy at every level.

    Args:
        items: Items of one transaction, already filtered and ordered.
        node: Tree node under which the path is inserted (normally the root).
        header_table: Mapping ``item -> [support, first_node]``; the
            ``first_node`` slot and the ``link_next`` chain are updated in
            place when a new node is created.

    Returns:
        None. The tree and ``header_table`` are modified in place.
    """
    current = node
    for item in items:
        if item in current.children:
            current.children[item].count += 1
        else:
            new_node = Node(item, 1, current)
            current.children[item] = new_node
            entry = header_table[item]
            if entry[1] is None:
                # First node for this item: it becomes the head of the chain.
                entry[1] = new_node
            else:
                # Append at the tail so the chain keeps creation order.
                tail = entry[1]
                while tail.link_next is not None:
                    tail = tail.link_next
                tail.link_next = new_node
        current = current.children[item]


def MINE_TREE(header_table, min_support, prefix, frequent_itemsets):
    """Recursively mine frequent itemsets from an FP-tree header table.

    Items are processed in ascending order of support. For each item the
    itemset ``prefix | {item}`` is recorded with the item's support, then the
    item's conditional pattern base is collected by walking its node-link
    chain and, if non-empty, mined recursively through a conditional tree.

    Args:
        header_table: Mapping ``item -> [support, first_node]`` as returned
            by ``BUILD_TREE``.
        min_support: Minimum support passed on to the conditional trees.
        prefix: Set of items already fixed on this recursion branch; it is
            copied, never modified.
        frequent_itemsets: Output list; ``(itemset, support)`` tuples are
            appended to it in place.

    Returns:
        None. Results are accumulated in ``frequent_itemsets``.
    """
    sorted_items = sorted(header_table.items(), key=lambda x: x[1][0])
    for base_item, (count, node) in sorted_items:
        new_prefix = prefix.copy()
        new_prefix.add(base_item)
        frequent_itemsets.append((new_prefix, count))

        # Conditional pattern base: for every tree node of base_item, the
        # items on the way up to the root, repeated node.count times.
        conditional_pattern_base = []
        while node is not None:
            path = []
            parent = node.parent
            while parent is not None and parent.item is not None:
                path.append(parent.item)
                parent = parent.parent
            # A node directly below the root has an empty prefix path, which
            # contributes no items to the conditional tree; skip it instead
            # of materialising node.count empty lists.
            if path:
                path.reverse()
                conditional_pattern_base.extend([path] * node.count)
            node = node.link_next

        # With no prefix paths there is nothing to build or mine.
        if not conditional_pattern_base:
            continue

        conditional_tree, conditional_header = BUILD_TREE(conditional_pattern_base, min_support)
        if conditional_header is not None:
            # conditional_tree.display() can be called here to inspect the
            # conditional FP-tree for new_prefix while debugging.
            MINE_TREE(conditional_header, min_support, new_prefix, frequent_itemsets)

In [24]:

def FP_TREE_ALGO(Transactions, min_sup):
    """Run the FP-tree pipeline on ``Transactions`` and print the outcome.

    Builds the tree with ``BUILD_TREE``, mines it with ``MINE_TREE`` when a
    tree was produced, then prints the number of collected itemsets followed
    by the list of ``(itemset, support)`` tuples. Nothing is returned.

    Args:
        Transactions: The transactions handed to ``BUILD_TREE``.
        min_sup: Minimum support used for building and mining.
    """
    found = []
    tree_root, header = BUILD_TREE(Transactions, min_sup)
    # BUILD_TREE yields (None, None) when nothing reaches min_sup; in that
    # case there is nothing to mine and the empty list is reported.
    if tree_root is not None:
        MINE_TREE(header, min_sup, set(), found)

    total = len(found)
    print(f"length of frequent itemsets : {total}")
    print(found)



In [25]:
# Load the sample transactions and mine them with an absolute support
# threshold of 2 transactions.
filepath = 'Datasets/sample.txt'
Transactions = read_dataset(filepath)
# Relative alternative (would need `import math`):
#   min_sup = math.ceil(0.20 * len(Transactions))
min_sup = 2

FP_TREE_ALGO(Transactions, min_sup)

length of frequent itemsets : 13
[({5}, 2), ({2, 5}, 2), ({1, 5}, 2), ({1, 2, 5}, 2), ({4}, 2), ({2, 4}, 2), ({1}, 6), ({1, 2}, 4), ({3}, 6), ({2, 3}, 4), ({1, 3}, 4), ({1, 2, 3}, 2), ({2}, 7)]
