In [6]:
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import TypeVar, Dict, Set, Optional, Tuple, FrozenSet
from collections import defaultdict
from copy import deepcopy
from config import DATASET, PARAMS, DISPLAY
from utils import *


# Define some type.
Item                        = TypeVar(int)
ItemSet                     = TypeVar(FrozenSet[Item])
ItemSets                    = TypeVar(List[ItemSet])
TID                         = TypeVar(int)

ori_isets: ItemSets          = set()
transactions: List[ItemSet]     = list()
t_dict: Dict[TID, transactions] = dict()
item_counter: Dict[ItemSet, int]  = dict()

DISPLAY = False

# Get data
ori_isets, transactions, item_counter = get_data(DATASET)


def filter_data(
    item_counter: Dict[ItemSet] = item_counter,
    min_freq: float = PARAMS.min_freq):
    r"""Filter out frequence > min_frequence itemsets."""
    return filter(lambda i: i[1] >= min_freq, item_counter.items())


def get_dropout_itemset(
    ori_isets: ItemSets,
    filtered_isets: ItemSets):
    r"""Get the droped out itemsets."""
    return ori_isets - {itemset[0] for itemset in deepcopy(filtered_isets)}


DISPLAY = True
def filter_order_transactions(transactions: list):
    r"""Filter and Order the transactions."""
    result: List[ItemSets] = list()
    # Filter out frequence > min_frequence itemsets.
    filtered_isets = filter_data(item_counter, PARAMS.min_freq)
    # Get dropout isets.
    drop_out_isets = get_dropout_itemset(ori_isets, filtered_isets)
    for k, v in enumerate(transactions):
        if DISPLAY: print(f"{k} before drop out:\n\t{list(deepcopy(v))}")
        droped_out = list(filter(lambda x: x not in drop_out_isets, v))
        if DISPLAY: print(f"{k} after drop out:\n\t{list(deepcopy(droped_out))}")
        ordered = sorted(droped_out, key=lambda i: item_counter[i], reverse=True)
        if DISPLAY: print(f"{k} after ordered:\n\t{list(deepcopy(ordered))}\n", '='*100)
        result.append(ordered)
    return result

# transactions = filter_order_transactions(transactions)
filtered_isets = filter_data(item_counter, PARAMS.min_freq)
# Get dropout isets.
drop_out_isets = get_dropout_itemset(ori_isets, filtered_isets)
print(list(drop_out_isets))

[frozenset({'z'}), frozenset({'s'}), frozenset({'t'}), frozenset({'o'}), frozenset({'u'}), frozenset({'v'}), frozenset({'j'}), frozenset({'e'}), frozenset({'y'}), frozenset({'r'}), frozenset({'h'}), frozenset({'q'}), frozenset({'x'}), frozenset({'p'}), frozenset({'n'}), frozenset({'m'}), frozenset({'w'})]


In [9]:
class Tree:
    node_dict = defaultdict(list)
    def __init__(self,
                 tag: str,
                 parent: Optional[Tree] = None):
        self.val = 1
        self.children = dict()
        self.tag = tag
        self.parent = parent
        Tree.node_dict[tag].append(self)

    def __getitem__(self, tag):
        return self.children[tag]
    
    def __setitem__(self, tag, subtree):
        self.children[tag] = subtree

    def __repr__(self):
        return f"<{self.tag}: {self.val}>"

    def show(self, level=0):
        r"""Show the tree structure."""
        print(f"{'  '*level + repr(self)}")
        for child in self.children.values():
            child.show(level + 1)

    def update(self, *tags: Tuple[str]):
        r"""Update the tree."""
        if not tags: return
        first, other = tags[0], tags[1:]
        child = self.children.get(first, False)
        # is a child already.
        if child:
            child.val += 1
        # not a child.
        else:
            child = Tree(first, parent=self)
            self[first] = child
        child.update(*other)

    def truncate(self):
        r"""Truncate this tree."""
        del self.children
        setattr(self, 'children', dict())

    def prefix_path(self):
        r"""Get prefix path."""
        parent = self.parent
        ancestors = []
        while parent:
            ancestors.append(parent.tag)
            parent = parent.parent
        return ancestors[:-1]

    @staticmethod
    def build_FPtree(transactions):
        r"""Build the FP-Tree."""
        root = Tree('-')
        for k, v in enumerate(transactions):
            root.update(*v)
        return root
    
fp_tree = Tree.build_FPtree(transactions)
cond_pattern_base = defaultdict(frozenset)

# for tag, nodes in Tree.node_dict.items():
#     for node in nodes:
for tag, nodes in Tree.node_dict.items():
    for node in nodes:
        pp = node.prefix_path()
        print(frozenset(pp))
        cond_pattern_base[tag].union(node.prefix_path())

print(cond_pattern_base[frozenset({'r'})])
fp_tree.show()

frozenset()
frozenset()
frozenset({frozenset({'z'})})
frozenset({frozenset({'x'})})
frozenset({frozenset({'x'}), frozenset({'y'}), frozenset({'z'})})
frozenset({frozenset({'z'})})
frozenset()
frozenset({frozenset({'x'}), frozenset({'z'})})
frozenset({frozenset({'x'}), frozenset({'y'}), frozenset({'z'})})
frozenset({frozenset({'r'}), frozenset({'x'}), frozenset({'y'}), frozenset({'z'})})
frozenset({frozenset({'x'}), frozenset({'t'}), frozenset({'y'}), frozenset({'z'})})
frozenset({frozenset({'r'}), frozenset({'x'})})
frozenset()
<-: 1>
  <frozenset({'z'}): 5>
    <frozenset({'r'}): 1>
    <frozenset({'x'}): 3>
      <frozenset({'y'}): 3>
        <frozenset({'t'}): 2>
          <frozenset({'s'}): 2>
        <frozenset({'r'}): 1>
          <frozenset({'t'}): 1>
  <frozenset({'x'}): 1>
    <frozenset({'r'}): 1>
      <frozenset({'s'}): 1>


{1: 2}