In [31]:
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import TypeVar, Dict, Set, Optional, Tuple, FrozenSet
from collections import defaultdict
from copy import deepcopy
from config import DATASET, PARAMS, DISPLAY
from utils import *


# Type aliases used by the annotations in this notebook.
# `typing.TypeVar` expects the variable's *name* as a string; handing it a type
# object (e.g. ``TypeVar(int)``) raises ``TypeError`` on Python 3.12+ and never
# yields an alias, so plain aliases are declared instead.
Item                        = TypeVar("Item")       # one item of a transaction (left unconstrained)
ItemSet                     = FrozenSet[Item]       # an immutable set of items
ItemSets                    = List[ItemSet]         # a sequence of itemsets
TID                         = int                   # transaction id

# Module-level containers.  `ori_isets`, `transactions` and `item_counter` are
# only placeholders here: they are replaced by the `get_data` call below.
ori_isets: Set[ItemSet]                 = set()
transactions: List[ItemSet]             = list()
t_dict: Dict[TID, List[ItemSet]]        = dict()
item_counter: Dict[ItemSet, int]        = dict()
cond_pattern: Set[ItemSet]              = set()

# Overrides the DISPLAY value imported from `config`, silencing the debug
# prints in `filter_order_transactions`.
DISPLAY = False

# Get data
ori_isets, transactions, item_counter = get_data(DATASET)


def filter_data(
    item_counter: Dict[ItemSet, int] = item_counter,
    min_freq: float = PARAMS.min_freq):
    r"""Keep the entries of ``item_counter`` whose count reaches ``min_freq``.

    Parameters
    ==========
    item_counter
        Mapping of itemset to its count.  Defaults to the module-level
        ``item_counter`` object that existed when this function was defined.
    min_freq
        Inclusive lower bound on the count.  Defaults to the value of
        ``PARAMS.min_freq`` at the time this function was defined.

    Return
    ======
    filter
        A lazy, single-use iterator over the ``(itemset, count)`` pairs with
        ``count >= min_freq``.  Wrap it in ``list(...)`` if it has to be
        traversed more than once, and consume it before ``item_counter`` is
        modified.
    """
    def reaches_min_freq(entry):
        # `entry` is one (itemset, count) pair produced by `dict.items()`.
        return entry[1] >= min_freq

    return filter(reaches_min_freq, item_counter.items())


def get_dropout_itemset(
    ori_isets: ItemSets,
    filtered_isets: ItemSets):
    r"""Collect the items of every itemset that did not survive filtering.

    Parameters
    ==========
    ori_isets
        The original itemsets.  Must support ``-`` against a ``set``
        (a ``set`` or a ``dict`` keys view both work).
    filtered_isets
        Iterable of entries whose first element is a surviving itemset,
        e.g. the ``(itemset, count)`` pairs returned by ``filter_data``.

    Return
    ======
    frozenset
        The union of all itemsets present in ``ori_isets`` but absent from
        ``filtered_isets``; empty when nothing was dropped.
    """
    surviving = set()
    for entry in filtered_isets:
        surviving.add(frozenset(entry[0]))

    dropped = ori_isets - surviving
    if not dropped:
        return frozenset()
    # Flatten the dropped itemsets into a single frozenset of items.
    return frozenset.union(*dropped)


# Debug switch read by `filter_order_transactions`; False keeps it silent.
DISPLAY = False
def filter_order_transactions(transactions: list, drop_out_isets: FrozenSet, item_counter: Dict[ItemSet, int]):
    r"""Strip dropped items from each transaction and sort the rest by count.

    Parameters
    ==========
    transactions
        Iterable of transactions, each an iterable of items.
    drop_out_isets
        Items to remove from every transaction.
    item_counter
        Mapping looked up as ``item_counter[item]`` to rank the items.

    Return
    ======
    list
        One list per input transaction, holding the remaining items in
        descending count order.  Items with equal counts keep the order in
        which the transaction yielded them (the sort is stable).
    """
    def frequency(item):
        return item_counter[item]

    prepared = []
    for index, transaction in enumerate(transactions):
        if DISPLAY:
            print(f"{index} before drop out:\n\t{list(deepcopy(transaction))}")

        kept = [item for item in transaction if item not in drop_out_isets]
        if DISPLAY:
            print(f"{index} after drop out:\n\t{list(deepcopy(kept))}")

        ranked = sorted(kept, key=frequency, reverse=True)
        if DISPLAY:
            print(f"{index} after ordered:\n\t{list(deepcopy(ranked))}\n", '='*100)

        prepared.append(ranked)
    return prepared

# Keep only the itemsets whose count reaches the minimum frequency.
filtered_isets = filter_data(item_counter, PARAMS.min_freq)
# Collect the items belonging to itemsets that were filtered out.
drop_out_isets = get_dropout_itemset(ori_isets, filtered_isets)
# Remove those items from every transaction and sort the rest by count.
ordered_transactions = filter_order_transactions(transactions, drop_out_isets, item_counter)
# Map each distinct transaction to the number of times it occurs.  Counting
# (instead of storing a constant 1) keeps the multiplicity of identical
# transactions, which would otherwise collapse into a single entry.
# NOTE(review): frozenset keys do not retain the ordering produced above —
# confirm that `Tree.build_FPtree` does not rely on it.
transaction_counts = defaultdict(int)
for trans in ordered_transactions:
    transaction_counts[frozenset(trans)] += 1
transactions = dict(transaction_counts)
transactions

[['z', 'r'],
 ['z', 'x', 'y', 't', 's'],
 ['z'],
 ['x', 'r', 's'],
 ['z', 'x', 'y', 'r', 't'],
 ['z', 'x', 'y', 't', 's']]

In [32]:


# ans = list()
# ans.extend(Tree.node_dict.keys())
# cond_pattern_bases

# Build the FP-tree from the prepared transactions.
# NOTE(review): `Tree` is neither defined nor explicitly imported in this
# notebook — presumably it is provided by `from utils import *`; confirm.
fp_tree = Tree.build_FPtree(transactions)
# Called on the class rather than on `fp_tree`, so this presumably reads state
# that `build_FPtree` recorded on `Tree` itself — TODO confirm.
cond_pattern_bases = Tree.get_cond_pattern_base()
# Print the tree.
fp_tree.show()


<-: 1>
  <z: 5>
    <r: 1>
    <x: 3>
      <y: 3>
        <t: 2>
          <s: 2>
        <r: 1>
          <t: 1>
  <x: 1>
    <r: 1>
      <s: 1>


In [34]:
# Conditional pattern bases recorded under key 'r'; bound at module level
# because it serves as the default argument of the function below.
cond_pbs = cond_pattern_bases['r']

def cond_pattern_base_counter(cond_pbs=cond_pbs):
    r"""Tally item occurrences across conditional pattern bases.

    Parameters
    ==========
    cond_pbs
        Mapping of a conditional pattern base (an iterable of items) to its
        count.  Defaults to the module-level ``cond_pbs`` that existed when
        this function was defined.

    Return
    ======
    counter
        A ``defaultdict(int)`` mapping ``frozenset(item)`` to the summed
        counts of every pattern base containing that item.

    NOTE(review): the keys are ``frozenset(item)``, whereas
    ``filter_order_transactions`` looks counts up as ``item_counter[item]``.
    Confirm that both agree for the item type in use.
    """
    tally = defaultdict(int)
    weighted_keys = (
        (frozenset(member), occurrences)
        for pattern, occurrences in cond_pbs.items()
        for member in pattern
    )
    for key, occurrences in weighted_keys:
        tally[key] += occurrences
    return tally

def mineTree(cond_pattern_bases=cond_pattern_bases):
    r"""Recursively process every conditional pattern base.

    For each ``(key, pattern bases)`` entry the pattern bases are counted,
    filtered and ordered with the helpers of this notebook, handed to
    ``insert_ans`` together with the key, and used to build a new tree whose
    own conditional pattern bases are then mined the same way.

    Parameters
    ==========
    cond_pattern_bases
        Mapping of key to a mapping of conditional pattern base to count.
        Defaults to the module-level ``cond_pattern_bases`` that existed when
        this function was defined.

    Return
    ======
    None
        Results are reported through ``insert_ans`` only.
    """
    for suffix, pattern_bases in cond_pattern_bases.items():
        counts = cond_pattern_base_counter(pattern_bases)

        # The lazy result of `filter_data` is fully consumed here, before
        # `counts` is used for the ordering below.
        dropped = get_dropout_itemset(counts.keys(), filter_data(counts))

        ordered = filter_order_transactions(pattern_bases.keys(), dropped, counts)

        insert_ans(suffix, deepcopy(ordered))

        # The returned tree is not used in this function; the pattern bases
        # are fetched from the `Tree` class right after the build and before
        # `cls_truncate` is called.
        _fp_tree = Tree.build_FPtree(ordered)
        nested_bases = Tree.get_cond_pattern_base()
        Tree.cls_truncate()

        mineTree(nested_bases)

mineTree(cond_pattern_bases)


In [35]:
# Display the collected result.  `ans` is not assigned anywhere in this
# notebook; presumably it is filled by `insert_ans` during `mineTree` — TODO confirm.
ans

{frozenset({'z'}),
 frozenset({'y'}),
 frozenset({'x', 'y', 'z'}),
 frozenset({'s', 'x'}),
 frozenset({'r'}),
 frozenset({'x', 'z'}),
 frozenset({'t', 'x', 'y', 'z'})}