In [3]:
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TypeVar, Dict, Set, Optional, Tuple, FrozenSet
from collections import defaultdict
from copy import deepcopy
from config import DATASET, PARAMS, DISPLAY
from utils import *


# Define some type.
Item                        = TypeVar(int)
ItemSet                     = TypeVar(FrozenSet[Item])
ItemSets                    = TypeVar(List[ItemSet])
TID                         = TypeVar(int)

transactions: List[ItemSet]         = list()
item_counter: Dict[ItemSet, int]    = dict()

DISPLAY = False

def format_trans(transactions: List[ItemSets]):
    r"""Format the original transactions into **mapping of itemset to count values**.

    Example
    =======
    >>> trans = [[1, 2, 3], [1, 2, 3], [2, 3]
    >>> Formater(trans)
    {
        frozenset({1, 2, 3}): 2,
        frozenset({2, 3}): 1
    }

    Return
    ======
    counter: Dict[FrozenSet, int]
        The mapping of each itemset to it's count.
    """
    result = dict()
    for trans in transactions:
        trans = frozenset(trans)
        result[trans] = result.get(trans, 0) + 1
    return result


def count(fmt_trans: Dict[FrozenSet, int]):
    r"""Conditional pattern base item counter.

    Example
    =======
    >>> fmt_trans = {
    ...     frozenset({1, 2, 3}): 2,
    ...     frozenset({2, 3}): 1}
    >>> Counter(trans)
    {
        frozenset({1}): 2,
        frozenset({2}): 3,
        frozenset({3}): 3
    }

    Return
    ======
    counter: Dict[FrozenSet, int]
        The mapping of each itemset to it's count.
    """
    counter = dict()
    for trans, count in fmt_trans.items():
        for item in trans:
            counter[item] = counter.get(item, 0) + count
    return counter


def filter_and_order(
    fmt_trans: Dict[FrozenSet, int],
    counter: Dict[FrozenSet, int],
    min_sup: Optional[float] = PARAMS.MIN_SUP):
    r"""Filter out the itemsets which there count value less than min_sup,
        then ordering the transaction in fmt_trans(formated transactions).

    Example
    =======
    >>> fmt_trans = {
    ...     frozenset({1, 2, 3}): 2,
    ...     frozenset({2, 3}): 1
    ... }
    >>> counter = {
    ...     frozenset({1}): 2,
    ...     frozenset({2}): 3,
    ...     frozenset({3}): 3
    ... }
    >>> min_sup = 2
    >>> filter_and_order(fmt_trans, counter, min_sup)
    {frozenset({2, 3}): 3}

    Return
    ======
    counter: Dict[FrozenSet, int]
        The mapping of each itemset to it's count.
    """
    result = dict()
    drop_out_itemsets: set = {k if v < min_sup else None for k, v in counter.items()}
    print(f"droped out itemsets: {drop_out_itemsets}\n")
    for trans, count in fmt_trans.items():
        filtered_iset = trans - drop_out_itemsets
        result[filtered_iset] = result.get(filtered_iset, 0) + count
    return result


# Get data
_, transactions, _ = get_data(DATASET)

# format the transactions.
formated = format_trans(transactions)

counter = count(formated)

filter_and_order(formated, counter)

# transactions = [[1,2,3], [1,2,3], [2,3]]

# formated = format_trans(transactions)
# item_counter

droped out itemsets: {None, 'o', 'p', 'j', 'u', 'h', 'm', 'v', 'w', 'n', 'e', 'q'}



{frozenset({'r', 'z'}): 1,
 frozenset({'s', 't', 'x', 'y', 'z'}): 2,
 frozenset({'z'}): 1,
 frozenset({'r', 's', 'x'}): 1,
 frozenset({'r', 't', 'x', 'y', 'z'}): 1}

In [4]:
a = {None, frozenset({1})}
b = frozenset({1, 2, 3})
c = frozenset({3, 2, 1})
b == c
# set(b) - a

True