In [1]:
import math
import numpy as np
import random
from itertools import combinations
from scipy.stats import norm
from scipy.fft import fft, ifft


In [2]:
def dynamic_programming_algorithm(X, UD):
    """_summary_

    Args:
        X (_type_): _description_
        UD (_type_): _description_

    Returns:
        _type_: _description_
    """
    f_X = np.zeros(len(UD) + 1)
    f_X[0] = 1

    for T_i in UD:
        p_Xi = get_probability_in_transaction(X, T_i)
        f_X_prime = np.zeros_like(f_X)
        f_X_prime[0] = (1 - p_Xi) * f_X[0]

        for k in range(1, len(UD) + 1):
            f_X_prime[k] = p_Xi * f_X[k - 1] + (1 - p_Xi) * f_X[k]
        f_X = f_X_prime

    return f_X

def get_probability_in_transaction(X, T):
    prob = 1.0
    for i in X:
        prob *= T.get(i, 0.0)
    return prob

def get_weighted_itemset(X, W):
    weighted = 0

    for i in X:
        weighted += W.get(i, 0.0)

    return weighted / len(X)

In [3]:
def compute_prF(X, UD, wfpt):
    convolution_vector = dynamic_programming_algorithm(X, UD)
    prF = 0

    for i in range(len(UD), 0, -1):
        prF += convolution_vector[i]

        if prF > wfpt:
            return i

    return -1

def compute_prWF(X, UD, W, wfpt):
    convolution_vector = dynamic_programming_algorithm(X, UD)
    prWF = 0

    for i in range(len(UD), 0, -1):
        prWF += convolution_vector[i]*get_weighted_itemset(X, W)

        if prWF > wfpt:
            return i

    return -1

def compute_prMWF(X, UD, W, min_sup):
    max_weighted = 0

    for i in X:
        if W.get(i) > max_weighted:
            max_weighted = W.get(i)

    prF = compute_prF(X, UD, min_sup)

    return max_weighted * prF

def compute_support(X, UD):
    support = 0

    for Ti in UD:
        probX= get_probability_in_transaction(X, Ti)
        
        if probX > 0:
            support += 1

    return support

def compute_expected_support(X, UD):
    expected_support = 0

    for Ti in UD:
        expected_support += get_probability_in_transaction(X, Ti)

    return expected_support

def compute_lower_bound(expected_support, min_conf, weighted_itemset):
    return expected_support - np.sqrt(-2 * expected_support * np.log(1 - min_conf / weighted_itemset))

def compute_upper_bound(expected_support, min_conf, weighted_itemset):
    return (2 * expected_support - np.log(min_conf / weighted_itemset) + np.sqrt(np.log(min_conf/weighted_itemset)**2 - 8 * expected_support * np.log(min_conf / weighted_itemset))) / 2

In [4]:
def discover_single_items(UD):
    # Implement discovery of single items using Chernoff-Hoeffding bound
    single_items = set()

    for Ti in UD:
        for item in Ti.keys():
            single_items.add(item)

    return single_items

def get_sorted_list_item(UD, W, min_sup, min_conf):
    single_items = discover_single_items(UD)
    list_single_items = []
    
    for item in single_items:
        support = compute_support([item], UD)
        
        if support < min_sup :
            continue
        
        expected_support = compute_expected_support([item], UD)
        weighted = get_weighted_itemset([item], W)
        lower_bound = compute_lower_bound(expected_support, min_conf, weighted)
        upper_bound = compute_upper_bound(expected_support, min_conf, weighted)
        
        probabilistic_support = compute_prWF([item], UD, W, min_conf)
        list_single_items.append([item, support, expected_support, lower_bound, upper_bound, probabilistic_support])
        
    return sorted(list_single_items, key=lambda x: x[5], reverse=True)
        
        

# Implement algorithms

In [5]:
def algorithms(UD, W, min_sup, min_conf):
    list_result = []
    
    sorted_item_list = get_sorted_list_item(UD, W, min_sup, min_conf)
    
    root = [[], 0, 0, 0, 0, 0, []]
    
    WPMFIM(root, list_result, sorted_item_list, UD, W, min_sup, min_conf)
    
    return list_result
    
    
def WPMFIM(node, list_result, sorted_item_list, UD, W, min_sup, min_conf):
    itemset_of_node = node[0]
    
    item_J_order_larger_than_I_List = get_itemset_j_order_larger_than_i(itemset_of_node, sorted_item_list)
    
    for index in range(0, len(item_J_order_larger_than_I_List)):
        itemJ = item_J_order_larger_than_I_List[index]
        
        tempNode = [itemJ, 0, 0, 0, 0, 0, []]
        
        tempChild = node[6]
        tempChild.append(tempNode)
        
        support = compute_support(itemJ, UD)
        expected_support = compute_expected_support(itemJ, UD)
        weighted_J = get_weighted_itemset(itemJ, W)
        lower_bound = compute_lower_bound(expected_support, min_conf, weighted_J)
        uppper_bound = compute_upper_bound(expected_support, min_conf, weighted_J)
        
        if min(support, uppper_bound) < min_sup :
            tempChild.pop(len(node[6]) - 1)
            continue
        
        if lower_bound > min_sup:
            WPMFIM(tempNode, list_result, sorted_item_list, UD, W, min_sup, min_conf)
        else:
            probabilistic_support = compute_prWF(itemJ, UD, W, min_conf)
            
            if probabilistic_support >= min_sup:
                WPMFIM(tempNode, list_result, sorted_item_list, UD, W, min_sup, min_conf)
            else:
                tempChild.pop(len(node[6]) - 1)
    isContain = False
    
    for item in list_result:
        if set(itemset_of_node).issubset(set(item)):
            isContain = True
            break
        
    if len(node[6]) == 0 and not isContain:
        list_result.append(itemset_of_node)
        
    

def get_itemset_j_order_larger_than_i(itemset_i, sorted_item_list):
    itemset_j_order_larger_than_i = []

    item_i_size = len(itemset_i)
    index_of_last_item_i = -1

    # Get index of the last item of itemset in Sorted Item List
    if item_i_size > 0:
        for index in range(0, len(sorted_item_list)):
            if itemset_i[-1] == sorted_item_list[index][0]:
                index_of_last_item_i = index
        # index_of_last_item_i = len(sorted_item_list) - 1 - sorted_item_list[::-1].index(itemset_i[-1])

    # Return [] if the last item in itemset is the last item in Sorted Item List
    if index_of_last_item_i == len(sorted_item_list) - 1:
        return []

    for i in range(index_of_last_item_i + 1, len(sorted_item_list)):
        temp_itemset = itemset_i.copy()
        temp_itemset.append(sorted_item_list[i][0])
        itemset_j_order_larger_than_i.append(temp_itemset)

    return itemset_j_order_larger_than_i


# Example

In [6]:
# Example 1
UD = [
    {'A': 0.6, 'B': 0.7},
    {'A': 0.2, 'C': 0.3},
]

weighted = {
    'A': 1,
    'B': 1,
    'C': 1
}

min_sup = 1
wfpt = 0.1

# #Example 2
UD = [
    {"A": 0.5, "B": 0.7, "D": 0.8, "E": 0.9},
    {"B": 0.6, "C": 0.8, "D": 0.6, "E": 0.8},
    {"C": 0.6, "D": 0.9, "E": 0.5},
    {"A": 0.6, "C": 0.7, "D": 0.8, "E": 0.8},
    {"A": 0.8, "B": 0.9, "C": 0.5, "D": 0.6, "E": 0.7},
    {"B": 0.6, "D": 0.9, "E": 0.8},
]

weighted = {
    "A": 0.3,
    "B": 0.9,
    "C": 0.5,
    "D": 0.6,
    "E": 0.9
}

min_sup = 2
wfpt = 0.2

# print(candidate_generate_expected_bound(UD, min_sup, wfpt))
print(algorithms(UD, weighted, min_sup, wfpt))
# for item in get_sorted_list_item(UD, weighted, min_sup, wfpt):
#     print(item)
    
# print(get_itemset_j_order_larger_than_i(['D'], get_sorted_list_item(UD, weighted, min_sup, wfpt)))

[['E', 'D', 'B'], ['E', 'D', 'C'], ['E', 'A']]


# Real dataset

In [7]:
def read_dataset(file_path, mean, variance):
    uncertain_database = []

    # Initialize normal distribution
    random.seed(12345)

    try:
        with open(file_path, 'r') as file:
            curr_id_transaction = 0
            count_num_line = 1

            for line in file:
                data_line_transaction = line.strip().split(" ")
                cur_transaction = {}

                for item in data_line_transaction:
                    data_item = item.split("-")
                    value = data_item[0]
                    prob = float(data_item[1])
                    cur_transaction[value] = prob

                uncertain_database.append(cur_transaction)
                curr_id_transaction += 1
                count_num_line += 1
    except IOError as e:
        print(e)

    print(uncertain_database)

    return uncertain_database

def generate_weighted_table(uncertain_database):
    weighted_table = {}
    distinct_itemset_database = discover_single_items(uncertain_database)

    random.seed(12345)

    for item in distinct_itemset_database:
        weighted = round(random.random(), 1)
        if weighted <= 0:
            weighted = 0.1
        else:
            if weighted >= 1:
                weighted = 0.9
        weighted_table[item] = weighted

    return weighted_table

In [8]:
path = './data/connect4_10K.data'

UD = read_dataset(path, 0.78, 0.65)
W = generate_weighted_table(UD)

for i in range(0, 5):
    print(UD[i])

[{'22': 0.9, '88': 0.5, '67': 0.4, '46': 0.4, '25': 0.3, '49': 0.2, '28': 0.7, '112': 0.5, '115': 0.8, '91': 0.6, '118': 0.9, '70': 0.7, '94': 0.8, '73': 0.3, '52': 0.5, '31': 1.0, '97': 0.5, '10': 0.3, '76': 0.0, '55': 0.2, '34': 0.5, '13': 0.1, '79': 0.2, '58': 0.3, '37': 0.1, '16': 0.3, '19': 0.2, '121': 0.6, '1': 0.4, '100': 0.7, '124': 0.2, '4': 0.7, '103': 0.1, '127': 0.4, '7': 0.8, '106': 0.9, '82': 0.6, '109': 1.0, '61': 0.5, '40': 0.9, '85': 0.1, '64': 0.4, '43': 0.7}, {'22': 0.9, '44': 0.6, '88': 0.1, '67': 0.1, '46': 0.4, '25': 1.0, '49': 0.8, '28': 0.9, '112': 0.1, '115': 0.4, '91': 0.9, '118': 0.4, '70': 0.3, '94': 0.2, '73': 0.1, '52': 0.6, '31': 0.8, '97': 0.5, '10': 0.9, '55': 0.7, '77': 0.5, '34': 0.6, '13': 0.9, '79': 0.3, '58': 0.9, '37': 0.9, '16': 0.1, '19': 0.7, '121': 0.7, '1': 0.1, '100': 0.5, '124': 0.5, '4': 0.2, '103': 0.3, '127': 0.4, '7': 0.1, '106': 0.5, '82': 0.8, '109': 0.7, '61': 0.9, '40': 1.0, '85': 0.6, '64': 0.1}, {'44': 0.5, '88': 0.4, '23': 0.1, '

In [9]:
import time

# Start the timer
start_time = time.time()

min_sup = 0.5*10000
wfpt = 0.9
# Your code here
print(algorithms(UD, W, min_sup, wfpt))

# End the timer
end_time = time.time()

# Calculate the runtime
runtime = end_time - start_time
print(f"Runtime: {runtime} seconds")


  return expected_support - np.sqrt(-2 * expected_support * np.log(1 - min_conf / weighted_itemset))
  return (2 * expected_support - np.log(min_conf / weighted_itemset) + np.sqrt(np.log(min_conf/weighted_itemset)**2 - 8 * expected_support * np.log(min_conf / weighted_itemset))) / 2
