# Database preparation

In [58]:
databaseTable3 = [
    {
        'tid': 1,
        'items': ['a', 'b', 'd', 'e', 'f', 'g'], 
        'quantities': [2, 2, 1, 3, 2, 1], 
        'profits': [-2, 1, 4, 1, -1, -2]},
    {
        'tid': 2, 
        'items': ['b', 'c'], 
        'quantities': [1, 5], 
        'profits': [-1, 1]},
    {
        'tid': 3, 
        'items': ['b', 'c', 'd', 'e', 'f'], 
        'quantities': [2, 1, 3, 2, 1], 
        'profits': [-1, 1, 4, 1, -1]},
    {
        'tid': 4, 
        'items': ['c', 'd', 'e'],
        'quantities': [2, 1, 3], 
        'profits': [1, 4, 1]},
    {
        'tid': 5, 
        'items': ['a', 'f'], 
        'quantities': [2, 3], 
        'profits': [2, -1]},
    {
        'tid': 6, 
        'items': ['a', 'b', 'c', 'd', 'e', 'f', 'g'], 
        'quantities': [2, 1, 4, 2, 1, 3, 1], 
        'profits': [1, 1, 1, 4, 1, -1, -2]},
    {
        'tid': 7, 
        'items': ['b', 'c', 'e'], 
        'quantities': [3, 2, 2], 
        'profits': [1, 2, 2]}
]

In [59]:
# root_database = list()
# with open('data/chess_negative.txt', 'r') as file:
#     count = 0
#     for line in file:
#         temp_struc = line.split(":")
#         temp_items = list(map(lambda x: "item " + x, temp_struc[0].split(" ")))
#         temp_transaction = {
#             "tid": count,
#             "items": temp_items,
#             "quantities": [1] * len(temp_items),
#             "profits": list(map(int, temp_struc[2].split(" ")))
#         }
#         root_database.append(temp_transaction)
#         count += 1
#         if count >= 10:
#             break

In [53]:
# print(root_database[1])

# Proposed TK_EMHUN algorithm

In [54]:
import sys

In [55]:
class ItemSet:
    def __init__(self, itemSet, U):
        self.itemSet = itemSet
        self.U = U
    def __eq__(self, other):
        return self.U == other.U
    def __gt__(self, other):
        return self.U > other.U
    def __lt__(self, other):
        return self.U < other.U
class TopK():
    def __init__(self, K):
        self.K = K
        self.list = []
        self.minU = ItemSet({},-sys.float_info.max)
    def add_to_list(self, itemSet):
        inserted = False
        if len(self.list) < self.K or itemSet > self.minU:
            for i in range(len(self.list)):
                if itemSet > self.list[i]:
                    self.list.insert(i,itemSet)
                    inserted = True
                    break 
                
        if len(self.list) == 0 or inserted == False:
            self.list.append(itemSet)
        if len(self.list) >= self.K:
            self.list = self.list[:self.K]
            self.minU = self.list[-1]

In [56]:
class TKEMHUN:
    def __init__(self, database: list, top_k: int):
        self.top_k = top_k
        self.database = database
        self.positive_items, self.negative_items, self.hybrid_items = self.classify_items(self.database)
        self.RTWU = self.calculate_RTWU(self.database, self.positive_items, self.hybrid_items)
        self.RTWU_all_items = self.calculate_RTWU_all_items(self.database)
        self.minU = -sys.float_info.max
        self.secondary = self.find_secondary(self.RTWU, self.minU)
        self.sorted_secondary, self.sorted_negative_items = self.sort_items_in_second_ni(self.secondary, self.negative_items, self.positive_items, self.hybrid_items, self.RTWU_all_items)

        self.pruned_database = self.prune_transactions(self.database, self.sorted_secondary, self.sorted_negative_items)
        self.sorted_pruned_database = self.sort_items_in_transactions(self.pruned_database, self.sorted_secondary, self.sorted_negative_items)
        self.final_sorted_database = self.sort_based_on_Def13(self.sorted_pruned_database, self.sorted_secondary, self.sorted_negative_items)

        self.RSU_values = self.calculate_RSU(self.final_sorted_database, self.sorted_secondary)
        self.primary = self.determine_Primary(self.RSU_values, self.minU)

    def classify_items(self, database: list):
        positive_items = set()
        negative_items = set()
        hybrid_items = set()
        for transaction in database:
            for item, profit in zip(transaction['items'], transaction['profits']):
                if profit > 0:
                    positive_items.add(item)
                elif profit < 0:
                    negative_items.add(item)
    
        hybrid_items = positive_items.intersection(negative_items)
        positive_items -= hybrid_items
        negative_items -= hybrid_items
        
        return list(positive_items), list(negative_items), list(hybrid_items)

    def determine_Primary(self, RSU, minU):
        return [item for item, utility in RSU.items() if utility >= minU]

    def find_secondary(self, RTWU, minU):
        Secondary = []
        for item, value in RTWU.items():
            if value >= minU:
                Secondary.append(item)
        return Secondary

    def sort_items_in_second_ni(self, Secondary, negative_items, positive_items, hybrid_items, RTWU):
        positive_secondary = [item for item in Secondary if item in positive_items]
        hybrid_secondary = [item for item in Secondary if item in hybrid_items]
        negative_secondary = [item for item in Secondary if item in negative_items]
        
        negative_only = [item for item in negative_items if item not in Secondary]
        
        positive_secondary.sort(key=lambda x: RTWU.get(x, 0))
        hybrid_secondary.sort(key=lambda x: RTWU.get(x, 0))
        negative_secondary.sort(key=lambda x: RTWU.get(x, 0))
        negative_only.sort(key=lambda x: RTWU.get(x, 0))
        
        sorted_secondary = positive_secondary + hybrid_secondary + negative_secondary
        sorted_negative = negative_only
        
        return sorted_secondary, sorted_negative
    
    def calculate_RSU(self, database, itemset):
        RSU = {item: 0 for item in itemset}

        for transaction in database:
            items = transaction['items']
            profits = transaction['profits']
            quantities = transaction.get('quantities', [1] * len(items))
            
            for index, item in enumerate(items):
                if item in itemset:
                    # utility of item X
                    u_item = profits[index] * quantities[index]
                    
                    # remaining relevant utility (rru)
                    rru = sum(
                        profits[i] * quantities[i]
                        for i in range(index + 1, len(items))
                        if profits[i] > 0
                    )

                    RSU[item] += u_item + rru
                    RSU[item] += transaction.get("u_project", 00)
            
        return RSU

    def calculate_RTU(self, transaction):
        RTU = 0
        for profit, quantity in zip(transaction['profits'], transaction['quantities']):
            if profit > 0:
                RTU += profit * quantity
        return RTU

    # RTWU without negative items
    def calculate_RTWU(self, database, positive_items, hybrid_items):
        RTWU = {}
        
        for transaction in database:
            RTU = self.calculate_RTU(transaction)
            for item in transaction['items']:
                if item in positive_items or item in hybrid_items :
                    if item not in RTWU:
                        RTWU[item] = 0
                    RTWU[item] += RTU
        
        return RTWU

    # RTWU including negative items
    def calculate_RTWU_all_items(self, database):
        RTWU = {}
        for transaction in database:
            RTU = self.calculate_RTU(transaction)
            for item in transaction['items']:
                if item not in RTWU:
                    RTWU[item] = 0
                RTWU[item] += RTU
        
        return RTWU

    def calculate_utility(self, itemset, database):
        utility = 0
        for transaction in database:
            if all(item in transaction['items'] for item in itemset):
                indices = [transaction['items'].index(item) for item in itemset if item in transaction['items']]
                quantities = transaction.get('quantities', [1] * len(transaction['items']))
                utility += sum(transaction['profits'][index] * quantities[index] for index in indices)
        return utility


    def project_database(self, itemset, database):
        print(f"***************projecting database***********")
        print(f"itemset: {itemset}")
        print(f"database: {database}")
        projected_db = []
        u_beta = 0
        
        for transaction in database:
            if itemset[-1] in transaction['items']:
                last_index = transaction['items'].index(itemset[-1]) 
                projected_items = transaction['items'][last_index + 1:]
                projected_profits = transaction['profits'][last_index + 1:]

                projected_quantities = (
                    transaction.get('quantities', [1] * len(transaction['items']))[last_index + 1:]
                )

                tid = transaction['tid']
                u_project = transaction['quantities'][last_index]*transaction['profits'][last_index]  + transaction.get("u_project", 00)
                u_beta += u_project
                if projected_items:
                    projected_db.append({
                        'tid': tid,
                        'items': projected_items,
                        'profits': projected_profits,
                        'quantities': projected_quantities,
                        'u_project': u_project
                    })
                    
        return projected_db, u_beta

    def calculate_RLU(self, database, itemset):
        RLU = {item: 0 for item in itemset}

        for transaction in database:
            for index, item in enumerate(transaction['items']):
                if item in itemset:
                    RLU[item] += transaction.get("u_project", 00) 
                    RLU[item] += sum(transaction['profits'][index] * transaction['quantities'][index] for index in range(len(transaction['profits'])) if transaction['profits'][index] > 0)
        
        return RLU

    def sort_items_in_transactions(self, pruned_database, sorted_secondary, sorted_negative_items):
        sorted_database = []
        combined_order = sorted_secondary + sorted_negative_items 
        
        for transaction in pruned_database:
            sorted_items = sorted(
                transaction['items'],
                key=lambda x: combined_order.index(x) if x in combined_order else float('inf')
            )
            
            sorted_quantities = [quantity for _, quantity in sorted(zip(transaction['items'], transaction['quantities']), key=lambda x: combined_order.index(x[0]))]
            sorted_profits = [profit for _, profit in sorted(zip(transaction['items'], transaction['profits']), key=lambda x: combined_order.index(x[0]))]
            
            sorted_transaction = {
                'tid': transaction['tid'],
                'items': sorted_items,
                'quantities': sorted_quantities,
                'profits': sorted_profits
            }
            sorted_database.append(sorted_transaction)
        
        return sorted_database
    
    def prune_transactions(self, database, sorted_secondary, sorted_negative_items):
        pruned_database = []
        
        for transaction in database:
            pruned_items = []
            pruned_quantities = []
            pruned_profits = []
            
            for item, quantity, profit in zip(transaction['items'], transaction['quantities'], transaction['profits']):
                if item in sorted_secondary or item in sorted_negative_items:
                    pruned_items.append(item)
                    pruned_quantities.append(quantity)
                    pruned_profits.append(profit)
            
            if pruned_items:
                pruned_transaction = {
                    'tid': transaction['tid'],
                    'items': pruned_items,
                    'quantities': pruned_quantities,
                    'profits': pruned_profits
                }
                pruned_database.append(pruned_transaction)
        
        return pruned_database
    
    def sort_based_on_Def13(self, sorted_pruned_database, sorted_secondary, sorted_negative_items):
        combined_order = sorted_secondary + sorted_negative_items

        def transaction_sort_key(transaction):
            items_order = [-combined_order.index(item) for item in transaction['items']]
            return items_order[::-1], len(transaction['items']), transaction['tid']

        sorted_database = sorted(
            sorted_pruned_database,
            key=transaction_sort_key
        )
        return sorted_database

    
    def run(self):
        top_k = TopK(15)
        self.search(self.sorted_negative_items, [], self.final_sorted_database, self.primary, self.sorted_secondary, top_k)
        for i in top_k.list:
            print("itemset",i.itemSet, "\n", "Utility", i.U, "\n")

    
    def search(self, eta, X, database, primary_items, secondary_items, top_k_object: TopK):
        print("************search******************")
        print(f"*****************minu:{top_k_object.minU.U}")
        for iter, i in enumerate(primary_items):
            beta = X + [i]
            print(f"\nProcessing item: {i}, Current Beta: {beta}")

            #u_beta = calculate_utility(beta, database)
            projected_db, u_beta = self.project_database(beta, database)
            
            print(f"Utility of Beta: {u_beta}")
            print(f"Projected Database: {projected_db}")
            beta_itemset = ItemSet(set(beta), u_beta)

            if beta_itemset > top_k_object.minU:
                top_k_object.add_to_list(beta_itemset)
                print(f"High Utility Itemset Found: {beta} with Utility {u_beta}")

            if beta_itemset > top_k_object.minU:
                self.searchN(eta, beta, projected_db, top_k_object)

            print("out if")
            print(f"projected_db: {projected_db}")
            print(f'secondary_items: {secondary_items}')
            print(f'iter: {iter}')
            rsu = self.calculate_RSU(projected_db, secondary_items[iter + 1:])
            rlu = self.calculate_RLU(projected_db, secondary_items[iter + 1:])
            
            print(f"RSU: {rsu}\nRLU: {rlu}")

            primary_beta = [z for z in secondary_items[iter + 1:] if rsu[z] >= top_k_object.minU.U]
            secondary_beta = [z for z in secondary_items[iter + 1:] if rlu[z] >= top_k_object.minU.U]
            
            print(f"Primary(β): {primary_beta}")
            print(f"Secondary(β): {secondary_beta}")


            self.search(eta, beta, projected_db, primary_beta, secondary_beta, top_k_object)


    def searchN(self, eta, X, database, top_k_object:TopK):
        print("************searchN******************")
        print(f"*****************minu:{top_k_object.minU.U}")
        for iter, i in enumerate(eta):
            beta = X + [i]
            print(f"\nProcessing item: {i}, Current Beta: {beta}")

            projected_db, u_beta = self.project_database(beta, database)
            #u_beta = calculate_utility(beta, final_sorted_database)

            print(f"Database from Search: {database}")
            print(f"Utility of Beta: {u_beta}")
            print(f"Projected Database: {projected_db}")

            beta_itemset = ItemSet(set(beta), u_beta)

            if beta_itemset > top_k_object.minU:
                top_k_object.add_to_list(beta_itemset)
                print(f"High Utility Itemset Found: {beta} with Utility {u_beta}")

            rsu = self.calculate_RSU(projected_db, eta[iter + 1:])
            print(f"RSU for Negative Items: {rsu}")

            primary_beta = [z for z in eta[iter + 1:] if rsu.get(z, 0) >= top_k_object.minU.U]
            print(f"Filtered Negative Items: {primary_beta}")

            self.searchN(primary_beta, beta, projected_db, top_k_object)

In [57]:
tk_emhun = TKEMHUN(database=root_database, top_k=15)
tk_emhun.run()

************search******************
*****************minu:-1.7976931348623157e+308

Processing item: item 63, Current Beta: ['item 63']
***************projecting database***********
itemset: ['item 63']
database: [{'tid': 1, 'items': ['item 50', 'item 15', 'item 19', 'item 46', 'item 1', 'item 5', 'item 9', 'item 13', 'item 21', 'item 25', 'item 27', 'item 34', 'item 42', 'item 54', 'item 68', 'item 70', 'item 74', 'item 12', 'item 23', 'item 64', 'item 62', 'item 40', 'item 3', 'item 31', 'item 52', 'item 60', 'item 29', 'item 58', 'item 72', 'item 7', 'item 17', 'item 36', 'item 48', 'item 44', 'item 56', 'item 38', 'item 66'], 'quantities': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'profits': [42, 10, 6, 48, 1, 15, 15, 32, 50, 10, 18, 3, 18, 28, 10, 20, 5, -24, -18, -40, -24, -42, -16, -30, -16, -14, -18, -8, -40, -8, -15, -54, -18, -28, -12, -24, -24]}, {'tid': 2, 'items': ['item 16', 'item 50', 'item 19', 'ite

UnboundLocalError: cannot access local variable 'u_beta' where it is not associated with a value