# EMHUN Algorithm Implementation
The implementation of the EMHUN algorithm to mine high-utility itemsets from data with unstable profits.

### Database from Table 3

In [316]:
import sys

In [317]:
# Example transaction database.
# Each transaction is a dict with:
#   'tid'        - transaction identifier
#   'items'      - item names
#   'quantities' - purchased quantity of each item (parallel to 'items')
#   'profits'    - unit profit of each item in this transaction (parallel to
#                  'items'); the same item can carry different signs in
#                  different transactions (e.g. 'a' and 'b' below).
databaseTable3 = [
    {
        'tid': 1,
        'items': ['a', 'b', 'd', 'e', 'f', 'g'], 
        'quantities': [2, 2, 1, 3, 2, 1], 
        'profits': [-2, 1, 4, 1, -1, -2]},
    {
        'tid': 2, 
        'items': ['b', 'c'],
        'quantities': [1, 5], 
        'profits': [-1, 1]},
    {
        'tid': 3, 
        'items': ['b', 'c', 'd', 'e', 'f'], 
        'quantities': [2, 1, 3, 2, 1], 
        'profits': [-1, 1, 4, 1, -1]},
    {
        'tid': 4, 
        'items': ['c', 'd', 'e'], 
        'quantities': [2, 1, 3], 
        'profits': [1, 4, 1]},
    {
        'tid': 5, 
        'items': ['a', 'f'], 
        'quantities': [2, 3], 
        'profits': [2, -1]},
    {
        'tid': 6, 
        'items': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
        'quantities': [2, 1, 4, 2, 1, 3, 1],
        'profits': [1, 1, 1, 4, 1, -1, -2]},
    {
        'tid': 7, 
        'items': ['b', 'c', 'e'], 
        'quantities': [3, 2, 2], 
        'profits': [1, 2, 2]}
]

In [318]:
class ItemSet:
    """An itemset together with its utility.

    All comparisons (==, >, <) look only at the utility ``U``; the items
    themselves play no part in ordering or equality.
    """

    def __init__(self, itemSet, U):
        # itemSet: the collection of items; U: the utility attached to it.
        self.itemSet, self.U = itemSet, U

    def __eq__(self, other):
        """True when both itemsets carry the same utility."""
        return self.U == other.U

    def __gt__(self, other):
        """True when this itemset's utility is strictly larger."""
        return other.U < self.U

    def __lt__(self, other):
        """True when this itemset's utility is strictly smaller."""
        return other.U > self.U
class TopK():
    """Bounded list holding the K best entries seen so far, best first.

    Attributes:
        K:    maximum number of entries kept.
        list: kept entries, ordered from highest to lowest.
        minU: current admission threshold; starts at the most negative
              float and becomes the last kept entry once K entries are held.
    """

    def __init__(self, K, loose_lower_bound):
        # NOTE: loose_lower_bound is accepted but not used by this class.
        self.K = K
        self.list = []
        self.minU = ItemSet({}, -sys.float_info.max)

    def add_to_list(self, itemSet):
        """Insert ``itemSet`` at its ranked position and trim to K entries.

        An entry that ties with existing ones is placed after them. When the
        list is already full and the entry does not beat ``minU`` it is
        appended and immediately trimmed away again.
        """
        placed = False
        has_room = len(self.list) < self.K
        if has_room or itemSet > self.minU:
            # Put the newcomer in front of the first entry it strictly beats.
            for position, current in enumerate(self.list):
                if itemSet > current:
                    self.list.insert(position, itemSet)
                    placed = True
                    break

        if not placed:
            self.list.append(itemSet)

        if len(self.list) >= self.K:
            self.list = self.list[:self.K]
            self.minU = self.list[-1]

Show variables in Table 3

Min Utility variable and X

In [319]:
# Mining parameters and initial state used by the cells below.
minU = 25     # minimum-utility threshold
TOP_K = 10    # number of itemsets to keep
X = []        # starting (empty) itemset
top_k_patterns = []
print(top_k_patterns)

[]


### Classify items
This function classifies items into positive, negative, and hybrid based on their profits.

In [320]:
def classify_items(database):
    """Split items into positive-only, negative-only and hybrid groups.

    An item is *positive* if every non-zero profit it carries is > 0,
    *negative* if every non-zero profit is < 0, and *hybrid* if it appears
    with both signs. Items whose profit is always 0 are not classified.

    Args:
        database: iterable of transactions, each a dict with parallel
            'items' and 'profits' sequences.

    Returns:
        (positive_items, negative_items, hybrid_items) as three lists, each
        in order of first appearance in ``database``. The order is therefore
        reproducible across kernel restarts (set iteration order is not).
    """
    # Dicts are used as insertion-ordered sets so the result order is stable.
    positive_seen = {}
    negative_seen = {}

    for transaction in database:
        for item, profit in zip(transaction['items'], transaction['profits']):
            if profit > 0:
                positive_seen.setdefault(item, None)
            elif profit < 0:
                negative_seen.setdefault(item, None)

    hybrid_items = [item for item in positive_seen if item in negative_seen]
    positive_items = [item for item in positive_seen if item not in negative_seen]
    negative_items = [item for item in negative_seen if item not in positive_seen]

    return positive_items, negative_items, hybrid_items

Output:

In [321]:
# Classify every item of the example database; the three lists are reused as
# notebook-level variables by later cells.
positive_items, negative_items, hybrid_items = classify_items(databaseTable3)
print("Positive Items (ρ):", positive_items)
print("Hybrid Items (δ):", hybrid_items)
print("Negative Items (η):", negative_items)

Positive Items (ρ): ['d', 'e', 'c']
Hybrid Items (δ): ['b', 'a']
Negative Items (η): ['g', 'f']


### Calculate RTWU

In [322]:
def calculate_RTU(transaction):
    """Return the sum of profit * quantity over the positive-profit items.

    Items whose profit in this transaction is zero or negative contribute
    nothing.
    """
    unit_profits = transaction['profits']
    amounts = transaction['quantities']

    total = 0
    for unit_profit, amount in zip(unit_profits, amounts):
        if not unit_profit > 0:
            continue
        total += unit_profit * amount
    return total

# RTWU without negative items
def calculate_RTWU(database, positive_items, hybrid_items):
    """Accumulate, per positive or hybrid item, the RTU of its transactions.

    Items that are in neither ``positive_items`` nor ``hybrid_items`` are
    skipped and do not appear in the result.

    Returns:
        dict mapping item -> summed RTU, keyed in order of first appearance.
    """
    totals = {}

    for txn in database:
        txn_bound = calculate_RTU(txn)
        for name in txn['items']:
            if not (name in positive_items or name in hybrid_items):
                continue
            totals[name] = totals.get(name, 0) + txn_bound

    return totals
# RTWU including negative items
def calculate_RTWU_all_items(database, positive_items, hybrid_items, negative_items):
    """Accumulate, for every item, the RTU of the transactions containing it.

    The three classification arguments are accepted but not consulted:
    every item found in a transaction is counted.

    Returns:
        dict mapping item -> summed RTU, keyed in order of first appearance.
    """
    totals = {}
    for txn in database:
        txn_bound = calculate_RTU(txn)
        for name in txn['items']:
            totals[name] = totals.get(name, 0) + txn_bound

    return totals

Input:

In [323]:
# RTWU restricted to positive/hybrid items, and RTWU over every item
# (the latter is used later to order the purely negative items too).
RTWU = calculate_RTWU(databaseTable3, positive_items, hybrid_items)
RTWU_all_items = calculate_RTWU_all_items(databaseTable3, positive_items, hybrid_items, negative_items)

Output:

In [324]:
# Print both RTWU tables, one item per line, in dict insertion order.
print("\nRTWU:")
for item, value in RTWU.items():
    print(f"Item {item}: {value}")
print("\nRTWU including negative items:")
for item, value in RTWU_all_items.items():
    print(f"Item {item}: {value}")


RTWU:
Item a: 29
Item b: 56
Item d: 49
Item e: 60
Item c: 56

RTWU including negative items:
Item a: 29
Item b: 56
Item d: 49
Item e: 60
Item f: 44
Item g: 25
Item c: 56


### Find Secondary(X)
Uses the RTWU values, based on Steps 5–6 (page 8).

In [325]:
def find_Secondary(RTWU, minU):
    """Return the items whose RTWU is at least ``minU``.

    The result keeps the iteration order of the ``RTWU`` mapping.
    """
    return [name for name, bound in RTWU.items() if bound >= minU]

Input: RTWU list and MinU variable

In [326]:
# Keep the items whose RTWU (computed without purely negative items) reaches minU.
Secondary = find_Secondary(RTWU, minU)

Output:

In [327]:
# Show the items that passed the RTWU filter.
print("\nSecondary: ", Secondary)


Secondary:  ['a', 'b', 'd', 'e', 'c']


### Sort Secondary(X) ∪ η by definition 7;

Sort items in Secondary(X) and Negative Items (η) based on their RTWU values.

In [328]:
def sort_items_in_second_ni(Secondary, negative_items, RTWU, positive=None, hybrid=None):
    """Order the secondary items and the remaining negative items.

    Secondary items are grouped as positive, then hybrid, then negative;
    inside each group items are sorted by ascending RTWU (missing RTWU
    counts as 0, ties keep their order from ``Secondary``). Negative items
    that are not in ``Secondary`` are returned separately, also by ascending
    RTWU.

    Args:
        Secondary: candidate items.
        negative_items: items classified as purely negative.
        RTWU: mapping item -> RTWU used as the sort key.
        positive: items classified as purely positive. Defaults to the
            notebook-level ``positive_items`` so existing calls keep working.
        hybrid: items classified as hybrid. Defaults to the notebook-level
            ``hybrid_items``.

    Returns:
        (sorted_secondary, sorted_negative) as two lists.
    """
    # Explicit arguments avoid the hidden dependency on notebook globals;
    # the fallback preserves the behaviour of the original three-argument call.
    if positive is None:
        positive = positive_items
    if hybrid is None:
        hybrid = hybrid_items

    def by_rtwu(item):
        return RTWU.get(item, 0)

    positive_secondary = sorted((item for item in Secondary if item in positive), key=by_rtwu)
    hybrid_secondary = sorted((item for item in Secondary if item in hybrid), key=by_rtwu)
    negative_secondary = sorted((item for item in Secondary if item in negative_items), key=by_rtwu)

    sorted_negative = sorted((item for item in negative_items if item not in Secondary), key=by_rtwu)
    sorted_secondary = positive_secondary + hybrid_secondary + negative_secondary

    return sorted_secondary, sorted_negative

Output:

In [329]:
# RTWU_all_items is passed (rather than RTWU) so that purely negative items
# also have a sort key.
sorted_secondary, sorted_negative_items = sort_items_in_second_ni(Secondary, negative_items, RTWU_all_items)
print("\nSorted Secondary:", sorted_secondary)
print("Sorted Negative Items (η):", sorted_negative_items)


Sorted Secondary: ['d', 'c', 'e', 'a', 'b']
Sorted Negative Items (η): ['g', 'f']


### Prune transactions based on Secondary(X) and Negative items (η)

Remove items not in Secondary(X) or Negative Items to reduce the database size.

In [330]:
def prune_transactions(database, sorted_secondary, sorted_negative_items):
    """Drop every item that is neither secondary nor negative.

    Quantities and profits are filtered in step with the items. A transaction
    left with no items is omitted from the result.

    Returns:
        New list of transaction dicts ('tid', 'items', 'quantities',
        'profits'); the input transactions are not modified.
    """
    kept_transactions = []

    for txn in database:
        survivors = [
            (name, amount, gain)
            for name, amount, gain in zip(txn['items'], txn['quantities'], txn['profits'])
            if name in sorted_secondary or name in sorted_negative_items
        ]
        if not survivors:
            continue

        kept_transactions.append({
            'tid': txn['tid'],
            'items': [name for name, _, _ in survivors],
            'quantities': [amount for _, amount, _ in survivors],
            'profits': [gain for _, _, gain in survivors],
        })

    return kept_transactions

Database after pruning:

In [331]:
# Prune the example database and print each remaining transaction.
pruned_database = prune_transactions(databaseTable3, sorted_secondary, sorted_negative_items)
print("\nAfter pruned transactions:")
for transaction in pruned_database:
    print(transaction)


After pruned transactions:
{'tid': 1, 'items': ['a', 'b', 'd', 'e', 'f', 'g'], 'quantities': [2, 2, 1, 3, 2, 1], 'profits': [-2, 1, 4, 1, -1, -2]}
{'tid': 2, 'items': ['b', 'c'], 'quantities': [1, 5], 'profits': [-1, 1]}
{'tid': 3, 'items': ['b', 'c', 'd', 'e', 'f'], 'quantities': [2, 1, 3, 2, 1], 'profits': [-1, 1, 4, 1, -1]}
{'tid': 4, 'items': ['c', 'd', 'e'], 'quantities': [2, 1, 3], 'profits': [1, 4, 1]}
{'tid': 5, 'items': ['a', 'f'], 'quantities': [2, 3], 'profits': [2, -1]}
{'tid': 6, 'items': ['a', 'b', 'c', 'd', 'e', 'f', 'g'], 'quantities': [2, 1, 4, 2, 1, 3, 1], 'profits': [1, 1, 1, 4, 1, -1, -2]}
{'tid': 7, 'items': ['b', 'c', 'e'], 'quantities': [3, 2, 2], 'profits': [1, 2, 2]}


### Sorting stage

Sort the items in the remaining transactions in the order of items with positive utility only, items with both negative and positive utility, items with negative utility only;

In [332]:
def sort_items_in_transactions(pruned_database, sorted_secondary, sorted_negative_items):
    """Reorder the items of each transaction by the global item order.

    The global order is ``sorted_secondary`` followed by
    ``sorted_negative_items``. Items, quantities and profits are reordered
    together in a single stable sort so they always stay aligned. An item
    that is missing from the global order is placed after all known items
    (previously the quantity/profit sorts raised ValueError for such an item
    while the item sort tolerated it).

    Returns:
        New list of transaction dicts ('tid', 'items', 'quantities',
        'profits'); the input transactions are not modified.
    """
    combined_order = sorted_secondary + sorted_negative_items

    # Position lookup built once; setdefault keeps the first occurrence,
    # matching what list.index would return.
    rank = {}
    for position, item in enumerate(combined_order):
        rank.setdefault(item, position)
    unknown_rank = float('inf')

    sorted_database = []
    for transaction in pruned_database:
        rows = sorted(
            zip(transaction['items'], transaction['quantities'], transaction['profits']),
            key=lambda row: rank.get(row[0], unknown_rank),
        )
        sorted_database.append({
            'tid': transaction['tid'],
            'items': [row[0] for row in rows],
            'quantities': [row[1] for row in rows],
            'profits': [row[2] for row in rows],
        })

    return sorted_database

Output:

In [333]:
# Reorder the items inside every pruned transaction and print the result.
sorted_pruned_database = sort_items_in_transactions(pruned_database, sorted_secondary, sorted_negative_items)
print("\nSorted Items in Transactions (Step 9):")
for transaction in sorted_pruned_database:
    print(transaction)


Sorted Items in Transactions (Step 9):
{'tid': 1, 'items': ['d', 'e', 'a', 'b', 'g', 'f'], 'quantities': [1, 3, 2, 2, 1, 2], 'profits': [4, 1, -2, 1, -2, -1]}
{'tid': 2, 'items': ['c', 'b'], 'quantities': [5, 1], 'profits': [1, -1]}
{'tid': 3, 'items': ['d', 'c', 'e', 'b', 'f'], 'quantities': [3, 1, 2, 2, 1], 'profits': [4, 1, 1, -1, -1]}
{'tid': 4, 'items': ['d', 'c', 'e'], 'quantities': [1, 2, 3], 'profits': [4, 1, 1]}
{'tid': 5, 'items': ['a', 'f'], 'quantities': [2, 3], 'profits': [2, -1]}
{'tid': 6, 'items': ['d', 'c', 'e', 'a', 'b', 'g', 'f'], 'quantities': [2, 4, 1, 2, 1, 1, 3], 'profits': [4, 1, 1, 1, 1, -2, -1]}
{'tid': 7, 'items': ['c', 'e', 'b'], 'quantities': [2, 2, 3], 'profits': [2, 2, 1]}


Sort transactions based on Definition 13

In [334]:
def sort_based_on_Def13(sorted_pruned_database, sorted_secondary, sorted_negative_items):
    """Return the transactions sorted by their items read from last to first.

    Each transaction is keyed by the negated global positions of its items,
    taken in reverse item order; remaining ties are broken by item count and
    then by 'tid'. The global order is ``sorted_secondary`` followed by
    ``sorted_negative_items``.
    """
    combined_order = sorted_secondary + sorted_negative_items

    def _backward_key(txn):
        names = txn['items']
        negated_positions = [-combined_order.index(name) for name in names]
        negated_positions.reverse()
        return negated_positions, len(txn['items']), txn['tid']

    return sorted(sorted_pruned_database, key=_backward_key)

Output:

In [335]:
# Order the transactions themselves and print the final database.
final_sorted_database = sort_based_on_Def13(sorted_pruned_database, sorted_secondary, sorted_negative_items)
print("\nFinal Sorted Transactions (Step 10):")
for transaction in final_sorted_database:
    print(transaction)


Final Sorted Transactions (Step 10):
{'tid': 6, 'items': ['d', 'c', 'e', 'a', 'b', 'g', 'f'], 'quantities': [2, 4, 1, 2, 1, 1, 3], 'profits': [4, 1, 1, 1, 1, -2, -1]}
{'tid': 1, 'items': ['d', 'e', 'a', 'b', 'g', 'f'], 'quantities': [1, 3, 2, 2, 1, 2], 'profits': [4, 1, -2, 1, -2, -1]}
{'tid': 3, 'items': ['d', 'c', 'e', 'b', 'f'], 'quantities': [3, 1, 2, 2, 1], 'profits': [4, 1, 1, -1, -1]}
{'tid': 5, 'items': ['a', 'f'], 'quantities': [2, 3], 'profits': [2, -1]}
{'tid': 7, 'items': ['c', 'e', 'b'], 'quantities': [2, 2, 3], 'profits': [2, 2, 1]}
{'tid': 2, 'items': ['c', 'b'], 'quantities': [5, 1], 'profits': [1, -1]}
{'tid': 4, 'items': ['d', 'c', 'e'], 'quantities': [1, 2, 3], 'profits': [4, 1, 1]}


### Scan D to compute RSU(X,i), ∀i ∈ Secondary(X)

In [336]:
def calculate_RSU(database, itemset):
    """Accumulate, for each item of ``itemset``, its bound over ``database``.

    For every occurrence of an item in a transaction the following is added:
    the item's own utility (profit * quantity), the utilities of the
    positive-profit items that come after it in that transaction, and the
    transaction's 'u_project' value (0 when absent). Quantities default to 1
    when a transaction has no 'quantities' entry.

    Returns:
        dict mapping every item of ``itemset`` to its accumulated value
        (0 for items that never occur).
    """
    RSU = dict.fromkeys(itemset, 0)

    for txn in database:
        names = txn['items']
        unit_profits = txn['profits']
        amounts = txn.get('quantities', [1] * len(names))

        for pos, name in enumerate(names):
            if name not in itemset:
                continue

            own_utility = unit_profits[pos] * amounts[pos]

            # Only positive utilities located after the item are counted.
            following_positive = sum(
                unit_profits[k] * amounts[k]
                for k in range(pos + 1, len(names))
                if unit_profits[k] > 0
            )

            RSU[name] += own_utility + following_positive
            RSU[name] += txn.get("u_project", 0)

    return RSU




Output:

In [337]:
# Compute the bound of every secondary item on the fully sorted database.
RSU_values = calculate_RSU(final_sorted_database, sorted_secondary)
print("RSU Values:", RSU_values)

RSU Values: {'d': 49, 'c': 32, 'e': 21, 'a': 5, 'b': 3}


### Primary(X) = {i|i ∈ Secondary(X) ∧ RSU(X, i) ≥ minU};

In [338]:
def determine_Primary(RSU, minU):
    """Return the items whose RSU value is at least ``minU``.

    The result keeps the iteration order of the ``RSU`` mapping.
    """
    primary = []
    for name, bound in RSU.items():
        if bound >= minU:
            primary.append(name)
    return primary

Output:

In [339]:
# Items whose RSU reaches minU become the starting points of the search.
Primary = determine_Primary(RSU_values, minU)
print("Primary(X):", Primary)

Primary(X): ['d', 'c']


In [340]:
def calculate_utility(itemset, database):
    """Return the total utility of ``itemset`` over ``database``.

    Only transactions containing every item of ``itemset`` contribute; each
    contributes the sum of profit * quantity of those items. Quantities
    default to 1 when a transaction has no 'quantities' entry.
    """
    total = 0
    for txn in database:
        names = txn['items']
        if any(wanted not in names for wanted in itemset):
            continue  # transaction does not contain the whole itemset

        positions = [names.index(wanted) for wanted in itemset]
        amounts = txn.get('quantities', [1] * len(names))
        total += sum(txn['profits'][pos] * amounts[pos] for pos in positions)
    return total


def project_database(itemset, database):
    """Project ``database`` on the last item of ``itemset``.

    For every transaction containing ``itemset[-1]`` the part after that item
    is kept, and 'u_project' is set to the utility of that item plus the
    transaction's previous 'u_project' (0 when absent). Transactions with
    nothing left after the item still count towards the returned utility but
    are not added to the projected database.

    Quantities default to 1 per item when a transaction has no 'quantities'
    entry; the same default is now used for the utility computation, which
    previously raised KeyError in that case.

    Args:
        itemset: non-empty list of items; only its last element is matched,
            so ``database`` is expected to be already projected on the
            preceding items.
        database: list of transaction dicts.

    Returns:
        (projected_db, u_beta): the projected transactions and the sum of
        'u_project' over all matching transactions.
    """
    print(f"*************projecting database*********")
    print(f"itemset: {itemset}")
    print(f"database: {database}")
    projected_db = []
    u_beta = 0
    last_item = itemset[-1]
    for transaction in database:
        items = transaction['items']
        if last_item not in items:
            continue

        last_index = items.index(last_item)
        profits = transaction['profits']
        # Single source of truth for quantities, shared by slice and utility.
        quantities = transaction.get('quantities', [1] * len(items))

        u_project = quantities[last_index] * profits[last_index] + transaction.get("u_project", 0)
        u_beta += u_project

        projected_items = items[last_index + 1:]
        if projected_items:
            projected_db.append({
                'tid': transaction['tid'],
                'items': projected_items,
                'profits': profits[last_index + 1:],
                'quantities': quantities[last_index + 1:],
                'u_project': u_project
            })

    return projected_db, u_beta

def calculate_RLU(database, itemset):
    """Accumulate, for each item of ``itemset``, its bound over ``database``.

    For every occurrence of an item in a transaction the following is added:
    the transaction's 'u_project' value (0 when absent) and the sum of
    profit * quantity over all positive-profit items of that transaction.

    Returns:
        dict mapping every item of ``itemset`` to its accumulated value
        (0 for items that never occur).
    """
    RLU = dict.fromkeys(itemset, 0)

    for txn in database:
        for name in txn['items']:
            if name not in itemset:
                continue
            RLU[name] += txn.get("u_project", 0)
            # Every positive utility of the transaction counts, wherever it sits.
            RLU[name] += sum(
                txn['profits'][k] * txn['quantities'][k]
                for k in range(len(txn['profits']))
                if txn['profits'][k] > 0
            )

    return RLU




def search(eta, X, database, primary_items, secondary_items, top_k_object: TopK):
    """Recursively extend itemset ``X`` with each item of ``primary_items``.

    For every extension ``beta = X + [i]`` the database is projected on
    ``i``, ``beta`` is offered to ``top_k_object``, negative-item extensions
    are explored through ``searchN``, and the search recurses with the
    primary/secondary items recomputed on the projected database.

    Args:
        eta: negative items handed to ``searchN``.
        X: current itemset (list of items).
        database: database already projected on ``X``.
        primary_items: items used to extend ``X``.
        secondary_items: ordered candidate items for deeper extensions.
        top_k_object: TopK collector; its ``minU.U`` is the pruning threshold.
    """
    print("************search******************")
    print(f"*****************minu:{top_k_object.minU.U}")
    for position, i in enumerate(primary_items):
        beta = X + [i]
        print(f"\nProcessing item: {i}, Current Beta: {beta}")

        projected_db, u_beta = project_database(beta, database)

        print(f"Utility of Beta: {u_beta}")
        print(f"Projected Database: {projected_db}")
        beta_itemset = ItemSet(set(beta), u_beta)

        if beta_itemset > top_k_object.minU:
            top_k_object.add_to_list(beta_itemset)
            print(f"High Utility Itemset Found: {beta} with Utility {u_beta}")

        # add_to_list may have raised minU to beta's own utility; in that
        # case this check is False and the negative-item search is skipped.
        if beta_itemset > top_k_object.minU:
            searchN(eta, beta, projected_db, top_k_object)

        # Candidates for extending beta are the items ordered AFTER i in
        # secondary_items. The position of i in primary_items only matches
        # its position in secondary_items while both lists are identical, so
        # the slice must be taken from i's position in secondary_items.
        if i in secondary_items:
            start = secondary_items.index(i) + 1
        else:
            start = position + 1  # fall back to the loop index
        following_items = secondary_items[start:]

        print("out if")
        print(f"projected_db: {projected_db}")
        print(f'secondary_items: {secondary_items}')
        print(f'iter: {position}')
        rsu = calculate_RSU(projected_db, following_items)
        rlu = calculate_RLU(projected_db, following_items)

        print(f"RSU: {rsu}\nRLU: {rlu}")

        primary_beta = [z for z in following_items if rsu[z] >= top_k_object.minU.U]
        secondary_beta = [z for z in following_items if rlu[z] >= top_k_object.minU.U]

        print(f"Primary(β): {primary_beta}")
        print(f"Secondary(β): {secondary_beta}")

        search(eta, beta, projected_db, primary_beta, secondary_beta, top_k_object)


def searchN(eta, X, database, top_k_object:TopK):
    """Recursively extend itemset ``X`` with the negative items in ``eta``.

    Each extension ``beta = X + [i]`` is projected, offered to
    ``top_k_object`` when it beats the current threshold, and extended
    further with the later items of ``eta`` whose RSU on the projected
    database reaches the threshold.
    """
    print("************searchN******************")
    print(f"*****************minu:{top_k_object.minU.U}")
    for position, i in enumerate(eta):
        beta = X + [i]
        print(f"\nProcessing item: {i}, Current Beta: {beta}")

        projected_db, u_beta = project_database(beta, database)

        print(f"Database from Search: {database}")
        print(f"Utility of Beta: {u_beta}")
        print(f"Projected Database: {projected_db}")

        candidate = ItemSet(set(beta), u_beta)
        if candidate > top_k_object.minU:
            top_k_object.add_to_list(candidate)
            print(f"High Utility Itemset Found: {beta} with Utility {u_beta}")

        # Only negative items that come after i may extend beta.
        later_negatives = eta[position + 1:]
        rsu = calculate_RSU(projected_db, later_negatives)
        print(f"RSU for Negative Items: {rsu}")

        threshold = top_k_object.minU.U
        primary_beta = [z for z in later_negatives if rsu.get(z, 0) >= threshold]
        print(f"Filtered Negative Items: {primary_beta}")

        searchN(primary_beta, beta, projected_db, top_k_object)

# `loose_lower_bound` was never defined in this notebook, so a fresh kernel
# failed here with NameError. TopK does not use the argument and always starts
# from the most negative float, so that same value is passed explicitly.
loose_lower_bound = -sys.float_info.max
top_k = TopK(TOP_K, loose_lower_bound)

# Start the mining from the empty itemset on the fully sorted database.
search(sorted_negative_items, [], final_sorted_database, Primary, sorted_secondary, top_k)

************search******************
*****************minu:-1.7976931348623157e+308

Processing item: d, Current Beta: ['d']
*************projecting database*********
itemset: ['d']
database: [{'tid': 6, 'items': ['d', 'c', 'e', 'a', 'b', 'g', 'f'], 'quantities': [2, 4, 1, 2, 1, 1, 3], 'profits': [4, 1, 1, 1, 1, -2, -1]}, {'tid': 1, 'items': ['d', 'e', 'a', 'b', 'g', 'f'], 'quantities': [1, 3, 2, 2, 1, 2], 'profits': [4, 1, -2, 1, -2, -1]}, {'tid': 3, 'items': ['d', 'c', 'e', 'b', 'f'], 'quantities': [3, 1, 2, 2, 1], 'profits': [4, 1, 1, -1, -1]}, {'tid': 5, 'items': ['a', 'f'], 'quantities': [2, 3], 'profits': [2, -1]}, {'tid': 7, 'items': ['c', 'e', 'b'], 'quantities': [2, 2, 3], 'profits': [2, 2, 1]}, {'tid': 2, 'items': ['c', 'b'], 'quantities': [5, 1], 'profits': [1, -1]}, {'tid': 4, 'items': ['d', 'c', 'e'], 'quantities': [1, 2, 3], 'profits': [4, 1, 1]}]
Utility of Beta: 28
Projected Database: [{'tid': 6, 'items': ['c', 'e', 'a', 'b', 'g', 'f'], 'profits': [1, 1, 1, 1, -2, -1], 

In [341]:
# Print the collected itemsets; top_k.list is kept ordered from highest to
# lowest utility by TopK.add_to_list.
for i in top_k.list:
    print("itemset",i.itemSet, "\n", "Utility", i.U, "\n")

itemset {'e', 'c', 'd'} 
 Utility 37 

itemset {'e', 'd'} 
 Utility 37 

itemset {'c', 'd'} 
 Utility 31 

itemset {'e', 'b', 'd'} 
 Utility 31 

itemset {'d'} 
 Utility 28 

itemset {'e', 'c', 'b', 'd'} 
 Utility 27 

itemset {'e', 'f', 'b', 'd'} 
 Utility 25 

itemset {'b', 'd'} 
 Utility 25 

itemset {'e', 'c', 'f', 'd'} 
 Utility 24 

itemset {'b', 'c', 'd'} 
 Utility 24 

