# EMHUN Algorithm Implementation
The implementation of the EMHUN algorithm to mine high-utility itemsets from data with unstable profits.

In [1]:
import numpy as np
import pandas as pd

## Utility Functions
These functions assist in calculating utilities, sorting items, and preparing the data for mining.

In [4]:
def calculate_utility(profits, quantities):
    """
    Calculate utility based on profit values and quantities.
    """
    return np.multiply(profits, quantities)

def calculate_rtu(transaction):
    """
    Calculate RTU (Redefined Transaction Utility).
    Only consider items with positive utility values.
    """
    return np.sum([u for u in transaction.values() if u > 0])

def sort_items(item_list, utility_types, rtwus):
    """
    Sort items based on type and RTWU values.
    """
    positive_items = sorted([i for i, t in zip(item_list, utility_types) if t == 'positive'], key=lambda x: -rtwus[x])
    hybrid_items = sorted([i for i, t in zip(item_list, utility_types) if t == 'hybrid'], key=lambda x: -rtwus[x])
    negative_items = sorted([i for i, t in zip(item_list, utility_types) if t == 'negative'], key=lambda x: -rtwus[x])
    return positive_items + hybrid_items + negative_items

def calculate_rtwus(database, items):
    """
    Calculate RTWU (Redefined Transaction Weight Utility) for items.
    """
    rtwus = {}
    for item in items:
        rtwus[item] = sum([transaction.get(item, 0) for transaction in database if transaction.get(item, 0) > 0])
    return rtwus

## EMHUN Algorithm
The main algorithm to mine high-utility itemsets.

In [5]:
def emhun_algorithm(database, min_utility):
    """
    EMHUN - Mine high-utility itemsets from data with unstable profits.

    Parameters:
        database: list of transactions (each transaction is a dictionary of utilities).
        min_utility: minimum utility threshold.

    Returns:
        high_utility_itemsets: list of itemsets with utility >= min_utility.
    """
    # Item Classification
    positive_items = []
    hybrid_items = []
    negative_items = []

    for transaction in database:
        for item, utility in transaction.items():
            if utility > 0 and item not in positive_items:
                positive_items.append(item)
            elif utility < 0 and item not in negative_items:
                negative_items.append(item)
            elif item not in hybrid_items:
                hybrid_items.append(item)

    # Calculate RTWU
    all_items = positive_items + hybrid_items + negative_items
    rtwus = calculate_rtwus(database, all_items)

    # SSort items by RTWU
    sorted_items = sort_items(all_items, ["positive"] * len(positive_items) + ["hybrid"] * len(hybrid_items) + ["negative"] * len(negative_items), rtwus)             

    # Identify High-Utility Itemsets
    high_utility_itemsets = []
    for item in sorted_items:
        total_utility = sum([transaction.get(item, 0) for transaction in database])
        if total_utility >= min_utility:
            high_utility_itemsets.append((item, total_utility))

    return high_utility_itemsets

## Example Usage
Testing the algorithm with a sample database.

In [6]:
database = [
    {"a": 2, "b": -3, "c": 1},
    {"a": 1, "b": 5, "c": -1},
]

min_utility = 2
result = emhun_algorithm(database, min_utility)

# Output Results
print("High-Utility Itemsets:", result)

High-Utility Itemsets: [('b', 2), ('a', 3), ('a', 3), ('b', 2)]
