In [1]:
# Required modules preimport.
import itertools, pandas as pd

# MLXtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Previous document dataset
dataset = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
           ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
           ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
           ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

In [3]:
# Encoding and preprocessing
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

In [4]:
l2 = [[1,2], [1,4], [1,5], [2,3], [2,4], [2,5]]

# Join l2 * l2, 1, 2 -> 4, 5 etc
c3 = [[1,2,4], [1,2,5], [1,4,5], [2,3,4], [2,3,5], [2,4,5]]

In [5]:
# Proof (Not in freq itemset with threshold for .5)
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
display(frequent_itemsets) # Itemset not in output.

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Eggs, Onion)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Kidney Beans, Yogurt)"


Kulczynski= 1/2 (P(A|B) + P(B|A))

kulc = 0.5*((supAUB/supA)+(supAUB/supB))

In [6]:
import itertools

frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
strong_rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.6)
# Respective antecedents and consequents.
antecedents = frozenset(strong_rules["antecedents"])
consequents = frozenset(strong_rules["consequents"])

In [7]:
def kulczynski_measure(freq_itemset, antecedents, consequents):
    # Only  >= .5 are "interesting"
    rules = association_rules(freq_itemset, metric='confidence', min_threshold=0.6)
    # Aggregate support values from the dataframe.
    support = {}
    for _, row in freq_itemset.iterrows():
        support[row['itemsets']] = row['support']
    # Iterate first over each antecedent in frozenset.
    for ant in antecedents:
        for con in consequents: # Iterate over each consequent in order to find correct rule.
             # The row that contains the correctly ordered A B rule is assigned as rowAC.
            rowAC = rules.loc[(rules['antecedents'] == ant) & (rules['consequents'] == con)]

            rowCA = rules.loc[(rules['antecedents'] == con) & (rules['consequents'] == ant)]

            if (not rowAC.empty) & (not rowCA.empty):
                supAC = rowAC['confidence'].values[0]
                supCA = rowCA['confidence'].values[0]
                # Formula for measuring relationship between itemsets on a 0 to 1 bound.
                kulczynski = 0.5*(supAC + supCA)
                print(f'Kulczynski for {ant} --> {con}: {round(kulczynski, 2)}')
                
kulczynski_measure(frequent_itemsets, antecedents, consequents)

Kulczynski for frozenset({'Yogurt'}) --> frozenset({'Kidney Beans'}): 0.8
Kulczynski for frozenset({'Eggs', 'Kidney Beans'}) --> frozenset({'Onion'}): 0.88
Kulczynski for frozenset({'Kidney Beans', 'Onion'}) --> frozenset({'Eggs'}): 0.88
Kulczynski for frozenset({'Eggs', 'Onion'}) --> frozenset({'Kidney Beans'}): 0.8
Kulczynski for frozenset({'Eggs'}) --> frozenset({'Kidney Beans', 'Onion'}): 0.88
Kulczynski for frozenset({'Eggs'}) --> frozenset({'Onion'}): 0.88
Kulczynski for frozenset({'Eggs'}) --> frozenset({'Kidney Beans'}): 0.9
Kulczynski for frozenset({'Onion'}) --> frozenset({'Eggs', 'Kidney Beans'}): 0.88
Kulczynski for frozenset({'Onion'}) --> frozenset({'Eggs'}): 0.88
Kulczynski for frozenset({'Onion'}) --> frozenset({'Kidney Beans'}): 0.8
Kulczynski for frozenset({'Milk'}) --> frozenset({'Kidney Beans'}): 0.8
Kulczynski for frozenset({'Kidney Beans'}) --> frozenset({'Yogurt'}): 0.8
Kulczynski for frozenset({'Kidney Beans'}) --> frozenset({'Eggs', 'Onion'}): 0.8
Kulczynski fo

IR = abs(supA-supB)/(supA+supB-supAUB)

In [8]:
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)

strong_rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.6)

# Respective antecedents and consequents.
antecedents = frozenset(strong_rules["antecedents"])
consequents = frozenset(strong_rules["consequents"])

In [9]:
# IR = abs(supA-supB)/(supA+supB-supAUB)
def imbalance_ratio(freq_itemset, antecedents, consequents): 
    # Recalculate the confidence values from the dataset freq
    rules = association_rules(freq_itemset, metric='confidence', min_threshold=0.6)
    # Aggregate support values from the dataframe.
    support = {}
    for _, row in freq_itemset.iterrows():
        support[row['itemsets']] = row['support']
    # Iterate first over each antecedent in frozenset.
    for ant in antecedents:
        supA = support[ant] # support value is taken from freq support.
        for con in consequents: # Iterate over each consequent in order to find correct rule.
            # The row that contains the correctly ordered A B rule is assigned as rowAC.
            rowAC = rules.loc[(rules['antecedents'] == ant) & (rules['consequents'] == con)]
            # Support for B / consequent is taken from freq_itemset.
            supB = support[con]
            # Now row for the reverse rule is taken.
            rowCA = rules.loc[(rules['antecedents'] == con) & (rules['consequents'] == ant)]
            # Only continue if the dataframe is not empty, seems to spit out dataframes regardless without this.
            if (not rowAC.empty) & (not rowCA.empty):
                # Confidence for A union B is the confidence of rule AB
                supAB = rowAC['confidence'].values[0]
                # Imbalance ratio formula.
                ir = abs(supA-supB) / (supA+supB-supAB)
                # Round to 2.dp and show rule order.
                print(f'IR for {ant} --> {con}: {round(ir, 2)}')
# Call function with 3 parameters.              
imbalance_ratio(frequent_itemsets, antecedents, consequents)

IR for frozenset({'Yogurt'}) --> frozenset({'Kidney Beans'}): 0.67
IR for frozenset({'Eggs', 'Kidney Beans'}) --> frozenset({'Onion'}): 0.31
IR for frozenset({'Kidney Beans', 'Onion'}) --> frozenset({'Eggs'}): 0.5
IR for frozenset({'Eggs', 'Onion'}) --> frozenset({'Kidney Beans'}): 0.67
IR for frozenset({'Eggs'}) --> frozenset({'Kidney Beans', 'Onion'}): 0.31
IR for frozenset({'Eggs'}) --> frozenset({'Onion'}): 0.31
IR for frozenset({'Eggs'}) --> frozenset({'Kidney Beans'}): 0.25
IR for frozenset({'Onion'}) --> frozenset({'Eggs', 'Kidney Beans'}): 0.5
IR for frozenset({'Onion'}) --> frozenset({'Eggs'}): 0.5
IR for frozenset({'Onion'}) --> frozenset({'Kidney Beans'}): 0.67
IR for frozenset({'Milk'}) --> frozenset({'Kidney Beans'}): 0.67
IR for frozenset({'Kidney Beans'}) --> frozenset({'Yogurt'}): 0.4
IR for frozenset({'Kidney Beans'}) --> frozenset({'Eggs', 'Onion'}): 0.4
IR for frozenset({'Kidney Beans'}) --> frozenset({'Eggs'}): 0.2
IR for frozenset({'Kidney Beans'}) --> frozenset({'