In [21]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import fpgrowth
import graphviz

data = [['Milk', 'Bread', 'Butter'],
        ['Milk', 'Bread'],
        ['Milk', 'Eggs'],
        ['Bread', 'Eggs'],
        ['Milk', 'Bread', 'Eggs', 'Butter'],
        ['Tea', 'Bread', 'Eggs']]

df = pd.DataFrame(data, columns=['item1', 'item2', 'item3', 'item4'])

# Convert the dataset to transaction
te = TransactionEncoder()
te_ary = te.fit(data).transform(data)
df2 = pd.DataFrame(te_ary, columns=te.columns_)
df2


  and should_run_async(code)


Unnamed: 0,Bread,Butter,Eggs,Milk,Tea
0,True,True,False,True,False
1,True,False,False,True,False
2,False,False,True,True,False
3,True,False,True,False,False
4,True,True,True,True,False
5,True,False,True,False,True


In [22]:
#apply the apriori algorithm
frequent_itemsets = apriori(df2, min_support=0.33, use_colnames=True)  #which means that only itemsets occurring in at least 33% of the transactions will be considered frequent.
frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.833333,(Bread)
1,0.333333,(Butter)
2,0.666667,(Eggs)
3,0.666667,(Milk)
4,0.333333,"(Bread, Butter)"
5,0.5,"(Bread, Eggs)"
6,0.5,"(Bread, Milk)"
7,0.333333,"(Milk, Butter)"
8,0.333333,"(Milk, Eggs)"
9,0.333333,"(Bread, Milk, Butter)"


In [23]:
#apply fp growth algorithm
frequent_itemsets = fpgrowth(df2, min_support=0.33, use_colnames=True)
frequent_itemsets

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.833333,(Bread)
1,0.666667,(Milk)
2,0.333333,(Butter)
3,0.666667,(Eggs)
4,0.5,"(Bread, Milk)"
5,0.333333,"(Milk, Butter)"
6,0.333333,"(Bread, Butter)"
7,0.333333,"(Bread, Milk, Butter)"
8,0.333333,"(Milk, Eggs)"
9,0.5,"(Bread, Eggs)"


In [25]:
class Node:
    def __init__(self, item, count, parent):
        self.item = item
        self.count = count
        self.parent = parent
        self.children = {}
        self.nodeLink = None  # Pointer to next node with the same item

def build_tree(data, min_support):
    header_table = {}
    for index, row in data.iterrows():
        for item in row:
            header_table[item] = header_table.get(item, 0) + 1

    # Remove items with support less than min_support
    header_table = {k: v for k, v in header_table.items() if v >= min_support}

    frequent_items = list(header_table.keys())
    frequent_items.sort(key=lambda x: header_table[x], reverse=True)

    root = Node("Null", 1, None)
    for index, row in data.iterrows():
        ordered_items = [item for item in frequent_items if item in row]
        if ordered_items:
            insert_tree(ordered_items, root, header_table, 1)

    # Ensure 'Null' is in header_table
    if 'Null' not in header_table:
        header_table['Null'] = (0, None)

    return root, header_table

def insert_tree(items, node, header_table, count):
    if not items:
        return
    if items[0] in node.children:
        node.children[items[0]].count += count
    else:
        node.children[items[0]] = Node(items[0], count, node)
        # Update node link of the last occurrence
        if header_table[items[0]][1] is None:
            header_table[items[0]] = (header_table[items[0]][0], node.children[items[0]])
        else:
            update_header(header_table[items[0]][1], node.children[items[0]])
    if len(items) > 1:
        insert_tree(items[1:], node.children[items[0]], header_table, count)

def update_header(node_to_test, target_node):
    while node_to_test.nodeLink is not None:
        node_to_test = node_to_test.nodeLink
    node_to_test.nodeLink = target_node

# Example usage:
import pandas as pd

data = [['Milk', 'Bread', 'Butter'],
        ['Milk', 'Bread'],
        ['Milk', 'Eggs'],
        ['Bread', 'Eggs'],
        ['Milk', 'Bread', 'Eggs', 'Butter'],
        ['Tea', 'Bread', 'Eggs']]

df = pd.DataFrame(data, columns=['item1', 'item2', 'item3', 'item4'])
root, header_table = build_tree(df, min_support=2)


  and should_run_async(code)


In [29]:
# Assuming you have already imported necessary libraries and performed frequent itemset mining using FP-Growth and Apriori algorithms
# Example:
from mlxtend.frequent_patterns import fpgrowth, apriori

# Perform FP-Growth algorithm to mine frequent itemsets
frequent_itemsets_fpgrowth = fpgrowth(df, min_support=0.2, use_colnames=True)

# Perform Apriori algorithm to mine frequent itemsets
frequent_itemsets_apriori = apriori(df, min_support=0.2, use_colnames=True)

# Now you can use the variables frequent_itemsets_fpgrowth and frequent_itemsets_apriori
max_frequent_itemset_fp = frequent_itemsets_fpgrowth[frequent_itemsets_fpgrowth['support'] == frequent_itemsets_fpgrowth['support'].max()]
max_frequent_itemset_ap = frequent_itemsets_apriori[frequent_itemsets_apriori['support'] == frequent_itemsets_apriori['support'].max()]


  and should_run_async(code)


ValueError: The allowed values for a DataFrame are True, False, 0, 1. Found value Milk

In [31]:
num_transactions = len(data)
num_transactions

  and should_run_async(code)


6