In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [3]:
dataset = pd.read_csv("Market_Basket_Optimisation (2).csv", header=None)
print(dataset.head())

              0          1           2                 3             4   \
0         shrimp    almonds     avocado    vegetables mix  green grapes   
1        burgers  meatballs        eggs               NaN           NaN   
2        chutney        NaN         NaN               NaN           NaN   
3         turkey    avocado         NaN               NaN           NaN   
4  mineral water       milk  energy bar  whole wheat rice     green tea   

                 5     6               7             8             9   \
0  whole weat flour  yams  cottage cheese  energy drink  tomato juice   
1               NaN   NaN             NaN           NaN           NaN   
2               NaN   NaN             NaN           NaN           NaN   
3               NaN   NaN             NaN           NaN           NaN   
4               NaN   NaN             NaN           NaN           NaN   

               10         11     12     13             14      15  \
0  low fat yogurt  green tea  honey  sala

In [4]:
row_index = 1
row_list = dataset.iloc[row_index].dropna().tolist()

row_list

['burgers', 'meatballs', 'eggs']

In [5]:
transactions = []
for i in range(len(dataset)):
  curr_row = dataset.iloc[i].dropna().tolist()
  transactions.append(curr_row)

transactions[2]

['chutney']

In [6]:
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
new_df = pd.DataFrame(te_ary, columns=te.columns_)
new_df.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


In [7]:
from mlxtend.frequent_patterns import fpgrowth

fpgrowth(new_df, min_support=0.10, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.238368,(mineral water)
1,0.132116,(green tea)
2,0.179709,(eggs)
3,0.129583,(milk)
4,0.170911,(french fries)
5,0.17411,(spaghetti)
6,0.163845,(chocolate)


In [8]:
from collections import Counter, defaultdict
from itertools import chain

def generate_frequent_pattern(items, min_support):
    # Count frequency of each item
    all_items = list(chain(*items))

    item_count = Counter(all_items)

    frequent_items = {}

    for item, count in item_count.items():
        if count >= min_support:
            frequent_items[item] = count

    # Sort  in descending order of frequency
    sorted_frequent_items = dict(sorted(frequent_items.items(), key=lambda item: item[1], reverse=True))

    return sorted_frequent_items

def build_ordered_item_sets(transactions, frequent_items):

    ordered_item_sets = []

    for transaction in transactions:
        # Keep only the items from the Frequent Pattern set & maintain the sorted order
        ordered_transaction = [i for i in frequent_items if i in transaction]
        ordered_item_sets.append(ordered_transaction)

    return ordered_item_sets


class TrieNode:

    def __init__(self, item):
        self.item = item
        self.count = 1  # Occurrence count
        self.children = {}  # Dictionary to store child nodes
        self.parent = None

class FPTree:

    def __init__(self):
        self.root = TrieNode(None)  # Root node with no item

    def insert_transaction(self, transaction):
        current_node = self.root

        for item in transaction:
            if item in current_node.children:
                current_node.children[item].count += 1  # Increment count if exists
            else:
                new_node = TrieNode(item)  # Create new node
                new_node.parent = current_node  # Set parent link
                current_node.children[item] = new_node  # Add to children

            current_node = current_node.children[item]

    def get_conditional_patterns(self, item):

        conditional_patterns = []
        node_counts = {}

        # Find all paths leading to 'item'
        def find_paths(node, path):
            if node.item is not None:
                path.append(node.item)
            if node.parent:
                find_paths(node.parent, path)

        def traverse_tree(node):
            for child in node.children.values():
                if child.item == item:
                    path = []
                    find_paths(child, path)
                    path.reverse()  # Reverse to maintain top-down order
                    conditional_patterns.append(path[:-1])  # Remove the item itself
                    node_counts[tuple(path[:-1])] = child.count
                traverse_tree(child)

        traverse_tree(self.root)
        return conditional_patterns, node_counts

# FP-Growth Recursive Mining
def mine_fp_tree(fp_tree, frequent_items, min_support, base_pattern=[]):

    frequent_itemsets = []

    for item in reversed(frequent_items.keys()):  # Start from least frequent
        new_pattern = base_pattern + [item]
        frequent_itemsets.append((new_pattern, frequent_items[item]))  # Store pattern

        # Generate conditional patterns
        conditional_patterns, counts = fp_tree.get_conditional_patterns(item)

        # Create conditional FP-Tree
        conditional_tree = FPTree()
        for pattern in conditional_patterns:
            for _ in range(counts[tuple(pattern)]):  # Insert pattern count times
                conditional_tree.insert_transaction(pattern)

        # Recursively mine the conditional FP-Tree
        if conditional_tree.root.children:
            frequent_itemsets.extend(mine_fp_tree(conditional_tree, frequent_items, min_support, new_pattern))

    return frequent_itemsets

In [9]:
min_support = 1300

frequent_items = generate_frequent_pattern(transactions, min_support)
print("Frequent Pattern Set:", frequent_items)

ordered_transactions = build_ordered_item_sets(transactions, frequent_items)

fp_tree = FPTree()
for transaction in ordered_transactions:
    fp_tree.insert_transaction(transaction)

frequent_itemsets = mine_fp_tree(fp_tree, frequent_items, min_support)
print("\nFrequent Itemsets:")
for pattern, count in frequent_itemsets:
    print(f"{pattern}: {count}")

Frequent Pattern Set: {'mineral water': 1788, 'eggs': 1348, 'spaghetti': 1306}

Frequent Itemsets:
['spaghetti']: 1306
['spaghetti', 'spaghetti']: 1306
['spaghetti', 'eggs']: 1348
['spaghetti', 'eggs', 'spaghetti']: 1306
['spaghetti', 'eggs', 'eggs']: 1348
['spaghetti', 'eggs', 'mineral water']: 1788
['spaghetti', 'mineral water']: 1788
['eggs']: 1348
['eggs', 'spaghetti']: 1306
['eggs', 'eggs']: 1348
['eggs', 'mineral water']: 1788
['mineral water']: 1788


In [10]:

from mlxtend.frequent_patterns import fpgrowth

# Applying FP-Growth algorithm
frequent_itemsets = fpgrowth(new_df, min_support=0.02, use_colnames=True)
print("Frequent Itemsets:")
print(frequent_itemsets)


Frequent Itemsets:
      support                      itemsets
0    0.238368               (mineral water)
1    0.132116                   (green tea)
2    0.076523              (low fat yogurt)
3    0.071457                      (shrimp)
4    0.065858                   (olive oil)
..        ...                           ...
98   0.040928  (ground beef, mineral water)
99   0.039195      (ground beef, spaghetti)
100  0.021997           (ground beef, milk)
101  0.023064      (ground beef, chocolate)
102  0.027463         (cake, mineral water)

[103 rows x 2 columns]


In [11]:

from mlxtend.frequent_patterns import association_rules

# Generating association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
print("Association Rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


Association Rules:
        antecedents      consequents   support  confidence      lift
0       (green tea)      (spaghetti)  0.026530    0.200807  1.153335
1       (spaghetti)      (green tea)  0.026530    0.152374  1.153335
2       (green tea)   (french fries)  0.028530    0.215943  1.263488
3    (french fries)      (green tea)  0.028530    0.166927  1.263488
4       (green tea)      (chocolate)  0.023464    0.177598  1.083943
..              ...              ...       ...         ...       ...
89           (milk)    (ground beef)  0.021997    0.169753  1.727704
90    (ground beef)      (chocolate)  0.023064    0.234735  1.432669
91      (chocolate)    (ground beef)  0.023064    0.140765  1.432669
92           (cake)  (mineral water)  0.027463    0.338816  1.421397
93  (mineral water)           (cake)  0.027463    0.115213  1.421397

[94 rows x 5 columns]
