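**Aim :** To run the FP-Growth algorithm on the given dataset (Market_Basket_Optimisation.csv) in two ways:
1. By writing the functions ourselves.
2. By using NumPy/pandas and the FP-Growth implementation from the mlxtend library.

The results of the two approaches are then compared to validate them.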

---

**By Creating Our Own Functions**

In [None]:
from __future__ import division, print_function

import warnings

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import association_rules
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder

warnings.filterwarnings('ignore')

In [None]:
%pip install mlxtend --upgrade

In [None]:
# Define the Node class for the FP-Tree
class Node:
    """One vertex of the FP-tree.

    Attributes:
        item: label stored at this vertex (the tree root is created with None).
        count: running counter for this vertex, raised via ``increment``.
        parent: the Node this vertex hangs under, or None.
        children: dict mapping an item label to the child Node for that item.
    """

    def __init__(self, item, count, parent):
        self.item, self.count, self.parent = item, count, parent
        # Every node starts as a leaf; callers register children by item label.
        self.children = dict()

    def increment(self, count):
        """Add ``count`` to this node's counter."""
        self.count = self.count + count

# Create the FP-Tree
def create_fp_tree(data, min_support):
    """Build an FP-tree from a collection of transactions.

    Args:
        data: iterable of transactions, each an iterable of hashable items.
        min_support: minimum number of occurrences an item needs in order to
            be kept in the tree (an absolute count, compared with ``>=``).

    Returns:
        A ``(root, item_counts)`` tuple. ``root`` is the tree's root ``Node``
        (item ``None``, count 0). ``item_counts`` maps every item seen in
        ``data`` - frequent or not - to its number of occurrences.
    """
    # The transactions are walked twice (count, then insert), so materialise
    # the outer iterable in case a generator was passed in.
    data = list(data)

    # Count the frequency of each item in the dataset
    item_counts = {}
    for transaction in data:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1

    # Fix ONE global ordering of the frequent items: descending frequency,
    # ties broken by first appearance (the sort is stable even with
    # reverse=True). Sorting every transaction by its raw count alone lets
    # equally frequent items keep each transaction's own order, so the same
    # pair of items could end up on two different branches of the tree.
    frequent_items = [item for item, count in item_counts.items() if count >= min_support]
    frequent_items.sort(key=item_counts.get, reverse=True)
    rank = {item: position for position, item in enumerate(frequent_items)}

    # Create the root node of the FP-Tree
    root = Node(None, 0, None)

    # Add each transaction to the FP-Tree
    for transaction in data:
        # Drop infrequent items and put the rest in the global order.
        ordered_items = sorted(
            (item for item in transaction if item in rank), key=rank.get
        )
        current_node = root
        for item in ordered_items:
            child_node = current_node.children.get(item)
            if child_node is None:
                child_node = Node(item, 1, current_node)
                current_node.children[item] = child_node
            else:
                child_node.increment(1)
            current_node = child_node

    return root, item_counts


In [None]:
# Define the FP-Growth algorithm
def fp_growth(data, min_support):
    """Build the FP-tree for ``data`` and collect what ``mine_fp_tree`` finds.

    Args:
        data: the transactions, forwarded unchanged to ``create_fp_tree``.
        min_support: threshold forwarded to both ``create_fp_tree`` and
            ``mine_fp_tree``.

    Returns:
        The list filled in by ``mine_fp_tree``; each entry it appends is an
        ``(itemset, count)`` tuple.
    """
    frequent_itemsets = []

    # Only the tree is needed here; the per-item counts are not used.
    tree_root, _ = create_fp_tree(data, min_support)

    # The miner appends its findings to the list it is handed.
    mine_fp_tree(tree_root, [], frequent_itemsets, min_support)
    return frequent_itemsets

# Define the functions that mine the FP-Tree for frequent itemsets
def _grow_patterns(weighted_paths, suffix, itemset_list, min_support):
    """Recursively grow frequent itemsets from a conditional pattern base.

    Args:
        weighted_paths: list of ``(items, weight)`` pairs; ``items`` is a list
            without duplicates and ``weight`` is how many transactions it
            stands for.
        suffix: list of items already fixed; every itemset reported from this
            call is ``[item] + suffix`` (or a further extension of it).
        itemset_list: output list; ``(itemset, support)`` tuples are appended.
        min_support: minimum absolute support for an itemset to be reported.
    """
    # Support of each item inside this (conditional) database.
    support = {}
    for path, weight in weighted_paths:
        for item in path:
            support[item] = support.get(item, 0) + weight

    # Local total order over the frequent items. Each itemset is then
    # generated exactly once: through its last item in this order.
    frequent = [item for item in support if support[item] >= min_support]
    frequent.sort(key=support.get, reverse=True)
    rank = {item: position for position, item in enumerate(frequent)}

    for position, item in enumerate(frequent):
        itemset = [item] + suffix
        itemset_list.append((itemset, support[item]))

        # Conditional pattern base of `item`: every path that contains it,
        # cut down to the frequent items ranked before it.
        conditional = []
        for path, weight in weighted_paths:
            if item not in path:
                continue
            earlier = [other for other in path if rank.get(other, position) < position]
            if earlier:
                conditional.append((earlier, weight))
        if conditional:
            _grow_patterns(conditional, itemset, itemset_list, min_support)


def mine_fp_tree(node, prefix, itemset_list, min_support):
    """Append every frequent itemset encoded in the FP-tree below ``node``.

    The tree is first unrolled into weighted root-to-node paths (one path per
    node at which at least one transaction ends), and itemsets are then grown
    from conditional pattern bases. The reported count is the itemset's
    support summed over all branches, not the counter of a single tree node,
    and the item of ``node`` itself (``None`` for a tree root) never appears
    in an itemset.

    Args:
        node: root of the tree to mine; only its descendants contribute items.
        prefix: list of items to include in every reported itemset (pass
            ``[]`` to mine the tree as-is). It is not modified.
        itemset_list: output list; ``(itemset, support)`` tuples are appended,
            where ``itemset`` is a list of items.
        min_support: minimum absolute support (compared with ``>=``).
    """
    weighted_paths = []
    pending = [(child, [child.item]) for child in node.children.values()]
    while pending:
        current, path = pending.pop()
        passing_through = 0
        for child in current.children.values():
            passing_through += child.count
            pending.append((child, path + [child.item]))
        # Transactions counted at this node but at none of its children are
        # the ones that end here.
        ending_here = current.count - passing_through
        if ending_here > 0:
            # dict.fromkeys drops repeated items while keeping path order.
            weighted_paths.append((list(dict.fromkeys(path)), ending_here))

    _grow_patterns(weighted_paths, list(prefix), itemset_list, min_support)

In [None]:
# Load the raw basket data; the file has no header row, one basket per line.
df = pd.read_csv('/content/Market_Basket_Optimisation.csv', header=None)

# Turn every row into the list of distinct items bought in that basket.
transaction = []

for row in df.itertuples(index=False):  # index=False: the row number is not an item
    # dict.fromkeys removes repeated items but keeps column order (a set would
    # make the item order arbitrary); pd.notna drops the NaN padding cells.
    transaction.append([item for item in dict.fromkeys(row) if pd.notna(item)])

len(transaction)

7501

In [None]:
# Mine the baskets with the hand-written implementation.
# NOTE(review): 150 is an absolute count, whereas the mlxtend call further
# down uses the fraction min_support=0.05 - the two thresholds differ, so
# align them before comparing the two result sets.
itemsets = fp_growth(data=transaction, min_support=150)
itemsets

[([None, 'mineral water'], 1788),
 ([None, 'mineral water', 'eggs'], 382),
 ([None, 'mineral water', 'spaghetti'], 341),
 ([None, 'mineral water', 'chocolate'], 174),
 ([None, 'eggs'], 966),
 ([None, 'eggs', 'french fries'], 184),
 ([None, 'eggs', 'spaghetti'], 167),
 ([None, 'french fries'], 714),
 ([None, 'spaghetti'], 691),
 ([None, 'cookies'], 305),
 ([None, 'chocolate'], 434),
 ([None, 'green tea'], 360),
 ([None, 'escalope'], 177),
 ([None, 'milk'], 215)]

**Using the library's built-in functions**

In [None]:
# Re-read the file with explicit integer column labels 0..19; because names
# are supplied, the first line of the file is read as data, not as a header.
df = pd.read_csv("/content/Market_Basket_Optimisation.csv", names=list(range(20)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


In [None]:
# Turn every row of df into the list of distinct items bought in that basket.
transaction = []

for row in df.itertuples(index=False):  # index=False: the row number is not an item
    # dict.fromkeys removes repeated items but keeps column order (a set would
    # make the item order arbitrary); pd.notna drops the NaN padding cells.
    transaction.append([item for item in dict.fromkeys(row) if pd.notna(item)])

len(transaction)

7501

In [None]:
# One-hot encode the baskets: one row per transaction, one column per item.
t = TransactionEncoder()
t.fit(transaction)                 # learn the distinct item labels
t_arr = t.transform(transaction)   # encode each basket against those labels

# Wrap the encoded array in a DataFrame labelled with the learned item names.
data = pd.DataFrame(data=t_arr, columns=t.columns_)
data

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7499,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Frequent itemsets from mlxtend. min_support is given here as the fraction
# 0.05; use_colnames=True reports item names rather than column indices.
res = fpgrowth(data, use_colnames=True, min_support=0.05)
res

Unnamed: 0,support,itemsets
0,0.238368,(mineral water)
1,0.132116,(green tea)
2,0.076523,(low fat yogurt)
3,0.071457,(shrimp)
4,0.065858,(olive oil)
5,0.063325,(frozen smoothie)
6,0.179709,(eggs)
7,0.087188,(burgers)
8,0.062525,(turkey)
9,0.129583,(milk)


In [None]:
# Derive association rules from the frequent itemsets held in `res`, keeping
# rules whose confidence is at least 0.06.
# NOTE(review): `res` is rebound from the itemsets table to the rules table
# here, so re-running this cell on its own would pass the rules back into
# association_rules - consider a separate name for the rules.
res = association_rules(res, min_threshold=0.06, metric="confidence")
res

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(mineral water),(eggs),0.238368,0.179709,0.050927,0.213647,1.188845,0.00809,1.043158
1,(eggs),(mineral water),0.179709,0.238368,0.050927,0.283383,1.188845,0.00809,1.062815
2,(mineral water),(spaghetti),0.238368,0.17411,0.059725,0.250559,1.439085,0.018223,1.102008
3,(spaghetti),(mineral water),0.17411,0.238368,0.059725,0.343032,1.439085,0.018223,1.159314
4,(mineral water),(chocolate),0.238368,0.163845,0.05266,0.220917,1.348332,0.013604,1.073256
5,(chocolate),(mineral water),0.163845,0.238368,0.05266,0.3214,1.348332,0.013604,1.122357
