***Data Mining CIA - Apriori & FP Growth Tree***

# ***Apriori***

In [None]:
import pandas as pd
# NOTE(review): read_csv uses the first row as column names by default; if
# this file has no header row, that first transaction is silently dropped
# (pass header=None in that case) -- confirm against the CSV.
df = pd.read_csv('/content/store_data.csv')

# One transaction per row: keep only the string cells (this skips non-string
# cells such as the NaN pandas uses for missing values) and trim whitespace.
transactions = [
    [cell.strip() for cell in row if isinstance(cell, str)]
    for _, row in df.iterrows()
]

# Define a function to generate candidate itemsets
def generate_candidates(prev_itemsets, k):
    """Build the size-``k`` candidate itemsets from the previous level.

    Join step: two itemsets that agree on everything except their last item
    are merged into one sorted candidate. Prune step: the candidate is kept
    only if every itemset obtained by removing one of its first ``k`` items
    is itself present in ``prev_itemsets``.

    Args:
        prev_itemsets: List of itemsets from the previous level. Each
            itemset is a list of hashable, mutually orderable items.
        k: Size of the candidates to generate.

    Returns:
        List of candidates, each a sorted list, in the order in which the
        joined pairs occur in ``prev_itemsets``.
    """
    from itertools import combinations

    # Hash the previous level once so each prune lookup is O(1) instead of a
    # linear scan over the whole list.
    known = {tuple(itemset) for itemset in prev_itemsets}

    candidates = []
    for first, second in combinations(prev_itemsets, 2):
        if first[:-1] != second[:-1]:
            continue
        candidate = sorted(first + [second[-1]])
        if all(
            tuple(candidate[:drop] + candidate[drop + 1:]) in known
            for drop in range(k)
        ):
            candidates.append(candidate)
    return candidates

# Define a function to calculate support count
def support_count(data, itemset):
    """Count the transactions in ``data`` that contain every item of ``itemset``.

    Args:
        data: Iterable of transactions; each must support ``in`` tests.
        itemset: Iterable of items that must all be present.

    Returns:
        Number of matching transactions as an int.
    """
    # Each transaction contributes True (1) or False (0) to the total.
    return sum(
        all(member in basket for member in itemset)
        for basket in data
    )

# Define a function to apply the Apriori algorithm
def apriori(data, min_support, include_single_items=False):
    """Find frequent itemsets level by level with the Apriori algorithm.

    Args:
        data: List of transactions, each a collection of hashable, mutually
            orderable items.
        min_support: Minimum support as a fraction of ``len(data)``. An
            itemset is frequent when its support count is at least
            ``min_support * len(data)``.
        include_single_items: When true, the frequent 1-itemsets are
            reported as well. Defaults to False, which returns only
            itemsets of two or more items.

    Returns:
        List of frequent itemsets (sorted lists of items), ordered by size
        and, within one size, in candidate-generation order.
    """
    # The absolute threshold is identical for every candidate: compute once.
    min_count = min_support * len(data)

    all_items = sorted({item for transaction in data for item in transaction})
    # Level 1: only frequent single items seed larger candidates. An itemset
    # can never be more frequent than one of its items, so this pruning does
    # not change which larger itemsets are found; it only avoids counting
    # pairs that cannot be frequent.
    current_level = [
        [item] for item in all_items
        if support_count(data, [item]) >= min_count
    ]

    frequent_itemsets = list(current_level) if include_single_items else []
    while current_level:
        candidates = generate_candidates(current_level, len(current_level[0]) + 1)
        current_level = [
            candidate for candidate in candidates
            if support_count(data, candidate) >= min_count
        ]
        frequent_itemsets.extend(current_level)
    return frequent_itemsets

# Define the minimum support threshold
min_support=0.05

# Apply the Apriori algorithm to find frequent itemsets
frequent_itemsets=apriori(transactions, min_support)

# Print the frequent itemsets
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset)

Frequent Itemsets:
['chocolate', 'mineral water']
['eggs', 'mineral water']
['mineral water', 'spaghetti']


# ***FP - Growth Tree***

In [None]:
import pandas as pd

# Define a class for the FP Tree Node
class FPNode:
    """A single FP-tree node: an item, its count, and its tree links."""

    def __init__(self, item, count, parent):
        # Payload and upward link come from the caller; children start empty
        # and are keyed by item.
        self.item, self.count, self.parent = item, count, parent
        self.children = {}

    def increment(self, count):
        """Add ``count`` to this node's counter."""
        self.count += count

class FPTree:
    """Prefix tree of transactions plus a header table of nodes per item."""

    def __init__(self):
        # The root carries no item (None); every transaction hangs below it.
        self.root = FPNode(None, 0, None)
        # Maps each item to the list of tree nodes that carry it.
        self.header_table = {}

    def add_transaction(self, transaction, count):
        """Insert ``transaction`` as a path from the root, adding ``count``.

        Nodes already on the path get ``count`` added; missing nodes are
        created with ``count`` and registered in the header table.
        """
        node = self.root
        for item in transaction:
            child = node.children.get(item)
            if child is None:
                child = FPNode(item, count, node)
                node.children[item] = child
                self.header_table.setdefault(item, []).append(child)
            else:
                child.count += count
            node = child

    def conditional_pattern_base(self, item):
        """Return the prefix paths leading to every node that carries ``item``.

        Returns:
            List of ``(prefix_path, count)`` tuples, one per node of ``item``
            that has a non-empty prefix. ``prefix_path`` lists the ancestor
            items from just below the root down to the node's parent, and
            ``count`` is that node's count.
        """
        base = []
        for node in self.header_table.get(item, ()):
            path = []
            ancestor = node.parent
            # Climb until the item-less root is reached.
            while ancestor.item is not None:
                path.append(ancestor.item)
                ancestor = ancestor.parent
            if path:
                # Collected bottom-up; callers expect root-to-node order.
                path.reverse()
                base.append((path, node.count))
        return base

# Define a function to build conditional FP Tree
def build_conditional_tree(conditional_base, min_support):
    """Build the conditional FP-tree for a conditional pattern base.

    An item's support inside the base is the sum of the counts of all prefix
    paths that contain it. Items whose aggregated support is below
    ``min_support`` are removed from every path; what is left of each path is
    inserted with that path's count. Filtering per item rather than per path
    keeps items that are frequent overall but spread across several paths
    that are each below the threshold.

    Args:
        conditional_base: Iterable of ``(prefix_path, count)`` pairs, as
            returned by ``FPTree.conditional_pattern_base``.
        min_support: Minimum absolute support count.

    Returns:
        An ``FPTree`` holding the filtered prefix paths.
    """
    from collections import Counter

    # The base is walked twice, so materialise it in case it is a generator.
    conditional_base = list(conditional_base)

    item_support = Counter()
    for path, count in conditional_base:
        # set(): an item counts once per path even if the path repeats it.
        for item in set(path):
            item_support[item] += count

    conditional_tree = FPTree()
    for path, count in conditional_base:
        kept = [item for item in path if item_support[item] >= min_support]
        if kept:
            conditional_tree.add_transaction(kept, count)
    return conditional_tree

# Define a function to mine two-item frequent itemsets from the conditional FP-tree
def mine_two_itemsets(conditional_tree, min_support):
    """Return the items that are frequent inside a conditional FP-tree.

    An item may sit on several branches of the tree, so its support is the
    sum of the counts of all its header-table nodes, not the count of the
    first node alone.

    Args:
        conditional_tree: Tree whose ``header_table`` maps each item to the
            list of nodes carrying it; every node exposes ``count``.
        min_support: Minimum absolute support count.

    Returns:
        List of one-element lists ``[item]`` in header-table order. The
        caller prepends the conditioning item to form two-item itemsets.
    """
    return [
        [item]
        for item, nodes in conditional_tree.header_table.items()
        if sum(node.count for node in nodes) >= min_support
    ]

# Define a function to mine frequent itemsets from the FP-tree
def mine_frequent_itemsets(fp_tree, min_support):
    """Mine frequent one- and two-item itemsets from an FP-tree.

    For every item whose total count reaches ``min_support`` the item itself
    is reported, followed by its pairings with the items found frequent in
    its conditional FP-tree. Mining is not recursive, so itemsets of three
    or more items are not produced.

    Args:
        fp_tree: The ``FPTree`` to mine.
        min_support: Minimum absolute support count.

    Returns:
        List of itemsets (lists of items) in header-table order; each
        frequent item is immediately followed by its two-item itemsets.
    """
    frequent_itemsets = []
    for item, nodes in fp_tree.header_table.items():
        # An item can occur on several branches; its support is the total
        # over all of its nodes, not only the first one created.
        if sum(node.count for node in nodes) < min_support:
            continue
        frequent_itemsets.append([item])

        # Prefix paths of the item -> conditional tree -> frequent partners.
        conditional_base = fp_tree.conditional_pattern_base(item)
        conditional_tree = build_conditional_tree(conditional_base, min_support)
        for partner in mine_two_itemsets(conditional_tree, min_support):
            frequent_itemsets.append([item] + partner)

    return frequent_itemsets

from collections import Counter

# NOTE(review): read_csv uses the first row as column names by default; if
# this file has no header row, that first transaction is silently dropped
# (pass header=None in that case) -- confirm against the CSV.
data = pd.read_csv('/content/store_data.csv')

# One transaction per row: keep only the string cells (this skips non-string
# cells such as the NaN pandas uses for missing values) and trim whitespace,
# so that padding never enters the tree as an "item".
transactions = [
    [item.strip() for item in row if isinstance(item, str)]
    for row in data.values.tolist()
]

# Minimum support as an absolute number of transactions.
min_support = 50

# Number of transactions each item occurs in (set(): once per transaction).
item_counts = Counter(
    item for transaction in transactions for item in set(transaction)
)

# Construct the FP-tree
fp_tree = FPTree()
for transaction in transactions:
    # Drop infrequent items and insert the rest in one global order (most
    # frequent first, ties broken by name). Without a shared order the same
    # itemset ends up on different branches and its count is split.
    frequent_items = {
        item for item in transaction if item_counts[item] >= min_support
    }
    ordered_items = sorted(
        frequent_items, key=lambda item: (-item_counts[item], item)
    )
    count = 1  # Each row of the file is one transaction
    fp_tree.add_transaction(ordered_items, count)

# Mine frequent itemsets
frequent_itemsets = mine_frequent_itemsets(fp_tree, min_support)

# Print the frequent itemsets
print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(itemset)


Frequent Itemsets:
['burgers']
['burgers', 'turkey']
['turkey']
['mineral water']
['mineral water', 'spaghetti']
['mineral water', 'frozen vegetables']
['milk']
['milk', 'mineral water']
['whole wheat pasta']
['soup']
['frozen vegetables']
['frozen vegetables', 'shrimp']
['spaghetti']
['spaghetti', 'frozen vegetables']
['spaghetti', 'ground beef']
['cookies']
['shrimp']
['red wine']
['parmesan cheese']
['ground beef']
['herb & pepper']
