# Generating FI-Tree

In [1]:
class FINode:
    """Class to represent a node in the FI-Tree."""
    def __init__(self, position=None, signature=None):
        self.position = position  # Position of differing bit (for internal nodes)
        self.signature = signature  # Signature (for leaf nodes)
        self.left = None  # Left child
        self.right = None  # Right child

    def is_leaf(self):
        """Check if the node is a leaf node."""
        return self.signature is not None

# Generate signatures using the earlier function
def generate_signatures(transaction, num_bits=5):
    bit_array = [0] * num_bits
    for item in transaction:
        hash_index = item % num_bits
        bit_array[hash_index] = 1
    return ''.join(map(str, bit_array))    

def find_first_differing_bit(sig1, sig2):
    """Find the first differing bit position between two signatures."""
    for i in range(len(sig1)):
        if sig1[i] != sig2[i]:
            return i
    return -1  # No difference

def insert_into_fi_tree(root, signature):
    """Insert a signature into the FI-Tree."""
    parent = None
    current = root
    direction = None  # Keep track of the direction to update the parent's child

    while not current.is_leaf():
        parent = current
        if signature[current.position] == '0':
            direction = 'left'
            current = current.left
        else:
            direction = 'right'
            current = current.right

    existing_signature = current.signature
    differing_position = find_first_differing_bit(existing_signature, signature)

    if differing_position == -1:
        # Signature already exists in the tree
        return

    # Create a new internal node
    new_node = FINode(position=differing_position)

    # Determine left and right children based on the differing bit
    if signature[differing_position] == '0':
        new_node.left = FINode(signature=signature)
        new_node.right = current
    else:
        new_node.left = current
        new_node.right = FINode(signature=signature)

    # Update the parent's child reference
    if parent is None:
        # Update the root node
        root.position = new_node.position
        root.left = FINode(signature=new_node.left.signature) if new_node.left else None
        root.right = FINode(signature=new_node.right.signature) if new_node.right else None
        root.signature = None
    else:
        if direction == 'left':
            parent.left = new_node
        else:
            parent.right = new_node

def build_fi_tree(transactions, num_bits):
    """Build the FI-Tree from a list of signatures."""
    # Initialize the tree with the first signature
    signature=generate_signatures(transactions[0], num_bits)
    root = FINode(signature=signature)

    # Insert remaining signatures into the tree
    for transaction in transactions[1:]:
        signature=generate_signatures(transaction, num_bits)
        insert_into_fi_tree(root, signature)

    return root

def print_fi_tree(node, depth=0):
    """Print the FI-Tree for visualization."""
    if node.is_leaf():
        print("  " * depth + f"Leaf: {node.signature}")
    else:
        print("  " * depth + f"Internal Node: Position {node.position}")
        if node.left:
            print("  " * depth + "Left:")
            print_fi_tree(node.left, depth + 1)
        if node.right:
            print("  " * depth + "Right:")
            print_fi_tree(node.right, depth + 1)

# Example Transactions
transactions = [
    [1, 2, 5],
    [2, 4],
    [2, 3],
    [1, 2, 4],
    [1, 3],
    [2, 3],
    [1, 2, 3, 5]
]

num_bits = 5
# Build the FI-Tree
fi_tree = build_fi_tree(transactions, num_bits)

# Print the FI-Tree
print("FI-Tree Structure:")
print_fi_tree(fi_tree)

FI-Tree Structure:
Internal Node: Position 0
Left:
  Internal Node: Position 3
  Left:
    Internal Node: Position 1
    Left:
      Leaf: 00101
    Right:
      Leaf: 01101
  Right:
    Internal Node: Position 1
    Left:
      Leaf: 00110
    Right:
      Leaf: 01010
Right:
  Internal Node: Position 3
  Left:
    Leaf: 11100
  Right:
    Leaf: 11110


# Helper Functions

In [2]:
def is_zero(signature):
    for i in signature:
        if i == '1':
            return False
    return True

def super_impose(sig1, sig2):
    super_imposed_sig = ""
    for b1, b2 in zip(sig1, sig2):
        if (b1 == b2):
            super_imposed_sig = super_imposed_sig + b1
        else:
            super_imposed_sig = super_imposed_sig + '0'
    if super_imposed_sig == sig2:
        return 1
    else:
        return 0

def support(signature, current):
    if (current.is_leaf()):
        return super_impose(current.signature, signature)
    
    if (signature[current.position] == '0'):
        return support(signature, current.left) + support(signature, current.right)
    else:
        return support(signature, current.right)

In [3]:
def OR(sig1, sig2):
    result_sig = ""
    for b1, b2 in zip(sig1, sig2):
        if b1 == '1' or b2 == '1':
            result_sig = result_sig + '1'
        else:
            result_sig = result_sig + '0'
    return result_sig

def AND(sig1, sig2):
    result_sig = ""
    for b1, b2 in zip(sig1, sig2):
        if b1 == '1' and b2 == '1':
            result_sig = result_sig + '1'
        else:
            result_sig = result_sig + '0'
    return result_sig

In [4]:
# Function to calculate confidence of a rule
def confidence(left_sig, right_sig, fi_tree):
    left_support = support(left_sig, fi_tree)
    combined_support = support(OR(left_sig,right_sig), fi_tree)
    return combined_support / left_support if left_support != 0 else 0

In [5]:
def convert_s_to_t(signature, num_bits):
    transaction = []
    for i, s in enumerate(signature):
        if i == 0 and s == '1':
            transaction.append(num_bits)
        elif s == '1':
            transaction.append(i)
    return transaction

# Generating Rules

In [6]:
# Function to find frequent itemsets using the FI-Tree
def find_frequent_signatures(fi_tree, min_support, num_bits):
    # Initialize with single-item signatures
    frequent_signatures = []
    candidates = []

    # Generate all possible single-item signatures
    for i in range(num_bits):
        signature = ''.join(['1' if j == i else '0' for j in range(num_bits)])
        if support(signature, fi_tree) >= min_support:
            frequent_signatures.append(signature)
            candidates.append(signature)

    # Generate higher-order itemsets
    k = 2
    while candidates:
        new_candidates = []
        for i in range(len(candidates)):
            for j in range(i + 1, len(candidates)):
                candidate = OR(candidates[i], candidates[j])
                if candidate not in new_candidates:
                    if support(candidate, fi_tree) >= min_support:
                        new_candidates.append(candidate)
                        frequent_signatures.append(candidate)
        candidates = new_candidates
        k += 1

    return frequent_signatures

# Function to generate association rules
def generate_association_rules(fi_tree, frequent_signatures, min_confidence):
    rules = []
    for signature in frequent_signatures:
        # Split into left and right parts to form rules
        for i in range(1, len(signature)):
            left = signature[:i] + '0' * (len(signature) - i)
            right = AND(signature, ''.join(['1' if j >= i else '0' for j in range(len(signature))]))
            if left != right and not is_zero(left) and not is_zero(right):  # Avoid trivial rules
                conf = confidence(left, right, fi_tree)
                if conf >= min_confidence:
                    rules.append((left, right, conf))
    return rules

# Main function for FI-Tree-based Apriori
def apriori_fi_tree(fi_tree, min_support, min_confidence, num_bits):
    # Find frequent signatures
    frequent_signatures = find_frequent_signatures(fi_tree, min_support, num_bits)

    # Generate association rules
    association_rules = generate_association_rules(fi_tree, frequent_signatures, min_confidence)

    return frequent_signatures, association_rules

# Example Usage
# Assume fi_tree is already built using the provided transactions
min_support = 2  # Example minimum support threshold
min_confidence = 0.6  # Example minimum confidence threshold
num_bits = 5  # Number of bits in signatures

frequent_signatures, association_rules = apriori_fi_tree(fi_tree, min_support, min_confidence, num_bits)

# Print the results
print("Frequent Signatures:")
for sig in frequent_signatures:
    print(convert_s_to_t(sig, num_bits))

print("\nAssociation Rules:")
for rule in association_rules:
    print(f"Rule: {convert_s_to_t(rule[0], num_bits)} -> {convert_s_to_t(rule[1], num_bits)} with confidence: {rule[2]:.2f}")

Frequent Signatures:
[5]
[1]
[2]
[3]
[4]
[5, 1]
[5, 2]
[1, 2]
[1, 3]
[2, 3]
[2, 4]
[5, 1, 2]

Association Rules:
Rule: [5] -> [1] with confidence: 1.00
Rule: [5] -> [2] with confidence: 1.00
Rule: [5] -> [2] with confidence: 1.00
Rule: [1] -> [2] with confidence: 0.75
Rule: [5] -> [1, 2] with confidence: 1.00
Rule: [5, 1] -> [2] with confidence: 1.00
