In [2]:
from collections import defaultdict

min_rating = 4.0
transactions = defaultdict(list)

# The parser below treats a line ending in ":" as a movie-id header and every
# other line with exactly three comma-separated fields as one rating row
# (customer id, rating, date) belonging to the most recent header.
with open("combined_data_1.txt", "r") as file:
    current_movie = None
    for raw_line in file:
        line = raw_line.strip()

        if line.endswith(":"):  # MovieID line
            current_movie = line.replace(":", "")
            continue

        parts = line.split(",")
        if len(parts) != 3:
            # Blank or malformed row: nothing to record.
            continue

        customer_id, rating, date = parts
        # Keep only ratings at or above the threshold; each customer's list of
        # kept movies becomes one transaction.
        if float(rating) >= min_rating:
            transactions[customer_id].append(current_movie)

transaction_list = list(transactions.values())
print("Sample transactions:", transaction_list[:5])


Sample transactions: [['1', '571', '607', '985', '1144', '1220', '1798', '1810', '1843', '1905', '1975', '2095', '2152', '2580', '2594', '2612', '3029', '3106', '3254', '3433', '3538', '3825', '3860', '4306', '4330', '4472'], ['1', '5', '114', '209', '215', '223', '290', '297', '304', '311', '316', '329', '473', '494', '658', '660', '789', '808', '935', '964', '1020', '1159', '1277', '1337', '1488', '1552', '1665', '1743', '1800', '1865', '1905', '2040', '2057', '2102', '2136', '2172', '2173', '2192', '2452', '2532', '2700', '2752', '2780', '2782', '2861', '3012', '3046', '3183', '3523', '3611', '3728', '3777', '3794', '3837', '3864', '3938', '3958', '3965', '4098', '4115', '4405'], ['1', '30', '44', '165', '166', '189', '191', '197', '232', '257', '262', '269', '299', '329', '331', '357', '413', '417', '442', '443', '457', '468', '482', '483', '516', '528', '548', '571', '588', '607', '629', '652', '672', '689', '692', '694', '696', '705', '758', '763', '819', '833', '843', '851', '88

In [None]:
from collections import defaultdict, Counter

class FPTreeNode:
    """A single node of an FP-tree.

    Attributes are plain instance fields:
      item     -- the item stored at this node (the root is created with None)
      count    -- running counter for this node
      parent   -- the node one level up, or None
      children -- dict of child nodes, keyed by child item
      link     -- next node in the same-item chain, or None
    """

    def __init__(self, item, count, parent):
        self.item, self.count, self.parent = item, count, parent
        # A fresh dict per node; children are attached by the tree builder.
        self.children, self.link = {}, None

    def increment(self, count):
        """Add ``count`` to this node's counter."""
        self.count += count



def build_fp_tree(transactions, min_support):
    """Build an FP-tree and its header table from a collection of transactions.

    Args:
        transactions: Re-iterable collection of transactions, each an iterable
            of hashable items. It is traversed twice (one counting pass, one
            insertion pass), so a one-shot generator will not work.
        min_support: Minimum number of transactions an item must appear in to
            be kept in the tree (an absolute count, compared with ``>=``).

    Returns:
        ``(root, header_table)``. ``root`` is an item-less ``FPTreeNode``.
        ``header_table`` maps each frequent item to a two-element list
        ``[support, first_node]``; the remaining nodes holding that item are
        reachable from ``first_node`` through ``FPTreeNode.link``.
        ``(None, None)`` is returned when no item reaches ``min_support``.
    """
    # Pass 1: per-item support. A transaction contributes at most once per
    # item, so an item repeated inside one transaction cannot inflate support.
    # dict.fromkeys keeps first-seen order, which keeps the result reproducible.
    item_counter = Counter()
    for txn in transactions:
        item_counter.update(list(dict.fromkeys(txn)))

    # Filter items by min support
    item_counter = {item: count for item, count in item_counter.items() if count >= min_support}
    if not item_counter:
        return None, None

    # One global insertion order shared by every transaction: support
    # descending, ties broken by first appearance. Sorting each transaction
    # independently by count alone lets equal-support items appear in different
    # orders in different transactions, which splits a shared prefix over
    # several branches and makes mining under-count (or miss) itemsets.
    rank = {
        item: position
        for position, item in enumerate(sorted(item_counter, key=item_counter.get, reverse=True))
    }

    header_table = {item: [count, None] for item, count in item_counter.items()}
    # Last node of each item's link chain, so appending a node is O(1) instead
    # of walking the whole chain from the header every time.
    chain_tails = {}
    root = FPTreeNode(None, 1, None)

    # Pass 2: insert every transaction as a root-to-leaf path.
    for txn in transactions:
        frequent_items = [item for item in dict.fromkeys(txn) if item in rank]
        frequent_items.sort(key=rank.__getitem__)

        current_node = root
        for item in frequent_items:
            child = current_node.children.get(item)
            if child is None:
                child = FPTreeNode(item, 1, current_node)
                current_node.children[item] = child

                # Link header table
                if item in chain_tails:
                    chain_tails[item].link = child
                else:
                    header_table[item][1] = child
                chain_tails[item] = child
            else:
                child.increment(1)

            current_node = child

    return root, header_table


# ---------------------------
# Step 3: Mine FP-Tree
# ---------------------------

def ascend_tree(node):
    """Return the items of ``node``'s ancestors, nearest ancestor first.

    The walk follows ``parent`` pointers and stops at the first ancestor that
    is missing or whose item is None (the root). The item of ``node`` itself is
    not included.
    """
    path = []
    ancestor = node.parent
    while ancestor and ancestor.item is not None:
        path.append(ancestor.item)
        ancestor = ancestor.parent
    return path

def find_prefix_paths(base_item, node):
    """Collect the conditional pattern base reachable from ``node``.

    Starting at ``node`` and following ``link`` pointers, each visited node
    contributes a ``(path, count)`` pair, where ``path`` is the result of
    ``ascend_tree`` for that node and ``count`` is the node's own count. Nodes
    whose path is empty are skipped. ``base_item`` is accepted but not read by
    the body.
    """

    def same_item_chain(start):
        # Yield every node of the link chain, beginning with ``start``.
        while start:
            yield start
            start = start.link

    cond_pats = []
    for linked_node in same_item_chain(node):
        path = ascend_tree(linked_node)
        if not path:
            continue
        cond_pats.append((path, linked_node.count))
    return cond_pats

def mine_fp_tree(tree, header_table, min_support, prefix, freq_itemsets):
    """Recursively mine frequent itemsets and append them to ``freq_itemsets``.

    Args:
        tree: Root of the tree being mined; not read by the body, which works
            from ``header_table`` alone.
        header_table: Mapping ``item -> [support, first_node]`` as returned by
            ``build_fp_tree``.
        min_support: Threshold forwarded to ``build_fp_tree`` for every
            conditional tree.
        prefix: Set of items already fixed on this branch of the recursion; it
            is not modified.
        freq_itemsets: Output list; one ``(itemset, support)`` tuple is
            appended per frequent itemset found.

    Returns:
        None. Results are delivered through ``freq_itemsets``.
    """
    # Visit items from lowest to highest support.
    by_support = sorted(header_table.items(), key=lambda entry: entry[1][0])

    for base_item, (support, first_node) in by_support:
        grown_itemset = prefix | {base_item}
        freq_itemsets.append((grown_itemset, support))

        # Expand every weighted prefix path into that many plain transactions
        # so the regular tree builder can consume them.
        cond_transactions = []
        for path, path_count in find_prefix_paths(base_item, first_node):
            cond_transactions += [path] * path_count

        cond_tree, cond_header = build_fp_tree(cond_transactions, min_support)
        if not cond_tree:
            continue
        mine_fp_tree(cond_tree, cond_header, min_support, grown_itemset, freq_itemsets)


# ---------------------------
# Step 4: Run FP-Growth
# ---------------------------

# Absolute support threshold (number of transactions); raise it to mine fewer,
# more common itemsets.
min_support = 2
fp_root, header = build_fp_tree(transaction_list, min_support)

if fp_root is not None and header is not None:
    frequent_itemsets = []
    mine_fp_tree(fp_root, header, min_support, set(), frequent_itemsets)

    # Report every mined itemset together with its support.
    print("\n✅ Frequent Itemsets:")
    for itemset, count in frequent_itemsets:
        print(f"Itemset: {set(itemset)}, Support: {count}")
else:
    # build_fp_tree signals "nothing frequent" with (None, None).
    print("No frequent items found for the given minimum support.")
