In [23]:
import pandas as pd

In [24]:
def read_dataset(filepath):
    """Load a transaction dataset from a whitespace-delimited text file.

    Every line of the file is treated as one transaction and every
    whitespace-separated token on that line is parsed with ``int``.
    A blank line therefore produces an empty transaction.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of transactions in file order, each one a list of ints in
        the order the items appear on the line.

    Raises:
        ValueError: If a token cannot be converted by ``int``.
    """
    transactions = []
    with open(filepath, 'r') as handle:
        for raw_line in handle:
            tokens = raw_line.strip().split()
            transactions.append(list(map(int, tokens)))
    return transactions

In [25]:
from collections import defaultdict

def find_frequent_1_itemsets(transactions, min_sup):
    """Find every single item that occurs in at least ``min_sup`` transactions.

    Args:
        transactions: Iterable of transactions, each an iterable of
            hashable items.
        min_sup: Minimum number of transactions an item must appear in
            (an absolute count, not a ratio).

    Returns:
        A list of 1-tuples ``(item,)``, one per frequent item, ordered by
        item value.
    """
    support = {}
    for transaction in transactions:
        # De-duplicate first so an item repeated inside one transaction
        # contributes only once to its support count.
        for item in set(transaction):
            support[item] = support.get(item, 0) + 1

    frequent_items = [item for item, count in support.items() if count >= min_sup]
    frequent_items.sort()

    # Each frequent item is wrapped in a 1-tuple so it can be handled as an
    # itemset by the later levels.
    return [(item,) for item in frequent_items]


In [26]:
import copy


def has_infrequent_subset(candidate, prev_frequent_itemsets):
    """Apriori prune test: does the candidate have an infrequent subset?

    For each position in ``candidate`` the itemset obtained by dropping
    that one element is checked against the previous level. The subset is
    considered frequent when it is contained in at least one itemset of
    ``prev_frequent_itemsets``.

    Args:
        candidate: Sequence of hashable items (list or tuple). It is not
            modified.
        prev_frequent_itemsets: Iterable of the frequent itemsets of the
            previous level (lists, tuples or sets of hashable items).

    Returns:
        True if at least one "drop one element" subset is not contained in
        any previous frequent itemset, otherwise False. An empty candidate
        yields False.
    """
    items = list(candidate)

    # Materialise and convert the previous level once. This avoids rebuilding
    # a set for every (item, itemset) pair and also makes the function safe
    # for one-shot iterables such as generators.
    frequent_sets = [frozenset(itemset) for itemset in prev_frequent_itemsets]

    for index in range(len(items)):
        # Slicing builds the subset without mutating or deep-copying the
        # candidate, so tuples are accepted as well as lists.
        subset = frozenset(items[:index] + items[index + 1:])
        if not any(subset <= frequent for frequent in frequent_sets):
            return True

    return False


def apriori_gen(Lk_minus_1: list, k_1: int) -> list[list]:
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Join step: two itemsets are merged when they agree on their first
    ``k_1 - 1`` items and the last item of the first is smaller than the
    last item of the second. Prune step: a merged candidate is kept only
    if ``has_infrequent_subset`` reports no infrequent subset.

    Args:
        Lk_minus_1: Frequent itemsets of the previous level, each a list or
            tuple of mutually comparable items. The items of an itemset may
            be given in any order.
        k_1: Size of the itemsets in ``Lk_minus_1`` (i.e. ``k - 1``).

    Returns:
        A list of candidates, each a sorted list of ``k_1 + 1`` items, in
        the order they are produced by the join.
    """
    # Normalise every itemset to a sorted list and order the collection
    # lexicographically. The prefix comparison in the join is only valid
    # when each itemset is internally sorted, so this must not be left to
    # the caller.
    itemsets = sorted(sorted(itemset) for itemset in Lk_minus_1)
    prefix_len = k_1 - 1
    candidates = []

    for i, first in enumerate(itemsets):
        first_prefix = first[:prefix_len]
        for j in range(i + 1, len(itemsets)):
            second = itemsets[j]

            if second[:prefix_len] != first_prefix:
                # The collection is sorted, so itemsets sharing a prefix are
                # contiguous: once the prefix changes no later itemset can
                # join with `first`.
                break

            if first[prefix_len] < second[prefix_len]:
                candidate = sorted(
                    first[:prefix_len] + [first[prefix_len], second[prefix_len]]
                )
                # Prune: every (k-1)-subset must itself be frequent.
                if not has_infrequent_subset(candidate, itemsets):
                    candidates.append(candidate)

    return candidates

def count_support(transactions, itemset):
    """Count the transactions that contain every item of ``itemset``.

    Args:
        transactions: Iterable of transactions, each an iterable of
            hashable items.
        itemset: Iterable of hashable items to look for.

    Returns:
        The number of transactions of which ``itemset`` is a subset. An
        empty itemset is contained in every transaction.
    """
    required = set(itemset)
    return sum(1 for transaction in transactions if required <= set(transaction))

In [27]:
def APRIORI_ALGO(Transactions, min_sup):
    """Mine all frequent itemsets level by level with the Apriori algorithm.

    Starting from the frequent 1-itemsets, each iteration generates the
    candidates of the next size with ``apriori_gen``, keeps those whose
    support count reaches ``min_sup`` and stops as soon as a level is empty.
    The frequent 1-itemsets, the full result and the total number of
    frequent itemsets are printed.

    Args:
        Transactions: List of transactions, each an iterable of items. It is
            scanned once per candidate, so it must be re-iterable.
        min_sup: Minimum support as an absolute transaction count.

    Returns:
        The list of levels ``L``: ``L[0]`` holds the frequent 1-itemsets as
        returned by ``find_frequent_1_itemsets`` and every later entry holds
        the frequent itemsets of the next size as sorted lists. Empty levels
        are not appended, except that ``L[0]`` is always present.
    """
    L1 = find_frequent_1_itemsets(Transactions, min_sup)
    print(L1)

    L = [L1]
    total_patterns = len(L1)
    Lk_minus_1 = L1
    k = 2

    while Lk_minus_1:
        # Candidates of size k are built from the frequent (k-1)-itemsets.
        Ck = apriori_gen(Lk_minus_1, k - 1)

        # Keep only the candidates that are actually frequent.
        Lk = [
            sorted(candidate)
            for candidate in Ck
            if count_support(Transactions, candidate) >= min_sup
        ]
        if not Lk:
            break

        L.append(Lk)
        total_patterns += len(Lk)
        Lk_minus_1 = Lk
        k += 1

    print(f"L : {L}")
    print(f"LEN(L) : {total_patterns}")

    # Hand the result back so callers can use it instead of parsing the
    # printed output.
    return L

In [28]:
import math

# Load the sample transactions and mine them with a relative minimum support
# of 20%, converted to an absolute transaction count (rounded up).
filepath = 'Datasets/sample.txt'
Transactions = read_dataset(filepath)
min_sup = math.ceil(len(Transactions) * 0.20)

APRIORI_ALGO(Transactions, min_sup)


[(1,), (2,), (3,), (4,), (5,)]
L : [[(1,), (2,), (3,), (4,), (5,)], [[1, 2], [1, 3], [1, 5], [2, 3], [2, 4], [2, 5]], [[1, 2, 3], [1, 2, 5]]]
LEN(L) : 13
