In [2]:
#Import libraries
import pandas as pd
from itertools import combinations

In [None]:
#Main pipeline pseudocode
def market_basket_analysis(csv_path, min_support, min_conf, output_csv=True):
    """
    All pipeline:
    CSV file -> transactions -> Apriori + triangular Matrix -> rules with metrics
    """
    print(f"Reading dataset: {csv_path}")
    transactions = get_transactions(csv_path)

    print("Applying apriori...")
    frequent_sets = apriori_triangular(transactions, min_support)

    print("Generating results (support, confidence, lift)...")
    rules = generate_rules(frequent_sets, transactions, min_conf)   
    result = pd.DataFrame(rules).sort_values("Lift", ascending=False)

    if output_csv:
        output_name = "association_rules_output.csv"
        result.to_csv(output_name, index=False)
        print(f"Final results in '{output_name}'")
    return result

In [None]:
#Open CSV and get transactions
def get_transactions(csv_file):
    """
    Read CSV file and group restaurant types by user_id
    Input: CSV file path 
    Output: List of transactions (one per user)
    """
    df = pd.read_csv(csv_file)
    #grouped = df.griupby("user_id")["restaurant_type"].apply(set)
    grouped = df.groupby("Member_number")["itemDescription"].apply(set)
    return grouped.tolist()

In [None]:
#Triangular matrix
def pair_index(i, j, n):
    """Instead of us having a 2D matrix we save all possible pairs this way"""
    #Only count pair once
    if i > j:
        i, j = j, i
    return int(i * (n - (i + 1) / 2) + (j - i - 1))

#Support function
def support(itemset, transactions):
    """Calculate support for an itemset in the list of transactions"""
    count = sum(1 for t in transactions if all(i in t for i in itemset))
    return count / len(transactions)

In [None]:
def apriori_triangular(transactions, min_support):
    """
    Apriori algorithm using triangular matrix optimization
    Input: List of transactions and minimum support 
    Output: Frequent itemsets with support counts
    """
    '''
    #1. Individual item counts
    Sort items by alphabetical order, indexing
    '''
    #Sort unique items by alphabetical order
    items = sorted(set(i for t in transactions for i in t))
    len_items = len(items)
    #Index items
    index = {item: idx for idx, item in enumerate(items)}
 
    #Count single items
    single_counts = [0] * len_items
    for t in transactions:
        for item in t:
            single_counts[index[item]] += 1

    #Get frequent 1-itemsets
    min_count = min_support * len(transactions)
    frequent_items = [items[i] for i, count in enumerate(single_counts) if count >= min_count]
    L1 = [frozenset([item]) for item in frequent_items]
    
    '''
    #2. Pair counts using triangular matrix
    Count how many times each pair of frequent items appear together
    '''
    pair_len = len(frequent_items)
    #All possible pairs for item
    pair_counts = [0] * (pair_len * (pair_len - 1) // 2)

    for t in transactions:
        #Only frequent items in transaction
        trans_items = [i for i in frequent_items if i in t]
        for i in range(len(trans_items)):
            for j in range(i + 1, len(trans_items)):
                #Use triangular matrix index for pair counting
                k = pair_index(i,j, pair_len)
                pair_counts[k] += 1

    #Get frequent 2-itemsets
    L2= list()
    for i in range(pair_len):
        for j in range(i + 1, pair_len):
            k = pair_index(i,j, pair_len)
            if pair_counts[k] >= min_count:
                L2.append(frozenset([frequent_items[i], frequent_items[j]]))

    '''
    #3. Generate larger itemsets (k>=3)
    Using previous frequent itemsets to generate candidates
    '''
    #Previous frequent itemsets
    all_frequent = [L1, L2]
    k = 3
    while True:
        #Until no more frequent itemsets
        prev_frequent = all_frequent[-1]
        if not prev_frequent:
            break
    
        candidates = set()
        prev_list = list(prev_frequent)
        len_prev = len(prev_list)
        #Iterate over previous
        for i in range(len_prev):
            for j in range(i + 1, len_prev):
                #Join items until we have k items
                #Generate union of two itemsets present in L2
                union = prev_list[i].union(prev_list[j])
                if len(union) == k:
                    #All possible combinations of size k-1 must be frequent
                    subsets = combinations(union, k-1)
                    if all(frozenset(s) in prev_frequent for s in subsets):
                        #If frequent, add to candidates
                        candidates.add(frozenset(union))

        #Filter by support
        candidate_counts = {c for c in candidates if support(c, transactions) >= min_support}
        if not candidate_counts:
            break
        #If min_support satisfied, add to all frequent
        all_frequent.append(list(candidate_counts))
        k += 1
    return all_frequent

In [57]:
#Items associated with users
csv_path = 'C:/Users/HPLaptop/Downloads/Groceries_dataset.csv'
min_support = 0.05
transactions = get_transactions(csv_path)
print("Applying apriori...")
frequent_sets = apriori_triangular(transactions, min_support)
print(frequent_sets)

Applying apriori...
[[frozenset({'UHT-milk'}), frozenset({'beef'}), frozenset({'berries'}), frozenset({'beverages'}), frozenset({'bottled beer'}), frozenset({'bottled water'}), frozenset({'brown bread'}), frozenset({'butter'}), frozenset({'butter milk'}), frozenset({'candy'}), frozenset({'canned beer'}), frozenset({'chicken'}), frozenset({'chocolate'}), frozenset({'citrus fruit'}), frozenset({'coffee'}), frozenset({'cream cheese '}), frozenset({'curd'}), frozenset({'dessert'}), frozenset({'domestic eggs'}), frozenset({'frankfurter'}), frozenset({'frozen meals'}), frozenset({'frozen vegetables'}), frozenset({'fruit/vegetable juice'}), frozenset({'grapes'}), frozenset({'ham'}), frozenset({'hamburger meat'}), frozenset({'hard cheese'}), frozenset({'hygiene articles'}), frozenset({'ice cream'}), frozenset({'long life bakery product'}), frozenset({'margarine'}), frozenset({'meat'}), frozenset({'misc. beverages'}), frozenset({'napkins'}), frozenset({'newspapers'}), frozenset({'oil'}), frozen