In [1]:
import pandas as pd
from itertools import combinations

## Preprocessing

In [2]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)
dataset = dataset.fillna(0)
transactions = []
for i in range(0, 7501):
    transactions.append([str(dataset.values[i, j]) for j in range(0, 20)])

In [3]:
def create_candidates(transactions, length):
    """Create initial candidate itemsets of length 1"""
    candidates = []
    for transaction in transactions:
        for item in transaction:
            if [item] not in candidates:
                candidates.append([item])
    candidates.sort()
    return list(map(frozenset, candidates))

In [4]:
def scan_database(transactions, candidates, min_support):
    """Scan the database and count the support of each candidate itemset"""
    itemset_counts = {}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                itemset_counts[candidate] = itemset_counts.get(candidate, 0) + 1
    num_transactions = float(len(transactions))
    frequent_itemsets = []
    support_data = {}
    for itemset in itemset_counts:
        support = itemset_counts[itemset] / num_transactions
        if support >= min_support:
            frequent_itemsets.append(itemset)
        support_data[itemset] = support
    return frequent_itemsets, support_data

In [5]:
def generate_candidates(frequent_itemsets, length):
    """Generate candidate itemsets of length k from frequent itemsets of length k-1"""
    candidates = []
    num_frequent_itemsets = len(frequent_itemsets)
    for i in range(num_frequent_itemsets):
        for j in range(i + 1, num_frequent_itemsets):
            itemset1 = list(frequent_itemsets[i])[:length - 2]
            itemset2 = list(frequent_itemsets[j])[:length - 2]
            itemset1.sort()
            itemset2.sort()
            if itemset1 == itemset2:
                candidates.append(frequent_itemsets[i] | frequent_itemsets[j])
    return candidates

In [6]:
def apriori(transactions, min_support=0.5):
    """Apriori algorithm"""
    candidates = create_candidates(transactions, 1)
    frequent_itemsets1, support_data = scan_database(transactions, candidates, min_support)
    frequent_itemsets = [frequent_itemsets1]
    k = 2
    while len(frequent_itemsets[k - 2]) > 0:
        candidates = generate_candidates(frequent_itemsets[k - 2], k)
        frequent_itemsets_k, support_data_k = scan_database(transactions, candidates, min_support)
        support_data.update(support_data_k)
        frequent_itemsets.append(frequent_itemsets_k)
        k += 1
    return frequent_itemsets, support_data

## Run Apriori algorithm

In [7]:
if __name__ == "__main__":
    frequent_itemsets, support_data = apriori(transactions, min_support=0.03)

    """Print results"""
    for i, itemset_list in enumerate(frequent_itemsets):
        print("Frequent itemsets of length {}: {}".format(i+1, len(itemset_list)))
        for itemset in itemset_list:
            print(itemset, "Support:", round(support_data[itemset], 4))

Frequent itemsets of length 1: 37
frozenset({'avocado'}) Support: 0.0333
frozenset({'cottage cheese'}) Support: 0.0319
frozenset({'frozen smoothie'}) Support: 0.0633
frozenset({'green tea'}) Support: 0.1321
frozenset({'honey'}) Support: 0.0475
frozenset({'low fat yogurt'}) Support: 0.0765
frozenset({'mineral water'}) Support: 0.2384
frozenset({'olive oil'}) Support: 0.0659
frozenset({'salmon'}) Support: 0.0425
frozenset({'shrimp'}) Support: 0.0715
frozenset({'tomato juice'}) Support: 0.0304
frozenset({'0'}) Support: 0.9999
frozenset({'burgers'}) Support: 0.0872
frozenset({'eggs'}) Support: 0.1797
frozenset({'turkey'}) Support: 0.0625
frozenset({'milk'}) Support: 0.1296
frozenset({'whole wheat rice'}) Support: 0.0585
frozenset({'french fries'}) Support: 0.1709
frozenset({'soup'}) Support: 0.0505
frozenset({'frozen vegetables'}) Support: 0.0953
frozenset({'spaghetti'}) Support: 0.1741
frozenset({'cookies'}) Support: 0.0804
frozenset({'cooking oil'}) Support: 0.0511
frozenset({'champagne'