In [11]:
import pandas as pd

DATA = pd.read_csv('Bakery sales.csv')
DATA.head()

Unnamed: 0.1,Unnamed: 0,date,time,ticket_number,article,Quantity,unit_price
0,0,2021-01-02,08:38,150040.0,BAGUETTE,1.0,"0,90 €"
1,1,2021-01-02,08:38,150040.0,PAIN AU CHOCOLAT,3.0,"1,20 €"
2,4,2021-01-02,09:14,150041.0,PAIN AU CHOCOLAT,2.0,"1,20 €"
3,5,2021-01-02,09:14,150041.0,PAIN,1.0,"1,15 €"
4,8,2021-01-02,09:25,150042.0,TRADITIONAL BAGUETTE,5.0,"1,20 €"


In [12]:
TRANSACTIONS = DATA.groupby('ticket_number')['article'].apply(list).reset_index(name='items')
TRANSACTIONS.head()

Unnamed: 0,ticket_number,items
0,150040.0,"[BAGUETTE, PAIN AU CHOCOLAT]"
1,150041.0,"[PAIN AU CHOCOLAT, PAIN]"
2,150042.0,[TRADITIONAL BAGUETTE]
3,150043.0,"[BAGUETTE, CROISSANT]"
4,150044.0,[BANETTE]


In [13]:
ITEMS = DATA['article'].unique()
ITEMS

array(['BAGUETTE', 'PAIN AU CHOCOLAT', 'PAIN', 'TRADITIONAL BAGUETTE',
       'CROISSANT', 'BANETTE', 'BANETTINE', 'SPECIAL BREAD', 'COUPE',
       'SAND JB EMMENTAL', 'KOUIGN AMANN', 'BOULE 200G', 'BOULE 400G',
       'GAL FRANGIPANE 6P', 'CAMPAGNE', 'MOISSON', 'CAFE OU EAU',
       'BRIOCHE', 'CEREAL BAGUETTE', 'SEIGLE', 'COMPLET',
       'DIVERS PATISSERIE', 'GAL FRANGIPANE 4P', 'COOKIE', 'FICELLE',
       'PAIN AUX RAISINS', 'GAL POMME 6P', 'GAL POMME 4P', 'FINANCIER X5',
       'VIK BREAD', 'DIVERS VIENNOISERIE', 'GACHE', 'SANDWICH COMPLET',
       'PAIN BANETTE', 'GRAND FAR BRETON', 'QUIM BREAD',
       'SPECIAL BREAD KG', 'GD KOUIGN AMANN', 'BOULE POLKA',
       'DEMI BAGUETTE', 'CHAUSSON AUX POMMES', 'BAGUETTE GRAINE',
       'DIVERS CONFISERIE', 'SUCETTE', 'DIVERS BOULANGERIE',
       'BOISSON 33CL', 'PATES', 'FORMULE SANDWICH', 'DIVERS SANDWICHS',
       'CROISSANT AMANDES', 'PAIN CHOCO AMANDES', 'SACHET VIENNOISERIE',
       'NANTAIS', 'CHOCOLAT', 'PAIN S/SEL', 'FONDANT CHOC

In [14]:
import itertools
from typing import Iterable

TRANSACTION_COUNT = len(TRANSACTIONS)
MINSUP = 0.01

def remove_infrequent_patterns(transactions: Iterable[frozenset[str]], patterns: list[frozenset[str]], threshold: int):
    counts = [0] * len(patterns)

    for transaction in transactions:
        for i, pattern in enumerate(patterns):
            if pattern.issubset(transaction):
                counts[i] += 1

    i = 0
    while i < len(patterns):
        if counts[i] < threshold:
            del patterns[i]
            del counts[i]
        else:
            i += 1

def get_candidates(F_k: list[frozenset[str]], k: int) -> set[frozenset[str]]:
    C_k_plus_1 = set()

    for i, p in enumerate(F_k):
        for q in F_k[i + 1:]:
            candidate = p.union(q)
            if len(candidate) != k + 1:
                continue
            all_subsets_of_candidate_are_frequent = True
            for subset in itertools.combinations(candidate, k):
                if set(subset) not in F_k:
                    all_subsets_of_candidate_are_frequent = False
                    break
            if not all_subsets_of_candidate_are_frequent:
                continue
            C_k_plus_1.add(candidate)

    return C_k_plus_1

def apriori(transactions: Iterable[frozenset[str]], items: frozenset[str], minsup: float) -> list[frozenset[str]]:
    threshold = minsup * len(transactions)

    # Start with singletons.
    F_1 = [frozenset([item]) for item in items]
    remove_infrequent_patterns(transactions, F_1, threshold)

    F = [F_1]

    for k in range(1, len(items)):
        C_k_plus_1 = get_candidates(F[-1], k)

        F_k_plus_1 = list(C_k_plus_1)
        remove_infrequent_patterns(transactions, F_k_plus_1, threshold)

        if not F_k_plus_1:
            break

        F.append(F_k_plus_1)

    return [pattern for F_k in F for pattern in F_k]

FREQUENT_PATTERNS = apriori(TRANSACTIONS['items'], frozenset(ITEMS), MINSUP)

for pattern in FREQUENT_PATTERNS:
    print(", ".join(pattern))

CAMPAGNE
MOISSON
CHAUSSON AUX POMMES
ECLAIR
BOULE 200G
CROISSANT
BOULE 400G
BAGUETTE GRAINE
PAIN CHOCO AMANDES
GRAND FAR BRETON
BANETTE
BANETTINE
FORMULE SANDWICH
TARTELETTE
TRADITIONAL BAGUETTE
FICELLE
SAND JB EMMENTAL
SANDWICH COMPLET
BRIOCHE
VIK BREAD
PAIN
PAIN AUX RAISINS
BOISSON 33CL
COMPLET
CAFE OU EAU
COUPE
PAIN AU CHOCOLAT
PAIN BANETTE
BAGUETTE
SPECIAL BREAD
CROISSANT AMANDES
CEREAL BAGUETTE
COOKIE
TRADITIONAL BAGUETTE, COUPE
COUPE, COMPLET
CROISSANT, BAGUETTE
TRADITIONAL BAGUETTE, BAGUETTE
COUPE, BAGUETTE
COUPE, MOISSON
TRADITIONAL BAGUETTE, PAIN AU CHOCOLAT
CAMPAGNE, COUPE
TRADITIONAL BAGUETTE, CROISSANT
COUPE, BOULE 400G
COUPE, VIK BREAD
SPECIAL BREAD, COUPE
PAIN AU CHOCOLAT, CROISSANT
TRADITIONAL BAGUETTE, VIK BREAD
COUPE, BOULE 200G
TRADITIONAL BAGUETTE, PAIN AU CHOCOLAT, CROISSANT


In [32]:
def support(transactions: Iterable[frozenset[str]], pattern: frozenset[str]) -> float:
    count = 0
    for transaction in transactions:
        if pattern.issubset(transaction):
            count += 1
    return count / len(transactions)

def kulczynski(transactions: Iterable[frozenset[str]], a: frozenset[str], b: frozenset[str]) -> float:
    a_union_b = a.union(b)
    support_a_union_b = support(transactions, a_union_b)
    return (support_a_union_b / support(transactions, a) + support_a_union_b / support(transactions, b)) / 2

def ir(transactions: Iterable[frozenset[str]], a: frozenset[str], b: frozenset[str]) -> float:
    a_union_b = a.union(b)
    support_a = support(transactions, a)
    support_b = support(transactions, b)
    support_a_union_b = support(transactions, a_union_b)
    return abs(support_a - support_b) / (support_a + support_b - support_a_union_b)

RESULTS = pd.DataFrame(columns=['a', 'b', 'kulczynski', 'ir'])

for pattern in FREQUENT_PATTERNS:
    if len(pattern) != 2:
        continue
    a, b = pattern
    kulczynski_value = kulczynski(TRANSACTIONS['items'], frozenset([a]), frozenset([b]))
    ir_value = ir(TRANSACTIONS['items'], frozenset([a]), frozenset([b]))
    RESULTS.loc[-1] = {'a': a, 'b': b, 'kulczynski': kulczynski_value, 'ir': ir_value}
    RESULTS.index += 1

In [36]:
RESULTS.sort_values(by='kulczynski', ascending=False)

Unnamed: 0,a,b,kulczynski,ir
0,COUPE,BOULE 200G,0.505527,0.848662
2,PAIN AU CHOCOLAT,CROISSANT,0.491778,0.055304
5,COUPE,BOULE 400G,0.480563,0.757106
7,CAMPAGNE,COUPE,0.479902,0.768759
13,COUPE,COMPLET,0.42886,0.804772
4,COUPE,VIK BREAD,0.425973,0.804601
9,COUPE,MOISSON,0.413541,0.803949
3,SPECIAL BREAD,COUPE,0.372677,0.660667
1,TRADITIONAL BAGUETTE,VIK BREAD,0.267596,0.932626
6,TRADITIONAL BAGUETTE,CROISSANT,0.251705,0.757407
