In [34]:
import pandas as pd
import numpy as np

In [35]:
# Dataset: https://www.kaggle.com/code/shrikantuppin/association-rules-apriori-grocery-dataset/data
# Algorithm inspired by "Machine Learning in Action" (3.x).
dataframe = pd.read_csv('groceries.csv', sep=',', header='infer')

# Convert to a raw matrix, drop its first column (index 0 along axis 1), and
# pass the remainder through nan_to_num.
# NOTE(review): nan_to_num only rewrites floating-point arrays; if this matrix
# is object-dtype (string item names) the NaN padding presumably survives and
# is filtered later by createC1's "nan" check -- confirm.
data = np.nan_to_num(np.delete(dataframe.to_numpy(), 0, axis=1))

In [36]:
def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a collection of transactions.

    Every item is normalised with ``str()`` so that the membership test and
    the stored value agree (the previous code compared the raw item but stored
    its string form, which produced duplicate candidates for non-string
    items). Items whose string form is ``"nan"`` are skipped.

    Parameters
    ----------
    dataSet : iterable of iterables
        Each inner iterable is one transaction.

    Returns
    -------
    list of frozenset
        One single-element frozenset per distinct item label, ordered by the
        label's string sort order. Frozensets are used so that the candidates
        can serve as dictionary keys.
    """
    labels = set()  # set membership is O(1), unlike scanning a growing list
    for transaction in dataSet:
        for item in transaction:
            label = str(item)
            if label != "nan":
                labels.add(label)
    return [frozenset([label]) for label in sorted(labels)]
def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and filter by support.

    Parameters
    ----------
    D : sequence of set
        Transactions; ``len(D)`` is used as the support denominator.
    Ck : iterable of frozenset
        Candidate itemsets to look for in every transaction.
    minSupport : float
        Minimum fraction of transactions a candidate must appear in.

    Returns
    -------
    (list, dict)
        The candidates whose support is at least ``minSupport`` (in reverse
        order of first occurrence), and a dict mapping every candidate that
        occurred at least once to its support.
    """
    occurrences = {}
    for transaction in D:
        for candidate in Ck:
            if candidate.issubset(transaction):
                occurrences[candidate] = occurrences.get(candidate, 0) + 1

    total = float(len(D))
    supportData = {
        candidate: hits / total for candidate, hits in occurrences.items()
    }

    # The frequent list is returned most-recently-seen first.
    retList = [
        candidate
        for candidate, support in supportData.items()
        if support >= minSupport
    ]
    retList.reverse()
    return retList, supportData
def aprioriGen(Lk, k):
    """Generate candidate k-itemsets by joining pairs of (k-1)-itemsets.

    Two itemsets are merged when their first ``k-2`` items, taken in sorted
    order, are identical. Each itemset is sorted BEFORE the prefix is sliced:
    slicing ``list(frozenset)`` first (as the previous code did) takes the
    prefix in arbitrary hash order, which can miss valid candidates and makes
    the result depend on hash ordering.

    Parameters
    ----------
    Lk : sequence of frozenset
        Itemsets to join. Items within them must be mutually orderable.
    k : int
        Size of the itemsets to produce.

    Returns
    -------
    list of frozenset
        The union of every pair in ``Lk`` that shares the sorted (k-2)-prefix.
    """
    # Compute each prefix once instead of once per pair.
    prefixes = [sorted(itemset)[:k-2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i+1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

def apriori(dataSet, minSupport = 0.5):
    """Mine all frequent itemsets from ``dataSet`` with the Apriori algorithm.

    Transactions are normalised the same way createC1 normalises candidates
    (items converted with ``str()``, ``"nan"`` entries dropped). Without this,
    candidates made of strings could never be subsets of transactions holding
    non-string items, and nothing would be reported as frequent.

    Parameters
    ----------
    dataSet : iterable of iterables
        Each inner iterable is one transaction.
    minSupport : float, default 0.5
        Minimum fraction of transactions an itemset must appear in.

    Returns
    -------
    (list, dict)
        ``L`` where ``L[k-1]`` is the list of frequent k-itemsets (the final
        entry is always an empty list, marking where the search stopped), and
        a dict mapping every counted candidate itemset to its support.
    """
    # Materialise first: the data is traversed twice, which would silently
    # yield nothing the second time for a one-shot iterable such as a generator.
    transactions = [list(transaction) for transaction in dataSet]
    C1 = createC1(transactions)
    D = [
        {str(item) for item in transaction if str(item) != "nan"}
        for transaction in transactions
    ]
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # Keep growing itemsets until a level yields no frequent itemsets.
    while len(L[k-2]) > 0:
        Ck = aprioriGen(L[k-2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData
     
# Build the single-item candidate itemsets from the loaded transactions.
C1 = createC1(data)

In [37]:
# Represent every transaction row as a set so candidates can be subset-tested.
D = [set(row) for row in data]
# Keep the single items that occur in at least 10% of the transactions.
L1, suppData0 = scanD(D, C1, minSupport=0.10)
print("Common items:",L1)

Common items: [frozenset({'soda'}), frozenset({'other vegetables'}), frozenset({'rolls/buns'}), frozenset({'whole milk'}), frozenset({'yogurt'})]


In [38]:
# Mine frequent itemsets with a 2% minimum support. L is the per-size list of
# frequent itemsets and suppData maps each counted itemset to its support.
L, suppData = apriori(data, minSupport = 0.02)

In [39]:
def calcConf(freqSet, H, supportData, brl, minConf):
    """Score the rules ``(freqSet - conseq) --> conseq`` for each conseq in H.

    Confidence is ``support(freqSet) / support(freqSet - conseq)``. Every rule
    reaching ``minConf`` is printed and appended to ``brl`` as an
    ``(antecedent, consequent, confidence)`` tuple.

    Parameters
    ----------
    freqSet : frozenset
        The frequent itemset the rules are derived from.
    H : iterable of frozenset
        Candidate consequents.
    supportData : dict
        Maps itemsets to their support; must contain ``freqSet`` and every
        antecedent that is evaluated.
    brl : list
        Rule list, extended in place.
    minConf : float
        Minimum confidence for a rule to be kept.

    Returns
    -------
    list of frozenset
        The consequents whose rule met ``minConf``.
    """
    surviving = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            print(antecedent,'-->',conseq,'conf:',conf)
            brl.append((antecedent, conseq, conf))
            surviving.append(conseq)
    return surviving
def rulesFromConseq(freqSet, H, supportData, brl, minConf):
    """Recursively grow consequents from ``H`` and record the rules that pass.

    The consequents in ``H`` (all of size m) are merged into size m+1
    consequents with aprioriGen, scored with calcConf, and the survivors are
    merged again for as long as the consequent stays smaller than ``freqSet``
    and more than one survivor remains.

    Parameters
    ----------
    freqSet : frozenset
        The frequent itemset the rules are derived from.
    H : sequence of frozenset
        Current consequents, all of the same size. An empty sequence is a
        no-op (previously it raised IndexError on ``H[0]``).
    supportData : dict
        Maps itemsets to their support.
    brl : list
        Rule list, extended in place by calcConf.
    minConf : float
        Minimum confidence for a rule to be kept.

    Returns
    -------
    None
    """
    if len(H) == 0:
        # Nothing to merge; also avoids indexing into an empty list below.
        return
    m = len(H[0])
    # Only merge while the larger consequent would leave a non-empty antecedent.
    if len(freqSet) > (m + 1):
        Hmp1 = aprioriGen(H, m+1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        # At least two survivors are needed to form a larger consequent.
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)
def generateRules(L, supportData, minConf):
    """Derive association rules from frequent itemsets.

    For every frequent itemset with two or more items, the single-item
    consequents are scored first. For itemsets with three or more items the
    consequents that passed are then handed to rulesFromConseq to try larger
    consequents. Previously, itemsets of three or more items skipped the
    single-item consequent step entirely, so rules such as {a, b} --> {c}
    were never evaluated.

    Parameters
    ----------
    L : list of list of frozenset
        Frequent itemsets grouped by size; ``L[0]`` is skipped, so it is
        expected to hold the 1-itemsets (as returned by apriori).
    supportData : dict
        Maps itemsets to their support.
    minConf : float
        Minimum confidence for a rule to be kept.

    Returns
    -------
    list of tuple
        ``(antecedent, consequent, confidence)`` for every accepted rule.
        Accepted rules are also printed by calcConf.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Score single-item consequents for every itemset size and keep
            # only those that met minConf as seeds for larger consequents.
            H1 = calcConf(freqSet, H1, supportData, bigRuleList, minConf)
            # Larger consequents need at least two surviving seeds to merge.
            if i > 1 and len(H1) > 1:
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList

In [40]:
# Generate rules with a minimum confidence of 0.4; each accepted rule is
# printed by calcConf as it is found.
rules = generateRules(L, suppData, 0.4)

frozenset({'pip fruit'}) --> frozenset({'other vegetables'}) conf: 0.430406852248394
frozenset({'pip fruit'}) --> frozenset({'whole milk'}) conf: 0.443254817987152
frozenset({'tropical fruit'}) --> frozenset({'other vegetables'}) conf: 0.4363636363636364
frozenset({'root vegetables'}) --> frozenset({'whole milk'}) conf: 0.47831632653061223
frozenset({'domestic eggs'}) --> frozenset({'whole milk'}) conf: 0.4597902097902098
frozenset({'whipped/sour cream'}) --> frozenset({'whole milk'}) conf: 0.43789808917197454
frozenset({'other vegetables'}) --> frozenset({'whole milk'}) conf: 0.4137214137214137
frozenset({'root vegetables'}) --> frozenset({'other vegetables'}) conf: 0.46428571428571425
frozenset({'yogurt'}) --> frozenset({'whole milk'}) conf: 0.4036617262423714
frozenset({'butter'}) --> frozenset({'whole milk'}) conf: 0.488272921108742
frozenset({'curd'}) --> frozenset({'whole milk'}) conf: 0.49191685912240185
frozenset({'tropical fruit'}) --> frozenset({'whole milk'}) conf: 0.4472727

In [41]:
# Repeat with a lower minimum confidence of 0.3. This rebinds `rules`, so the
# rule list from the 0.4 run is no longer available afterwards.
rules = generateRules(L, suppData, 0.3)

frozenset({'whipped/sour cream'}) --> frozenset({'yogurt'}) conf: 0.31528662420382164
frozenset({'yogurt'}) --> frozenset({'other vegetables'}) conf: 0.31473408892763727
frozenset({'pip fruit'}) --> frozenset({'other vegetables'}) conf: 0.430406852248394
frozenset({'fruit/vegetable juice'}) --> frozenset({'whole milk'}) conf: 0.33333333333333326
frozenset({'margarine'}) --> frozenset({'whole milk'}) conf: 0.35818181818181816
frozenset({'rolls/buns'}) --> frozenset({'whole milk'}) conf: 0.3054989816700611
frozenset({'pip fruit'}) --> frozenset({'whole milk'}) conf: 0.443254817987152
frozenset({'tropical fruit'}) --> frozenset({'other vegetables'}) conf: 0.4363636363636364
frozenset({'pastry'}) --> frozenset({'whole milk'}) conf: 0.35262449528936746
frozenset({'root vegetables'}) --> frozenset({'whole milk'}) conf: 0.47831632653061223
frozenset({'brown bread'}) --> frozenset({'whole milk'}) conf: 0.3407917383820998
frozenset({'domestic eggs'}) --> frozenset({'whole milk'}) conf: 0.459790