In [0]:
import tqdm
import pandas as pd

import tqdm

In [0]:
def createC1(dataSet):
    C1 = []
    for transaction in dataSet:
        for item in transaction:
            if not [item] in C1:
                C1.append([item])
                
    C1.sort()
    return list(map(frozenset, C1))#use frozen set so we
                            #can use it as a key in a dict
                          
def scanD(D, Ck, minSupport):
    ssCnt = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                if not can in ssCnt: 
                  ssCnt[can]=1
                else: 
                  ssCnt[can] += 1
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key in ssCnt:
        support = ssCnt[key]/numItems
        if support >= minSupport:
            retList.insert(0,key)
        supportData[key] = support
    return retList, supportData

def aprioriGen(Lk, k): #creates Ck
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i+1, lenLk): 
            L1 = list(Lk[i])[:k-2]
            L2 = list(Lk[j])[:k-2]
            L1.sort()
            L2.sort()
            if L1==L2: #if first k-2 elements are equal
                retList.append(Lk[i] | Lk[j]) #set union
    return retList


def apriori(dataSet, minSupport = 0.5):
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    while (len(L[k-2]) > 0):
        Ck = aprioriGen(L[k-2], k)
        Lk, supK = scanD(D, Ck, minSupport)#scan DB to get Lk
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData

def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    m = len(H[0])
    if (len(freqSet) > (m + 1)): #try further merging
        Hmp1 = aprioriGen(H, m+1)#create Hm+1 new candidates
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        if (len(Hmp1) > 1):    #need at least two sets to merge
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)

def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    prunedH = [] #create new list to return
    for conseq in H:
        conf = round(supportData[freqSet]/supportData[freqSet-conseq], 4) #calc confidence
        lift = round(conf / supportData[conseq], 4)
        leverage = round(supportData[freqSet] - supportData[freqSet-conseq] * supportData[conseq], 4)
        intrest = round(supportData[freqSet]/(supportData[freqSet-conseq] * supportData[conseq]), 4)
        '''
        Intrest > 1 Complementarity effects between X and Y
        Intrest = 1 Conditional independence between X and Y
        Intrest < 1 Substitutability effects between X and Y
        '''
        if conf >= minConf: 
            print (freqSet-conseq,'-->',conseq,'conf:',conf, 'lift:', lift, 'leverage:', leverage, 'intrest:', intrest)
            brl.append((freqSet-conseq, conseq, conf, lift, leverage, intrest))
            prunedH.append(conseq)
    return prunedH

def generateRules(L, supportData, minConf=0.7):  #supportData is a dict coming from scanD
    bigRuleList = []
    for i in tqdm.tqdm(range(1, len(L))):#only get the sets with two or more items
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            if (i > 1):
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList

def runApriori(dataSet, minSupport=0.5, minConf=0.5):
  L,suppData = apriori(dataSet, minSupport)
  rules= generateRules(L,suppData, minConf=minConf)
  return L, suppData, rules

In [74]:
dataset = [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
L,suppData, rules = runApriori(dataset, 0.5, 0.7)

100%|██████████| 3/3 [00:00<00:00, 782.57it/s]

frozenset({5}) --> frozenset({2}) conf: 1.0 lift: 1.3333 leverage: 0.1875 intrest: 1.3333
frozenset({2}) --> frozenset({5}) conf: 1.0 lift: 1.3333 leverage: 0.1875 intrest: 1.3333
frozenset({1}) --> frozenset({3}) conf: 1.0 lift: 1.3333 leverage: 0.125 intrest: 1.3333





In [0]:
df = []
with open('/content/drive/My Drive/test.txt') as f:
    for x in f:
        df.append(x.split())
df = list(map(lambda x: list(map(int, x[0].split(","))), df))

In [67]:
L,suppData, rules = runApriori(df, 0.4, 0.7)

100%|██████████| 2/2 [00:00<00:00, 396.14it/s]

frozenset({1}) --> frozenset({2}) conf: 1.0 lift: 1.1667 leverage: 0.0612 intrest: 1.1667
frozenset({3}) --> frozenset({2}) conf: 0.75 lift: 0.875 leverage: -0.0612 intrest: 0.875
frozenset({4}) --> frozenset({2}) conf: 0.8 lift: 0.9333 leverage: -0.0408 intrest: 0.9333
frozenset({3}) --> frozenset({4}) conf: 0.75 lift: 1.05 leverage: 0.0204 intrest: 1.05





RETAIL DATASET

In [0]:
df = []
with open('/content/drive/My Drive/retail.dat') as f:
    for x in f:
        df.append(x.split("\n"))
df = list(map(lambda x: list(map(int, x[0].split())), df))
df = df[1:]

In [82]:
L, suppData, rules = runApriori(df, 0.01, 0.7)

100%|██████████| 4/4 [00:00<00:00, 183.01it/s]

frozenset({2238}) --> frozenset({39}) conf: 0.7504 lift: 1.3055 leverage: 0.0034 intrest: 1.3056
frozenset({310}) --> frozenset({39}) conf: 0.714 lift: 1.2422 leverage: 0.0041 intrest: 1.2421
frozenset({286}) --> frozenset({38}) conf: 0.9434 lift: 5.3328 leverage: 0.0103 intrest: 5.3326
frozenset({225}) --> frozenset({39}) conf: 0.7218 lift: 1.2557 leverage: 0.0054 intrest: 1.2558
frozenset({255}) --> frozenset({39}) conf: 0.7171 lift: 1.2476 leverage: 0.0024 intrest: 1.2476
frozenset({255}) --> frozenset({48}) conf: 0.7171 lift: 1.5004 leverage: 0.004 intrest: 1.5004
frozenset({170}) --> frozenset({38}) conf: 0.9781 lift: 5.529 leverage: 0.0282 intrest: 5.5288
frozenset({110}) --> frozenset({38}) conf: 0.9753 lift: 5.5132 leverage: 0.0253 intrest: 5.5132
frozenset({89}) --> frozenset({39}) conf: 0.7164 lift: 1.2463 leverage: 0.0062 intrest: 1.2464
frozenset({89}) --> frozenset({48}) conf: 0.7292 lift: 1.5257 leverage: 0.0109 intrest: 1.5258
frozenset({36}) --> frozenset({38}) conf: 0.




In [70]:
L

[[frozenset({16217}),
  frozenset({16010}),
  frozenset({15832}),
  frozenset({14098}),
  frozenset({13041}),
  frozenset({12925}),
  frozenset({10515}),
  frozenset({3270}),
  frozenset({2958}),
  frozenset({2238}),
  frozenset({1393}),
  frozenset({1327}),
  frozenset({1146}),
  frozenset({1004}),
  frozenset({956}),
  frozenset({824}),
  frozenset({783}),
  frozenset({9}),
  frozenset({740}),
  frozenset({19}),
  frozenset({677}),
  frozenset({604}),
  frozenset({592}),
  frozenset({589}),
  frozenset({548}),
  frozenset({533}),
  frozenset({522}),
  frozenset({479}),
  frozenset({475}),
  frozenset({438}),
  frozenset({413}),
  frozenset({338}),
  frozenset({310}),
  frozenset({301}),
  frozenset({286}),
  frozenset({271}),
  frozenset({270}),
  frozenset({264}),
  frozenset({258}),
  frozenset({255}),
  frozenset({249}),
  frozenset({242}),
  frozenset({237}),
  frozenset({225}),
  frozenset({201}),
  frozenset({185}),
  frozenset({179}),
  frozenset({175}),
  frozenset({170}),
  

In [71]:
suppData

{frozenset({30}): 0.006125157382516079,
 frozenset({31}): 0.010435453318360726,
 frozenset({32}): 0.17203752226040994,
 frozenset({33}): 0.00041968670954276835,
 frozenset({34}): 0.0001588003765837502,
 frozenset({35}): 0.0008847449552523225,
 frozenset({36}): 0.03330270754642075,
 frozenset({37}): 0.012182257460781979,
 frozenset({38}): 0.1769036195142977,
 frozenset({39}): 0.5748006488129672,
 frozenset({40}): 0.002393348532797949,
 frozenset({41}): 0.16951940200315332,
 frozenset({42}): 0.0011456312882113407,
 frozenset({43}): 0.0024840916051315208,
 frozenset({44}): 0.0022232052721725027,
 frozenset({45}): 0.010333367361985459,
 frozenset({46}): 0.0008507163031272331,
 frozenset({47}): 0.00503624051451322,
 frozenset({48}): 0.47793241909687956,
 frozenset({49}): 0.012704030126700015,
 frozenset({50}): 0.002824378126382414,
 frozenset({51}): 0.0022118623881308063,
 frozenset({52}): 0.005682784904889917,
 frozenset({53}): 0.005773527977223488,
 frozenset({54}): 0.0009414593754608047,

In [72]:
rules

[(frozenset({2238}), frozenset({39}), 0.7504, 1.3055, 0.0034, 1.3056),
 (frozenset({310}), frozenset({39}), 0.714, 1.2422, 0.0041, 1.2421),
 (frozenset({286}), frozenset({38}), 0.9434, 5.3328, 0.0103, 5.3326),
 (frozenset({225}), frozenset({39}), 0.7218, 1.2557, 0.0054, 1.2558),
 (frozenset({255}), frozenset({39}), 0.7171, 1.2476, 0.0024, 1.2476),
 (frozenset({255}), frozenset({48}), 0.7171, 1.5004, 0.004, 1.5004),
 (frozenset({170}), frozenset({38}), 0.9781, 5.529, 0.0282, 5.5288),
 (frozenset({110}), frozenset({38}), 0.9753, 5.5132, 0.0253, 5.5132),
 (frozenset({89}), frozenset({39}), 0.7164, 1.2463, 0.0062, 1.2464),
 (frozenset({89}), frozenset({48}), 0.7292, 1.5257, 0.0109, 1.5258),
 (frozenset({36}), frozenset({38}), 0.9503, 5.3719, 0.0258, 5.3717),
 (frozenset({37}), frozenset({38}), 0.9739, 5.5053, 0.0097, 5.5054),
 (frozenset({41}), frozenset({39}), 0.7637, 1.3286, 0.032, 1.3287),
 (frozenset({48, 110}), frozenset({38, 39}), 0.7471, 6.3669, 0.0099, 6.3669),
 (frozenset({48, 170

In [83]:
for i in rules:
  if i[5] > 5 and i[4] > 0.01:
    print(i)

(frozenset({286}), frozenset({38}), 0.9434, 5.3328, 0.0103, 5.3326)
(frozenset({170}), frozenset({38}), 0.9781, 5.529, 0.0282, 5.5288)
(frozenset({110}), frozenset({38}), 0.9753, 5.5132, 0.0253, 5.5132)
(frozenset({36}), frozenset({38}), 0.9503, 5.3719, 0.0258, 5.3717)
(frozenset({48, 170}), frozenset({38, 39}), 0.7662, 6.5296, 0.0115, 6.5298)
(frozenset({48, 36}), frozenset({38, 39}), 0.7627, 6.4998, 0.0104, 6.4999)


KOSARAK DATA

In [0]:
df = []
with open('/content/drive/My Drive/kosarak.dat') as f:
    for x in f:
        df.append(x.split("\n"))
df = list(map(lambda x: list(map(int, x[0].split())), df))

In [87]:
L, suppData, rules = runApriori(df, 0.01, 0.85)

100%|██████████| 5/5 [00:00<00:00, 41.93it/s]

frozenset({987}) --> frozenset({6}) conf: 0.9228 lift: 1.5191 leverage: 0.0057 intrest: 1.5192
frozenset({897}) --> frozenset({6}) conf: 0.914 lift: 1.5047 leverage: 0.0044 intrest: 1.5047
frozenset({897}) --> frozenset({7}) conf: 0.8917 lift: 10.1589 leverage: 0.0116 intrest: 10.1584
frozenset({737}) --> frozenset({6}) conf: 0.8785 lift: 1.4462 leverage: 0.0048 intrest: 1.4462
frozenset({512}) --> frozenset({6}) conf: 0.9607 lift: 1.5815 leverage: 0.0046 intrest: 1.5815
frozenset({512}) --> frozenset({11}) conf: 0.8587 lift: 2.3351 leverage: 0.0064 intrest: 2.335
frozenset({504}) --> frozenset({6}) conf: 0.8631 lift: 1.4209 leverage: 0.0038 intrest: 1.4209
frozenset({25}) --> frozenset({6}) conf: 0.9645 lift: 1.5878 leverage: 0.0046 intrest: 1.5878
frozenset({364}) --> frozenset({6}) conf: 0.9661 lift: 1.5904 leverage: 0.0044 intrest: 1.5905
frozenset({364}) --> frozenset({11}) conf: 0.8525 lift: 2.3182 leverage: 0.006 intrest: 2.3181
frozenset({378}) --> frozenset({6}) conf: 0.9317 l




In [88]:
L

[[frozenset({987}),
  frozenset({897}),
  frozenset({737}),
  frozenset({667}),
  frozenset({512}),
  frozenset({504}),
  frozenset({490}),
  frozenset({446}),
  frozenset({438}),
  frozenset({423}),
  frozenset({378}),
  frozenset({364}),
  frozenset({361}),
  frozenset({316}),
  frozenset({314}),
  frozenset({303}),
  frozenset({294}),
  frozenset({278}),
  frozenset({273}),
  frozenset({269}),
  frozenset({254}),
  frozenset({229}),
  frozenset({218}),
  frozenset({215}),
  frozenset({205}),
  frozenset({155}),
  frozenset({148}),
  frozenset({138}),
  frozenset({136}),
  frozenset({135}),
  frozenset({91}),
  frozenset({90}),
  frozenset({87}),
  frozenset({83}),
  frozenset({77}),
  frozenset({73}),
  frozenset({69}),
  frozenset({64}),
  frozenset({56}),
  frozenset({55}),
  frozenset({49}),
  frozenset({40}),
  frozenset({32}),
  frozenset({28}),
  frozenset({27}),
  frozenset({25}),
  frozenset({14}),
  frozenset({11}),
  frozenset({7}),
  frozenset({6}),
  frozenset({4}),
  fr

In [89]:
suppData

{frozenset({1}): 0.19951676865299262,
 frozenset({2}): 0.04336051846359906,
 frozenset({3}): 0.4545758493417185,
 frozenset({4}): 0.07888569922080965,
 frozenset({5}): 0.005989886889117396,
 frozenset({6}): 0.6074472576823077,
 frozenset({7}): 0.08777558025135303,
 frozenset({8}): 2.6262573206922815e-05,
 frozenset({9}): 0.0015343403346659906,
 frozenset({10}): 0.0002969690970321272,
 frozenset({11}): 0.36774168132993673,
 frozenset({12}): 0.00592726075300858,
 frozenset({13}): 0.0006969682889529516,
 frozenset({14}): 0.011724218738952042,
 frozenset({15}): 5.8585740230827815e-05,
 frozenset({16}): 0.0003191912743610619,
 frozenset({17}): 0.0024616111886642655,
 frozenset({18}): 0.005976755602513934,
 frozenset({19}): 0.0011777753984335386,
 frozenset({20}): 0.004549485758614629,
 frozenset({21}): 0.008851497269702486,
 frozenset({22}): 2.1212078359437658e-05,
 frozenset({23}): 0.001094947282934782,
 frozenset({24}): 0.002353530598928083,
 frozenset({25}): 0.012795933745588392,
 frozen

In [90]:
rules

[(frozenset({987}), frozenset({6}), 0.9228, 1.5191, 0.0057, 1.5192),
 (frozenset({897}), frozenset({6}), 0.914, 1.5047, 0.0044, 1.5047),
 (frozenset({897}), frozenset({7}), 0.8917, 10.1589, 0.0116, 10.1584),
 (frozenset({737}), frozenset({6}), 0.8785, 1.4462, 0.0048, 1.4462),
 (frozenset({512}), frozenset({6}), 0.9607, 1.5815, 0.0046, 1.5815),
 (frozenset({512}), frozenset({11}), 0.8587, 2.3351, 0.0064, 2.335),
 (frozenset({504}), frozenset({6}), 0.8631, 1.4209, 0.0038, 1.4209),
 (frozenset({25}), frozenset({6}), 0.9645, 1.5878, 0.0046, 1.5878),
 (frozenset({364}), frozenset({6}), 0.9661, 1.5904, 0.0044, 1.5905),
 (frozenset({364}), frozenset({11}), 0.8525, 2.3182, 0.006, 2.3181),
 (frozenset({378}), frozenset({6}), 0.9317, 1.5338, 0.0038, 1.5338),
 (frozenset({423}), frozenset({6}), 0.9712, 1.5988, 0.004, 1.5988),
 (frozenset({438}), frozenset({6}), 0.972, 1.6001, 0.007, 1.6001),
 (frozenset({438}), frozenset({11}), 0.8789, 2.39, 0.0098, 2.39),
 (frozenset({314}), frozenset({6}), 0.89

In [91]:
for i in rules:
  if i[5] > 10 and i[4] > 0.01 and i[3] > 10:
    print(i)

(frozenset({897}), frozenset({7}), 0.8917, 10.1589, 0.0116, 10.1584)
(frozenset({87}), frozenset({7}), 0.8926, 10.1691, 0.015, 10.1692)
(frozenset({11, 987}), frozenset({148, 6}), 0.8618, 13.1766, 0.0106, 13.1766)
(frozenset({11, 87}), frozenset({6, 7}), 0.9182, 12.3491, 0.0118, 12.3489)
(frozenset({148, 7}), frozenset({218, 6}), 0.8891, 11.332, 0.0122, 11.3319)
(frozenset({1, 148}), frozenset({218, 6}), 0.8809, 11.2275, 0.0227, 11.2277)
(frozenset({11, 148}), frozenset({218, 6}), 0.8943, 11.3982, 0.046, 11.3984)
(frozenset({27, 87}), frozenset({6, 7}), 0.8916, 11.9914, 0.0111, 11.9908)
(frozenset({11, 148, 7}), frozenset({218, 6}), 0.9104, 11.6034, 0.0111, 11.6031)
(frozenset({11, 3, 148}), frozenset({218, 6}), 0.8893, 11.3345, 0.0198, 11.3349)
(frozenset({1, 218, 11}), frozenset({148, 6}), 0.8703, 13.3065, 0.021, 13.3059)
(frozenset({1, 11, 148}), frozenset({218, 6}), 0.9219, 11.75, 0.0207, 11.7502)
