## Load Data

In [2]:
import numpy as np
from libs.utils import *

In [3]:
# Run configuration: input file, thresholds, and the item vocabulary.
path_to_data = "dataset/data_1.txt"
min_support = 2 / 9      # threshold handed to get_frequent (2 of 9 as a fraction)
min_confidence = 0.3     # rules with lower confidence are not reported
# Item labels I1 .. I5, in the order used to build the size-1 candidates.
order = ["I{}".format(n) for n in range(1, 6)]

In [4]:
# Load the transactions from the configured file. `order` is passed along with
# the path -- presumably so items inside each transaction follow that ordering
# (TODO confirm against libs.utils.load_transactions).
transactions = load_transactions(path_to_data, order)

In [5]:
# Total number of transactions; used later as the denominator when computing lift.
num_trans = len(transactions)
# A bare expression is only rendered when it is the LAST statement of a cell,
# so the preview of the loaded transactions has to come after the assignment.
transactions

## Initialization 

In [7]:
# Per-size bookkeeping, keyed by itemset size:
#   C         -> candidate itemsets
#   L         -> itemsets kept by get_frequent
#   discarded -> itemsets reported as discarded by get_frequent
itemset_size = 1
# Every single item is a size-1 candidate.
C = {itemset_size: [[item] for item in order]}
L = {}
discarded = {itemset_size: []}

In [8]:
# Inspect the size-1 candidate itemsets built above.
C

{1: [['I1'], ['I2'], ['I3'], ['I4'], ['I5']]}

__Create L1__

In [10]:
# Support counts of the kept itemsets, keyed by itemset size (parallel to L).
supp_count_L = {}

# Filter the size-1 candidates; get_frequent returns the kept itemsets, their
# support counts, and the itemsets it discarded at this size.
f, sup, new_discarded = get_frequent(
    C[itemset_size], transactions, min_support, discarded
)

L[itemset_size] = f
supp_count_L[itemset_size] = sup
discarded[itemset_size] = new_discarded

In [11]:
# Show the first-level frequent itemsets next to their support counts.
print_table(L[itemset_size], supp_count_L[itemset_size])

Itemset | Frequency
['I1']  :  6
['I2']  :  7
['I3']  :  6
['I4']  :  2
['I5']  :  2




## Apriori algorithm

In [13]:
# Level-wise search: build the size-k candidates from the size-(k-1) frequent
# itemsets, filter them, and stop at the first size that yields nothing.
k = itemset_size + 1
convergence = False
while not convergence:
    # Candidate generation from the previous level.
    C[k] = join_set_itemsets(L[k - 1], order)

    # Filtering; results are recorded even when empty, so the terminal
    # (empty) level is stored in L, supp_count_L and discarded as well.
    f, sup, new_discarded = get_frequent(C[k], transactions, min_support, discarded)
    discarded[k] = new_discarded
    L[k] = f
    supp_count_L[k] = sup

    convergence = len(f) == 0
    if not convergence:
        print("Table L{}: \n".format(k))
        print_table(f, sup)

    # Incremented on every pass, including the final (empty) one.
    k += 1

Table L2: 

Itemset | Frequency
['I1', 'I2']  :  4
['I1', 'I3']  :  4
['I1', 'I5']  :  2
['I2', 'I3']  :  4
['I2', 'I4']  :  2
['I2', 'I5']  :  2


Table L3: 

Itemset | Frequency
['I1', 'I2', 'I3']  :  2
['I1', 'I2', 'I5']  :  2




## Generating the association rules

In [28]:
# Build the association rules S -> (X - S) for every frequent itemset X and
# every subset S yielded by powerset(X), keeping those whose confidence is at
# least min_confidence. The formatted rules are collected in a list and joined
# once at the end instead of growing a string with repeated `+=`.
rule_chunks = []

# Iterate over the levels actually stored in L. Levels without itemsets (such
# as the empty terminal level left by the Apriori loop) contribute nothing, so
# the result no longer depends on that trailing empty entry being present.
for i in sorted(L):
    for j, itemset in enumerate(L[i]):
        # Both values are invariant across the subsets of this itemset,
        # so compute them once rather than once per subset.
        X = set(itemset)
        sup_x = supp_count_L[i][j]

        for z in powerset(itemset):
            S = set(z)

            # confidence(S -> X-S) = count(X) / count(S)
            conf = sup_x / count_occurences(S, transactions)
            if conf < min_confidence:
                # Rejected rule: skip the extra pass over the transactions
                # that would only be needed to compute its lift.
                continue

            X_S = X - S
            sup_x_s = count_occurences(X_S, transactions)
            # lift = confidence / relative frequency of the consequent
            lift = conf / (sup_x_s / num_trans)

            rule_chunks.append(
                write_rules(X, X_S, S, conf, sup_x, lift, num_trans)
            )

assoc_rules_str = "".join(rule_chunks)

In [30]:
# Print the accumulated rule descriptions (only rules that met min_confidence
# were appended to assoc_rules_str).
print(assoc_rules_str)

Freq. Itemset: {'I1', 'I2'}
    Rule: ['I1'] -> ['I2'] 
    Conf: 0.667     Supp: 0.444     Lift: 0.857 
Freq. Itemset: {'I1', 'I2'}
    Rule: ['I2'] -> ['I1'] 
    Conf: 0.571     Supp: 0.444     Lift: 0.857 
Freq. Itemset: {'I3', 'I1'}
    Rule: ['I1'] -> ['I3'] 
    Conf: 0.667     Supp: 0.444     Lift: 1.000 
Freq. Itemset: {'I3', 'I1'}
    Rule: ['I3'] -> ['I1'] 
    Conf: 0.667     Supp: 0.444     Lift: 1.000 
Freq. Itemset: {'I1', 'I5'}
    Rule: ['I1'] -> ['I5'] 
    Conf: 0.333     Supp: 0.222     Lift: 1.500 
Freq. Itemset: {'I1', 'I5'}
    Rule: ['I5'] -> ['I1'] 
    Conf: 1.000     Supp: 0.222     Lift: 1.500 
Freq. Itemset: {'I3', 'I2'}
    Rule: ['I2'] -> ['I3'] 
    Conf: 0.571     Supp: 0.444     Lift: 0.857 
Freq. Itemset: {'I3', 'I2'}
    Rule: ['I3'] -> ['I2'] 
    Conf: 0.667     Supp: 0.444     Lift: 0.857 
Freq. Itemset: {'I2', 'I4'}
    Rule: ['I4'] -> ['I2'] 
    Conf: 1.000     Supp: 0.222     Lift: 1.286 
Freq. Itemset: {'I5', 'I2'}
    Rule: ['I5'] -> ['I2'] 