## **Apriori Algorithm** 
### **Author:** Hansal Shah 


# **Initializing the dataset**

In [None]:
# Toy market-basket dataset: transaction ID -> list of items bought together
transactions = {
  'T100': ['I1', 'I2', 'I5'],
  'T200': ['I2', 'I4'],
  'T300': ['I2', 'I3'],
  'T400': ['I1', 'I2', 'I4'],
  'T500': ['I1', 'I3'],
  'T600': ['I2', 'I3'],
  'T700': ['I1', 'I3'],
  'T800': ['I1', 'I2', 'I3', 'I5'],
  'T900': ['I1', 'I2', 'I3'],
}

# Every distinct item across all transactions, in sorted order.
# This list also serves as the global item ordering for the join step.
unique = sorted({item for basket in transactions.values() for item in basket})
unique

['I1', 'I2', 'I3', 'I4', 'I5']

# **Helper Functions**

In [None]:
def count_frequency(itemset, transactions):
  """Return the number of transactions that contain every item of `itemset`.

  `transactions` is iterated for its keys and then indexed by each key
  (e.g. a dict of transaction ID -> list of items).
  """
  wanted = set(itemset)
  return sum(1 for tid in transactions if wanted.issubset(set(transactions[tid])))

def join(set1, set2, order):
  """Apriori join step: merge two k-itemsets into one (k+1)-item candidate.

  Both itemsets are first arranged according to `order`. They are joinable
  when they have the same size, agree on every item except the last, and the
  last item of `set1` comes strictly before the last item of `set2`.

  Parameters:
    set1, set2: lists of items; every item must occur in `order`
      (`order.index` raises ValueError otherwise).
    order: list that fixes the global item ordering.

  Returns:
    A new list with the joined candidate, or [] when the two itemsets cannot
    be joined. The input lists are never modified.
  """
  # sorted() builds new lists, so the caller's itemsets are not reordered in place
  first = sorted(set1, key=order.index)
  second = sorted(set2, key=order.index)

  # Empty or differently sized itemsets have no valid join
  if not first or len(first) != len(second):
    return []

  # All items except the last must agree
  if first[:-1] != second[:-1]:
    return []

  if order.index(first[-1]) < order.index(second[-1]):
    return first + [second[-1]]

  # Same prefix but the last items are equal or out of order: not joinable.
  # Return [] explicitly so callers can always take len() of the result.
  return []

def join_sets(itemsets, order):
  """Build the next level of candidates by joining every pair of itemsets.

  Parameters:
    itemsets: list of itemsets (lists of items) from the previous level.
    order: list that fixes the global item ordering, passed through to join().

  Returns:
    A list with one candidate for every pair (i < j) that join() could merge,
    in pair order.
  """
  new_candidates = []
  for i, left in enumerate(itemsets):
    for right in itemsets[i+1:]:
      new_cand = join(left, right, order)
      # Treat any empty or None result as "not joinable" instead of calling
      # len() on it, which would raise TypeError for None
      if new_cand:
        new_candidates.append(new_cand)
  return new_candidates

def get_frequent_itemsets(itemsets, transactions, min_sup, prev_discarded):
  """Split one level of candidates into frequent and discarded itemsets.

  Parameters:
    itemsets: candidate itemsets (lists of items) for the current level.
    transactions: transaction collection, passed through to count_frequency.
    min_sup: minimum support count an itemset needs to be kept as frequent.
    prev_discarded: dict of discarded itemsets per level. Only the entry stored
      under the key equal to the number of entries is consulted.

  Returns:
    (lnew, l_support_values, c_support_values, new_discarded) where
      lnew: candidates whose count reached min_sup,
      l_support_values: their counts, aligned with lnew,
      c_support_values: the count of every candidate, aligned with itemsets,
      new_discarded: candidates that were counted but fell below min_sup.
    Candidates skipped because they contain a previously discarded itemset
    appear in neither lnew nor new_discarded.
  """
  lnew = []
  l_support_values = []
  c_support_values = []
  new_discarded = []

  # Convert the consulted level of discarded itemsets to sets once,
  # instead of once per candidate
  num_prev_discarded = len(prev_discarded)
  blocked = []
  if num_prev_discarded > 0 and len(itemsets) > 0:
    blocked = [set(discarded) for discarded in prev_discarded[num_prev_discarded]]

  for itemset in itemsets:
    # A single scan of the transactions serves both the C and the L tables
    support = count_frequency(itemset, transactions)
    c_support_values.append(support)

    # Prune: a candidate containing a discarded itemset is not considered further
    candidate = set(itemset)
    if any(discarded.issubset(candidate) for discarded in blocked):
      continue

    if support >= min_sup:
      lnew.append(itemset)
      l_support_values.append(support)
    else:
      new_discarded.append(itemset)

  return lnew, l_support_values, c_support_values, new_discarded


def table(frequent_itemsets, support_values):
  """Print a two-column table pairing each itemset with its support count.

  `support_values[i]` is the count printed next to `frequent_itemsets[i]`.
  """
  print('Itemset  |  Frequency')
  for position, itemset in enumerate(frequent_itemsets):
    print('{}   |   {}'.format(itemset, support_values[position]))
  print('\n')

def print_result(c, l, support_count_C, support_count_L):
  """Print the candidate (C) and frequent (L) tables for levels 1..len(c).

  All four arguments are indexed by the level number k.
  """
  for level in range(1, len(c) + 1):
    print("For k={}".format(level))
    # Candidate table first, then the frequent table for the same level
    for label, sets_by_level, counts_by_level in (
        ('C{}', c, support_count_C),
        ('L{}', l, support_count_L)):
      print(label.format(level))
      table(sets_by_level[level], counts_by_level[level])
    print('\n')

# **Initializing the candidate and frequent itemsets**

In [None]:
# Level counter: k is the size of the itemsets currently being processed
k = 1

# Level-1 candidates: one single-item itemset per distinct item
c = {k: [[single] for single in unique]}

# Per-level bookkeeping, all keyed by k
discarded_itemsets = {k: []}
l = dict()
support_count_L, support_count_C = dict(), dict()

# Taking the input for the minimum support value (an absolute count)
min_sup = int(input("Enter the value of minimum support: "))
print('The minimum support value is: {}'.format(min_sup))

Enter the value of minimum support: 2
The minimum support value is: 2


### **Generating the first candidate and frequent itemsets**

In [None]:
# Level-1 pass: count every single-item candidate and keep those meeting min_sup
frequent_itemsets, l_support_values, c_support_values, new_discarded = get_frequent_itemsets(
  c[k], transactions, min_sup, discarded_itemsets
)

# Record the outcome of this level under its key k
l[k] = frequent_itemsets
support_count_L[k] = l_support_values
support_count_C[k] = c_support_values
discarded_itemsets[k] = new_discarded

### **Producing candidate and frequent itemsets till terminating condition**

In [None]:
# Start at level 2 explicitly, rather than incrementing whatever k the kernel
# currently holds, so that re-running this cell reproduces the same result.
k = 2

# Drop any levels >= 2 left over from an earlier run of this cell. On a fresh
# kernel only level 1 exists, so nothing is removed.
for store in (c, l, support_count_L, support_count_C, discarded_itemsets):
  for stale_level in [level for level in store if level >= k]:
    del store[stale_level]

while True:
  # Join step: build the level-k candidates from the frequent (k-1)-itemsets
  c[k] = join_sets(l[k-1], unique)

  # Count the candidates and split them into frequent and discarded itemsets
  frequent_itemsets, l_support_values, c_support_values, new_discarded = get_frequent_itemsets(c[k], transactions, min_sup, discarded_itemsets)
  discarded_itemsets[k] = new_discarded
  l[k] = frequent_itemsets
  support_count_L[k] = l_support_values
  support_count_C[k] = c_support_values

  # Stop once a level yields no frequent itemsets; that empty level stays in l
  if len(l[k]) == 0:
    break

  k += 1

# **Results**

In [None]:
# Show the candidate and frequent tables for every level that was generated
print_result(c, l, support_count_C, support_count_L)

For k=1
C1
Itemset  |  Frequency
['I1']   |   6
['I2']   |   7
['I3']   |   6
['I4']   |   2
['I5']   |   2


L1
Itemset  |  Frequency
['I1']   |   6
['I2']   |   7
['I3']   |   6
['I4']   |   2
['I5']   |   2




For k=2
C2
Itemset  |  Frequency
['I1', 'I2']   |   4
['I1', 'I3']   |   4
['I1', 'I4']   |   1
['I1', 'I5']   |   2
['I2', 'I3']   |   4
['I2', 'I4']   |   2
['I2', 'I5']   |   2
['I3', 'I4']   |   0
['I3', 'I5']   |   1
['I4', 'I5']   |   0


L2
Itemset  |  Frequency
['I1', 'I2']   |   4
['I1', 'I3']   |   4
['I1', 'I5']   |   2
['I2', 'I3']   |   4
['I2', 'I4']   |   2
['I2', 'I5']   |   2




For k=3
C3
Itemset  |  Frequency
['I1', 'I2', 'I3']   |   2
['I1', 'I2', 'I5']   |   2
['I1', 'I3', 'I5']   |   1
['I2', 'I3', 'I4']   |   0
['I2', 'I3', 'I5']   |   1
['I2', 'I4', 'I5']   |   0


L3
Itemset  |  Frequency
['I1', 'I2', 'I3']   |   2
['I1', 'I2', 'I5']   |   2




For k=4
C4
Itemset  |  Frequency
['I1', 'I2', 'I3', 'I5']   |   1


L4
Itemset  |  Frequency






# **Generating Association Rules**

### **Helper functions**

In [None]:
# Shared accumulator that generate_powerset appends to
powerset = []

def generate_powerset(subset, index, itemset):
  """Append to `powerset` every extension of `subset` by items of itemset[index:].

  For each remaining item the "exclude" branch is explored before the
  "include" branch, so a call with ([], 0, itemset) appends the empty subset
  first and the full itemset last.
  """
  if index != len(itemset):
    # excluding the element
    generate_powerset(subset, index + 1, itemset)
    # including the element (a new list, so `subset` itself is left as it was)
    generate_powerset(subset + [itemset[index]], index + 1, itemset)
  else:
    # every item has been decided: store a copy of the finished subset
    powerset.append(subset.copy())


def generate_association_rules(l):
  """Derive association rules from the frequent itemsets in `l`.

  For every frequent itemset with at least two items, each non-empty proper
  subset s yields the rule  s => (itemset - s)  with
  confidence = count(itemset) / count(s).

  Reads the notebook-level names `transactions` and `min_conf`, and uses the
  shared `powerset` list as scratch space (cleared and refilled per itemset).

  Parameters:
    l: dict mapping a level k to the list of frequent k-itemsets.

  Returns:
    (possible_association_rules, final_association_rules): two dicts mapping
    the rule text to its confidence. The second holds only the rules whose
    confidence is >= min_conf.
  """
  def as_text(items):
    # Same look as a set repr, but items stay in itemset order, so the rule
    # text is identical across kernel sessions (set ordering of strings is not)
    return '{' + ', '.join(repr(item) for item in items) + '}'

  possible_association_rules = {}
  final_association_rules = {}

  # Visit every stored level in ascending order; this does not depend on `l`
  # ending with an empty level
  for level in sorted(l):
    for itemset in l[level]:
      # A rule needs a non-empty antecedent and a non-empty consequent
      if len(itemset) < 2:
        continue

      # The itemset's own count is the same for all of its rules
      count_itemset = count_frequency(itemset, transactions)

      powerset.clear()
      generate_powerset([], 0, itemset)
      # generate_powerset emits the empty subset first and the full itemset
      # last; neither can be an antecedent
      powerset.pop(0)
      powerset.pop(-1)

      for s in powerset:
        chosen = set(s)
        itemset_s = [item for item in itemset if item not in chosen]
        count_s = count_frequency(s, transactions)

        # Confidence is undefined when the antecedent never occurs
        if count_s == 0:
          continue

        rule = '{}=>{}'.format(as_text(s), as_text(itemset_s))
        confidence = (count_itemset/count_s)

        possible_association_rules[rule] = confidence

        if confidence >= min_conf:
          final_association_rules[rule] = confidence

  return possible_association_rules, final_association_rules

def print_rules_values(frequent_itemsets, confidence_values):
  """Print each rule next to its confidence as a raw ratio.

  `confidence_values[i]` is the value printed for `frequent_itemsets[i]`.
  """
  print('Rule  |  Confidence')
  for position, rule in enumerate(frequent_itemsets):
    print('{}   |   {}'.format(rule, confidence_values[position]))
  print('\n')

def print_rules_percentage(frequent_itemsets, confidence_values):
  """Print each rule next to its confidence expressed as a percentage.

  `confidence_values[i]` is multiplied by 100 and printed for
  `frequent_itemsets[i]`.
  """
  print('Rule  |  Confidence(%)')
  for position, rule in enumerate(frequent_itemsets):
    print('{}   |   {}'.format(rule, confidence_values[position]*100))
  print('\n')

### **Initializing the minimum confidence**

In [None]:
# Minimum confidence threshold, read from the user as a float.
# generate_association_rules compares each rule's confidence ratio against it,
# so it is expected as a fraction (the recorded run used 0.6).
min_conf = float(input('Enter the value of minimum confidence: '))


Enter the value of minimum confidence: 0.6


### **Generating the association rules**

In [None]:
# Build every candidate rule from the frequent itemsets in l, plus the subset
# of rules whose confidence is at least min_conf
possible_association_rules, final_association_rules = generate_association_rules(l)

### **Printing the frequent itemsets and their support count**

In [None]:
print("Final frequent itemsets: \n")

# Walk levels 1..len(l) and show only those that produced frequent itemsets
for level in range(1, len(l) + 1):
  if not l[level]:
    continue
  print("For k={}".format(level))
  print('L{}'.format(level))
  table(l[level], support_count_L[level])

Final frequent itemsets: 

For k=1
L1
Itemset  |  Frequency
['I1']   |   6
['I2']   |   7
['I3']   |   6
['I4']   |   2
['I5']   |   2


For k=2
L2
Itemset  |  Frequency
['I1', 'I2']   |   4
['I1', 'I3']   |   4
['I1', 'I5']   |   2
['I2', 'I3']   |   4
['I2', 'I4']   |   2
['I2', 'I5']   |   2


For k=3
L3
Itemset  |  Frequency
['I1', 'I2', 'I3']   |   2
['I1', 'I2', 'I5']   |   2




### **Printing the minimum confidence and minimum support**

In [None]:
# Echo both thresholds used for this run: min_conf filters the rules,
# min_sup filtered the itemsets
print("The value of minimum confidence is: {}".format(min_conf))
print("The value of minimum support is: {}".format(min_sup))

The value of minimum confidence is: 0.6
The value of minimum support is: 2


### **Printing the tentative set of rules and their confidence**

In [None]:
# Every candidate rule with its raw confidence; keys and values of a dict
# iterate in the same order, so the two lists stay aligned
print_rules_values(list(possible_association_rules), list(possible_association_rules.values()))

Rule  |  Confidence
{'I2'}=>{'I1'}   |   0.5714285714285714
{'I1'}=>{'I2'}   |   0.6666666666666666
{'I3'}=>{'I1'}   |   0.6666666666666666
{'I1'}=>{'I3'}   |   0.6666666666666666
{'I5'}=>{'I1'}   |   1.0
{'I1'}=>{'I5'}   |   0.3333333333333333
{'I3'}=>{'I2'}   |   0.6666666666666666
{'I2'}=>{'I3'}   |   0.5714285714285714
{'I4'}=>{'I2'}   |   1.0
{'I2'}=>{'I4'}   |   0.2857142857142857
{'I5'}=>{'I2'}   |   1.0
{'I2'}=>{'I5'}   |   0.2857142857142857
{'I3'}=>{'I2', 'I1'}   |   0.3333333333333333
{'I2'}=>{'I3', 'I1'}   |   0.2857142857142857
{'I3', 'I2'}=>{'I1'}   |   0.5
{'I1'}=>{'I3', 'I2'}   |   0.3333333333333333
{'I3', 'I1'}=>{'I2'}   |   0.5
{'I2', 'I1'}=>{'I3'}   |   0.5
{'I5'}=>{'I2', 'I1'}   |   1.0
{'I2'}=>{'I5', 'I1'}   |   0.2857142857142857
{'I2', 'I5'}=>{'I1'}   |   1.0
{'I1'}=>{'I2', 'I5'}   |   0.3333333333333333
{'I5', 'I1'}=>{'I2'}   |   1.0
{'I2', 'I1'}=>{'I5'}   |   0.5




### **Printing the final set of rules and their (confidence %)**

In [None]:
# Only the rules that met min_conf, with confidence shown as a percentage
print_rules_percentage(list(final_association_rules), list(final_association_rules.values()))

Rule  |  Confidence(%)
{'I1'}=>{'I2'}   |   66.66666666666666
{'I3'}=>{'I1'}   |   66.66666666666666
{'I1'}=>{'I3'}   |   66.66666666666666
{'I5'}=>{'I1'}   |   100.0
{'I3'}=>{'I2'}   |   66.66666666666666
{'I4'}=>{'I2'}   |   100.0
{'I5'}=>{'I2'}   |   100.0
{'I5'}=>{'I2', 'I1'}   |   100.0
{'I2', 'I5'}=>{'I1'}   |   100.0
{'I5', 'I1'}=>{'I2'}   |   100.0


