In [1]:
import numpy as np
import pandas as pd
from itertools import combinations,chain

# User input for data
dataset_input = int(input("Please Select the Dataset from the menu: \n 1. Amazon \n 2. Best Buy \n 3. K-Mart \n 4. Nike \n 5. Generic \n 6. Exit \n Your Selection is: "))

if dataset_input == 6:
    quit()
    
# load csv
datasets_list = ('Amazon', 'BestBuy', 'K-Mart', 'Nike', 'Generic')
df_tr=pd.read_csv("Dataset_" + datasets_list[dataset_input-1] +".csv")
df_itemset=pd.read_csv("Itemset_" + datasets_list[dataset_input-1] +".csv")
print("You have selected dataset located in Dataset_"+ datasets_list[dataset_input-1] +".csv")

# set order
order = sorted(df_itemset['Item Name'])

dataset = []
for lines in df_tr['Transaction']:
    trans = list(lines.strip().split(', '))
    trans_1= list(np.unique(trans))
    trans_1.sort(key=lambda x: order.index(x))
    dataset.append(sorted(trans_1))
    
trans_num = len(dataset)

# User Input for Support and Confidence
minimum_support = int(input("Enter Minimum Support in % (value from 1 to 100): "))
minimum_confidence = int(input("Enter Minimum Confidence in % (value from 1 to 100): "))

Please Select the Dataset from the menu: 
 1. Amazon 
 2. Best Buy 
 3. K-Mart 
 4. Nike 
 5. Generic 
 6. Exit 
 Your Selection is: 1
You have selected dataset located in Dataset_Amazon.csv
Enter Minimum Support in % (value from 1 to 100): 30
Enter Minimum Confidence in % (value from 1 to 100): 40


In [2]:
C = {}
L = {}
sup_count_L = {}
itemset_size = 1
non_frequent = {itemset_size:[]}
C.update({itemset_size:[[f] for f in order]})

In [3]:
C

{1: [['A Beginner’s Guide'],
  ['Android Programming: The Big Nerd Ranch'],
  ['Beginning Programming with Java'],
  ['C++ Programming in Easy Steps'],
  ['Effective Java (2nd Edition)'],
  ['HTML and CSS: Design and Build Websites'],
  ['Head First Java 2nd Edition'],
  ['Java 8 Pocket Guide'],
  ['Java For Dummies'],
  ['Java: The Complete Reference']]}

In [4]:
def count_items(itemset, dataset):
    count=0
    for i in range (0, len(dataset)):
        if set(itemset).issubset(set(dataset[i])):
            count += 1
    return count

In [5]:
def frequent_itemsets(itemsets, dataset, minimum_support, non_frequent):
    L = []
    sup_count = []
    new_non_frequent = []
    trans_num = len(dataset)
    K = len(non_frequent.keys())
    for i in range(0, len(itemsets)):
        temp = 0
        if K>0:
            for j in non_frequent[K]:
                if set(j).issubset(set(itemsets[i])):
                    temp = 1
                    break
        if temp == 0:
            freq_count = count_items(itemsets[i], dataset)
            if freq_count >= (minimum_support/100)*trans_num:
                L.append(itemsets[i])
                sup_count.append(freq_count)
            else:
                new_non_frequent.append(itemsets[i])
    return L, sup_count, new_non_frequent

In [6]:
def print_table (table, sup_count):
        print("Itemset | Count")
        for i in range (0, len(table)):
            print("{} : {}".format(table[i], sup_count[i]))
        print("\n\n")

In [7]:
def get_candidate_set(items, order):
    Temp = []
    for i in range (0, len(items)):
        for j in range (i+1, len(items)):
            items_1 = join_itemsets(items[i],items[j],order)
            if len(items_1) > 0:
                Temp.append(items_1)
    return Temp

In [8]:
def join_itemsets (item_1, item_2, order):
    item_1.sort(key=lambda x: order.index(x))
    item_2.sort(key=lambda x: order.index(x))
        
    for i in range (0, len(item_1)-1):
        if item_1[i] != item_2[i]:
            return []
    if order.index(item_1[-1]) < order.index(item_2[-1]):
        return item_1 + [item_2[-1]]
    return []

In [9]:
def all_subsets(x):
    s = list(x)
    subsets = list(chain.from_iterable(combinations(s, r) for r in range(1,len(s)+1)))
    return subsets

In [10]:
def write_assoc_rules(item_2, item_1, conf, support, trans_num, rule_no):
    assoc_rules = ""
    assoc_rules += "Rule {}: {} -> {} \n".format(rule_no, list(item_1), list(item_2))
    assoc_rules += "Confidence: {0:2.2f}% \n".format(conf*100)
    assoc_rules += "Support: {0:2.2f}% \n\n".format((support/trans_num)*100)
    return assoc_rules

In [11]:
freq_set, support, new_non_frequent = frequent_itemsets(C[itemset_size], dataset, minimum_support, non_frequent)
L.update({itemset_size : freq_set})
non_frequent.update({itemset_size : new_non_frequent})
sup_count_L.update({itemset_size : support})

In [12]:
print("\nTable C1: \n")
print_table(C[1], [count_items(item, dataset) for item in C[1]])
print("\nTable L1: \n")
print_table(L[1], sup_count_L[1])


Table C1: 

Itemset | Count
['A Beginner’s Guide'] : 11
['Android Programming: The Big Nerd Ranch'] : 13
['Beginning Programming with Java'] : 6
['C++ Programming in Easy Steps'] : 1
['Effective Java (2nd Edition)'] : 0
['HTML and CSS: Design and Build Websites'] : 2
['Head First Java 2nd Edition'] : 8
['Java 8 Pocket Guide'] : 4
['Java For Dummies'] : 13
['Java: The Complete Reference'] : 10




Table L1: 

Itemset | Count
['A Beginner’s Guide'] : 11
['Android Programming: The Big Nerd Ranch'] : 13
['Beginning Programming with Java'] : 6
['Head First Java 2nd Edition'] : 8
['Java For Dummies'] : 13
['Java: The Complete Reference'] : 10





#### Generating Candidate Set and Frequent Set

In [13]:
K = itemset_size + 1
temp = 0
while temp == 0:
    C.update({K : get_candidate_set(L[K-1],order)})
    print("Table C{}: \n".format(K))
    print_table(C[K], [count_items(item, dataset) for item in C[K]])
    freq_set, support, new_non_frequent = frequent_itemsets(C[K], dataset, minimum_support, non_frequent)
    L.update({K : freq_set})
    non_frequent.update({K : new_non_frequent})
    sup_count_L.update({K : support})
    if len(L[K]) == 0:
        temp = 1
    else:
        print("Table L{}: \n".format(K))
        print_table(L[K], sup_count_L[K])
    K += 1

Table C2: 

Itemset | Count
['A Beginner’s Guide', 'Android Programming: The Big Nerd Ranch'] : 6
['A Beginner’s Guide', 'Beginning Programming with Java'] : 1
['A Beginner’s Guide', 'Head First Java 2nd Edition'] : 3
['A Beginner’s Guide', 'Java For Dummies'] : 9
['A Beginner’s Guide', 'Java: The Complete Reference'] : 9
['Android Programming: The Big Nerd Ranch', 'Beginning Programming with Java'] : 3
['Android Programming: The Big Nerd Ranch', 'Head First Java 2nd Edition'] : 6
['Android Programming: The Big Nerd Ranch', 'Java For Dummies'] : 9
['Android Programming: The Big Nerd Ranch', 'Java: The Complete Reference'] : 6
['Beginning Programming with Java', 'Head First Java 2nd Edition'] : 4
['Beginning Programming with Java', 'Java For Dummies'] : 1
['Beginning Programming with Java', 'Java: The Complete Reference'] : 0
['Head First Java 2nd Edition', 'Java For Dummies'] : 3
['Head First Java 2nd Edition', 'Java: The Complete Reference'] : 1
['Java For Dummies', 'Java: The Complet

### Association Rules

In [14]:
# Association Rules
assoc_rules = ""
rule_no = 1
for i in range (1, len(L)):
    for j in range (0, len(L[i])):
        subsets = list(all_subsets(set(L[i][j])))
        subsets.pop()
        for k in subsets:
            item_1 = set(k)
            freq_set_1 = set(L[i][j])
            item_2 = set(freq_set_1-item_1)
            support_freq_set_1 = count_items(freq_set_1, dataset)
            support_item_1 = count_items(item_1, dataset)
            support_item_2 = count_items(item_2, dataset)
            conf = support_freq_set_1/support_item_1
            if conf >= (minimum_confidence/100) and support_freq_set_1 >= (minimum_support/100)*trans_num:
                assoc_rules += write_assoc_rules(item_2, item_1, conf, support_freq_set_1, trans_num, rule_no)
                rule_no += 1

In [15]:
print("Final Association Rules: \n")
print(assoc_rules)

Final Association Rules: 

Rule 1: ['A Beginner’s Guide'] -> ['Android Programming: The Big Nerd Ranch'] 
Confidence: 54.55% 
Support: 30.00% 

Rule 2: ['Android Programming: The Big Nerd Ranch'] -> ['A Beginner’s Guide'] 
Confidence: 46.15% 
Support: 30.00% 

Rule 3: ['A Beginner’s Guide'] -> ['Java For Dummies'] 
Confidence: 81.82% 
Support: 45.00% 

Rule 4: ['Java For Dummies'] -> ['A Beginner’s Guide'] 
Confidence: 69.23% 
Support: 45.00% 

Rule 5: ['Java: The Complete Reference'] -> ['A Beginner’s Guide'] 
Confidence: 90.00% 
Support: 45.00% 

Rule 6: ['A Beginner’s Guide'] -> ['Java: The Complete Reference'] 
Confidence: 81.82% 
Support: 45.00% 

Rule 7: ['Head First Java 2nd Edition'] -> ['Android Programming: The Big Nerd Ranch'] 
Confidence: 75.00% 
Support: 30.00% 

Rule 8: ['Android Programming: The Big Nerd Ranch'] -> ['Head First Java 2nd Edition'] 
Confidence: 46.15% 
Support: 30.00% 

Rule 9: ['Java For Dummies'] -> ['Android Programming: The Big Nerd Ranch'] 
Confidence

### Testing Results with apriori_python package

In [16]:
from apriori_python import apriori

#### Package use condition confidence > minConf and our code uses condition confidence >= min confidence
#### Hence, using minConf = 0.39 instead of 0.4 to verify results

In [17]:
freqItemSet, rules = apriori(dataset, minSup=0.3, minConf=0.39)

In [18]:
for i in range (len(rules)):
    print("Rule {}: {} \n ".format(i+1, rules[i]))

Rule 1: [{'Android Programming: The Big Nerd Ranch'}, {'Java: The Complete Reference'}, 0.46153846153846156] 
 
Rule 2: [{'Android Programming: The Big Nerd Ranch'}, {'Head First Java 2nd Edition'}, 0.46153846153846156] 
 
Rule 3: [{'Android Programming: The Big Nerd Ranch'}, {'A Beginner’s Guide'}, 0.46153846153846156] 
 
Rule 4: [{'Java For Dummies'}, {'Java: The Complete Reference', 'Android Programming: The Big Nerd Ranch'}, 0.46153846153846156] 
 
Rule 5: [{'Android Programming: The Big Nerd Ranch'}, {'Java: The Complete Reference', 'Java For Dummies'}, 0.46153846153846156] 
 
Rule 6: [{'A Beginner’s Guide'}, {'Android Programming: The Big Nerd Ranch'}, 0.5454545454545454] 
 
Rule 7: [{'Java: The Complete Reference'}, {'Android Programming: The Big Nerd Ranch'}, 0.6] 
 
Rule 8: [{'Java: The Complete Reference'}, {'Java For Dummies', 'Android Programming: The Big Nerd Ranch'}, 0.6] 
 
Rule 9: [{'Java: The Complete Reference', 'Java For Dummies'}, {'Android Programming: The Big Nerd

### Results are verified