In [201]:
import numpy as np
import pandas as pd 
from itertools import combinations 
from collections import OrderedDict 

In [202]:
def fetch_dataset(filename):
    """Read the spreadsheet at ``filename`` with ``pandas.read_excel``.

    Parameters
    ----------
    filename : path or file-like
        Passed straight through to ``pd.read_excel``.

    Returns
    -------
    Whatever ``pd.read_excel`` returns for ``filename`` (no post-processing
    is applied here).
    """
    return pd.read_excel(filename)

In [203]:
def list_to_dict(input_list):
    """Tally how often each element occurs in ``input_list``.

    Parameters
    ----------
    input_list : iterable of hashable, mutually orderable values

    Returns
    -------
    OrderedDict
        Maps each distinct element to its number of occurrences, with the
        entries ordered by ascending key.
    """
    tally = {}
    for element in input_list:
        # First sighting starts from 0, later ones bump the running count.
        tally[element] = tally.get(element, 0) + 1

    # Sort the (element, count) pairs so the result iterates in key order.
    ordered_pairs = sorted(tally.items())
    return OrderedDict(ordered_pairs)

In [204]:
def keep_required_elements(input_dict,min_support):
    """Drop every entry whose value is below ``min_support``.

    Parameters
    ----------
    input_dict : dict
        Mapping of key -> count.
    min_support : number
        Inclusive lower bound; entries with ``count >= min_support`` survive.

    Returns
    -------
    dict
        A new plain dict holding the surviving entries in their original
        iteration order. ``input_dict`` is not modified.
    """
    return {
        itemset: count
        for itemset, count in input_dict.items()
        if count >= min_support
    }

In [205]:
def combination_list(dataset, k):
    """Count every ``k``-item combination over all transactions.

    Parameters
    ----------
    dataset : pandas.DataFrame (or any mapping with an ``"items"`` entry)
        ``dataset["items"]`` must iterate over strings of comma-separated
        item names, one string per transaction (e.g. ``"l1,l2,l5"``).
    k : int
        Size of the itemsets to enumerate.

    Returns
    -------
    OrderedDict
        The result of ``list_to_dict``: each comma-joined ``k``-itemset
        mapped to the number of transactions that contain it, ordered by key.
    """
    itemsets = []

    # Iterate over the column's values instead of ``dataset["items"][i]`` with
    # ``i in range(len(dataset))``: that form is a label lookup on an
    # integer index and raises KeyError once the frame has been filtered or
    # re-indexed so its labels are no longer 0..n-1.
    for transaction in dataset["items"]:

        # Canonicalise the transaction: trim stray whitespace, ignore empty
        # fragments, de-duplicate and sort. Without this "l2,l1" and "l1,l2"
        # would be tallied as two different itemsets, and generate_rules
        # (which looks up sorted keys) could not find their counts.
        items = sorted({
            item.strip() for item in transaction.split(",") if item.strip()
        })

        # extend() appends in place; ``y = y + [...]`` recopied the whole
        # list for every row.
        itemsets.extend(",".join(combo) for combo in combinations(items, k))

    return list_to_dict(itemsets)

In [206]:
def create_final_itemset_table(dataset, support_threshold=None):
    """Collect the frequent itemsets of every size, smallest first.

    Starting at size 1, the counts from ``combination_list`` are filtered
    with ``keep_required_elements``; the size is increased until a level
    comes back empty.

    Parameters
    ----------
    dataset :
        Passed unchanged to ``combination_list``.
    support_threshold : number, optional
        Minimum count an itemset needs to be kept. When omitted, the
        notebook-level ``min_support`` variable is used, which is what this
        function always did before the parameter existed.

    Returns
    -------
    (k, store_combinations) : tuple
        ``store_combinations`` maps each examined size to its dict of
        frequent itemsets. ``k`` is the first size that produced no frequent
        itemset, so ``store_combinations[k]`` is empty and ``k - 1`` is the
        largest size with results (``k == 1`` means nothing was frequent).
    """
    if support_threshold is None:
        # Backward-compatible default: read the notebook global at call time.
        support_threshold = min_support

    store_combinations = dict()
    k = 1

    while True:

        frequent = keep_required_elements(
            combination_list(dataset, k), support_threshold
        )

        # The empty terminating level is stored as well, as before.
        store_combinations[k] = frequent

        if not frequent:
            break

        k += 1

    return k, store_combinations

In [207]:
def generate_rules(k, store_combinations):
    """Build association rules from the itemsets stored at level ``k - 1``.

    Every itemset at level ``k - 1`` is split in all possible ways into a
    non-empty consequent and the remaining antecedent. The rule's confidence
    is ``count(itemset) / count(antecedent) * 100``.

    Parameters
    ----------
    k : int
        One more than the itemset size to derive rules from (the first value
        returned by ``create_final_itemset_table``).
    store_combinations : dict
        Maps itemset size -> {comma-joined itemset: count}. Antecedent counts
        are looked up under their items joined in sorted order, so the keys
        of the smaller levels must be stored that way.

    Returns
    -------
    dict
        ``{(antecedent, consequent): confidence_percent}`` with both parts as
        comma-joined strings. Empty when level ``k - 1`` is missing or empty
        (for example ``k == 1``, i.e. no frequent itemset at all).

    Raises
    ------
    TypeError
        If an antecedent has no recorded count (the lookup yields ``None``
        and the division fails).
    """
    rules_dict = dict()

    # Level k-1 does not exist when nothing was frequent (k == 1); indexing
    # it directly used to raise KeyError. No itemsets means no rules.
    top_level = store_combinations.get(k - 1)
    if not top_level:
        return rules_dict

    for itemset_key, itemset_count in top_level.items():

        # Split once per itemset rather than once per consequent size.
        items = itemset_key.split(",")
        item_set = set(items)

        # Consequent sizes 1 .. k-2 leave at least one item in the antecedent.
        for consequent_size in range(1, k - 1):

            for consequent in combinations(items, consequent_size):

                antecedent = sorted(item_set - set(consequent))
                antecedent_key = ",".join(antecedent)

                antecedent_count = store_combinations[len(antecedent)].get(antecedent_key)

                confidence = itemset_count / antecedent_count

                rules_dict[(antecedent_key, ",".join(consequent))] = confidence * 100

    return rules_dict

In [208]:
def print_all_rules(rules_dict):
    """Print every rule in ``rules_dict``, one per line.

    Parameters
    ----------
    rules_dict : dict
        Maps ``(left, right)`` pairs to a value; each entry is written as
        ``left  ==>  right  ==  value``.
    """
    for rule, confidence in rules_dict.items():
        left, right = rule
        # Joining with a single space reproduces print()'s default separator.
        line = " ".join((str(left), " ==> ", str(right), " == ", str(confidence)))
        print(line)

In [209]:
def print_final_rules(rules_dict,min_confidence_percent):
    """Print a header followed by the rules that reach the confidence bound.

    Parameters
    ----------
    rules_dict : dict
        Maps ``(left, right)`` pairs to a confidence value.
    min_confidence_percent : number
        Inclusive lower bound; only rules with
        ``confidence >= min_confidence_percent`` are printed, each as
        ``left  ====>  right  ==  confidence``.
    """
    for header_line in ("", "Association Rules for the Itemset are as follows :", ""):
        print(header_line)

    for rule, confidence in rules_dict.items():
        # Skip anything that does not satisfy the ">=" test.
        if not confidence >= min_confidence_percent:
            continue

        left, right = rule
        print(" ".join((str(left), " ====> ", str(right), " == ", str(confidence))))

In [210]:
def display_final_itemset(final_itemset):
    """Print a two-column listing of itemsets and their counts.

    Parameters
    ----------
    final_itemset : dict
        Maps each itemset key to its count. A header line is printed first,
        then one ``key   count`` line per entry in iteration order.
    """
    print(" ".join(("Itemset", " ", " Count")))

    for itemset, count in final_itemset.items():
        print(" ".join((str(itemset), " ", str(count))))

In [211]:
# Minimum occurrence count an itemset needs to be kept; read as a global by
# create_final_itemset_table and compared with ``>=`` against raw counts.
min_support = 2

# Minimum rule confidence, in percent (rule values are ratio * 100), that
# print_final_rules requires before printing a rule.
min_confidence_percent = 60

In [212]:
# Load the transactions; the path is relative to the notebook's working
# directory. Downstream code reads the comma-separated "items" column.
dataset = fetch_dataset("apriori_data.xlsx")

In [213]:
# Preview up to the first 10 rows to confirm the file loaded as expected.
dataset.head(10)

Unnamed: 0,TID,items
0,T1,"l1,l2,l5"
1,T2,"l2,l4"
2,T3,"l2,l3"
3,T4,"l1,l2,l4"
4,T5,"l1,l3"
5,T6,"l2,l3"
6,T7,"l1,l3"
7,T8,"l1,l2,l3,l5"
8,T9,"l1,l2,l3"


In [214]:
# Mine frequent itemsets size by size; ``k`` is the first size that had none.
k, store_combinations = create_final_itemset_table(dataset)

# ``k - 1`` is the largest size that still has frequent itemsets. When nothing
# meets ``min_support`` the search stops at k == 1 and there is no level 0, so
# fall back to an empty table instead of raising KeyError.
largest_itemsets = store_combinations.get(k - 1, {})

display_final_itemset(largest_itemsets)

# Rules are derived from the largest frequent itemsets only; with none there
# are no rules to build.
rules_dict = generate_rules(k, store_combinations) if largest_itemsets else {}

print_final_rules(rules_dict, min_confidence_percent)

Itemset    Count
l1,l2,l3   2
l1,l2,l5   2

Association Rules for the Itemset are as follows :

l2,l5  ====>  l1  ==  100.0
l1,l5  ====>  l2  ==  100.0
l5  ====>  l1,l2  ==  100.0
