In [350]:
import pandas as pd
import numpy as np
from itertools import combinations

File needs to be a **csv** of the following format:

```
item1, item2, item3, ... so on
 , t, ...
t, t, t,...
t, t, ...
... so on...```

In [351]:
df = pd.read_csv("test1.csv", low_memory=False)
df.head()

Unnamed: 0,handphone,laptop,charger,powerbank,tablet
0,t,t,,,
1,t,t,t,,
2,t,t,t,t,
3,t,t,,,t
4,t,,t,,t


In [352]:
item_list = list(df.columns)
item_dict = dict()

for i, item in enumerate(item_list):
    item_dict[item] = i + 1

item_dict

{'handphone': 1, 'laptop': 2, 'charger': 3, 'powerbank': 4, 'tablet': 5}

In [353]:
transactions = list()

for i, row in df.iterrows():
    transaction = set()
    
    for item in item_dict:
        if row[item] == 't':
            transaction.add(item_dict[item])
    transactions.append(transaction)
    
transactions

[{1, 2},
 {1, 2, 3},
 {1, 2, 3, 4},
 {1, 2, 5},
 {1, 3, 5},
 {4, 5},
 {1, 2, 3, 5},
 {1, 3},
 {1, 4},
 {2, 3, 4}]

In [354]:
def get_initial_support(transactions, item_set):
    transactionIDs = []
    match_count = 0
    transactionID = 0
    for transaction in transactions:
        if item_set.issubset(transaction):
            match_count += 1
            transactionIDs.append(transactionID)
        transactionID += 1

    return float(match_count/len(transactions)), transactionIDs


def get_support(transactions, transactionIDs, item_set):
    match_count = 0
    newTransactionIDs = []
    for transactionID in transactionIDs:
        if item_set.issubset(transactions[transactionID]):
            match_count += 1
            newTransactionIDs.append(transactionID)
            
    return float(match_count / len(transactionIDs)), newTransactionIDs


In [355]:
def self_join(frequent_item_sets_per_level, level):
    current_level_candidates = list()
    last_level_items = frequent_item_sets_per_level[level - 1]
    
    if len(last_level_items) == 0:
        return current_level_candidates
    
    for i in range(len(last_level_items)):
        for j in range(i+1, len(last_level_items)):
            itemset_i = last_level_items[i][0]
            itemset_j = last_level_items[j][0]
            union_set = itemset_i.union(itemset_j)
            
            if union_set not in current_level_candidates and len(union_set) == level:
                current_level_candidates.append(union_set)
                
    return current_level_candidates

In [356]:
def get_single_drop_subsets(item_set):
    single_drop_subsets = list()
    for item in item_set:
        temp = item_set.copy()
        temp.remove(item)
        single_drop_subsets.append(temp)
        
    return single_drop_subsets

def is_valid_set(item_set, prev_level_sets):
    single_drop_subsets = get_single_drop_subsets(item_set)
    
    for single_drop_set in single_drop_subsets:
        if single_drop_set not in prev_level_sets:
            return False
    return True

def pruning(frequent_item_sets_per_level, level, candidate_set):
    post_pruning_set = list()
    if len(candidate_set) == 0:
        return post_pruning_set
    
    prev_level_sets = list()
    for item_set, _, _ in frequent_item_sets_per_level[level - 1]:
        prev_level_sets.append(item_set)
        
    for item_set in candidate_set:
        if is_valid_set(item_set, prev_level_sets):
            post_pruning_set.append(item_set)
            
    return post_pruning_set

---
## Apriori Improved Algorithm

In [357]:
from collections import defaultdict

def apriori(min_support):
    frequent_item_sets_per_level = defaultdict(list)
    print("level : 1", end = " ")
    
    for item in range(1, len(item_list) + 1):
        support, transactionIDs = get_initial_support(transactions, {item})
        if support >= min_support:
            frequent_item_sets_per_level[1].append(({item}, support, transactionIDs))
        
    for level in range(2, len(item_list) + 1):
        print(level, end = " ")
        current_level_candidates = self_join(frequent_item_sets_per_level, level)

        post_pruning_candidates = pruning(frequent_item_sets_per_level, level, current_level_candidates)
        if len(post_pruning_candidates) == 0:
            break

        for item_set in post_pruning_candidates:
            min_support = 1
            selected_item_transactionIDs = []
            for item in item_set:
                for tuple_item in frequent_item_sets_per_level[1]:
                    if item in tuple_item[0]: 
                        if (min_support > tuple_item[1]):
                            min_support =  tuple_item[1]
                            selected_item_transactionIDs = tuple_item[2]
                        break
            support, transactionIDs = get_support(transactions, selected_item_transactionIDs,  item_set)
            if support >= min_support:
                frequent_item_sets_per_level[level].append((item_set, support, transactionIDs))
                
    return frequent_item_sets_per_level

In [358]:
min_support = 0.005
frequent_item_sets_per_level = apriori(min_support)

level : 1 2 3 4 

In [359]:
for level in frequent_item_sets_per_level:
    print(len(frequent_item_sets_per_level[level]))

5
9
3


In [360]:
for level in frequent_item_sets_per_level:
    print(frequent_item_sets_per_level[level])

[({1}, 0.8, [0, 1, 2, 3, 4, 6, 7, 8]), ({2}, 0.6, [0, 1, 2, 3, 6, 9]), ({3}, 0.6, [1, 2, 4, 6, 7, 9]), ({4}, 0.4, [2, 5, 8, 9]), ({5}, 0.4, [3, 4, 5, 6])]
[({1, 2}, 0.8333333333333334, [0, 1, 2, 3, 6]), ({1, 3}, 0.8333333333333334, [1, 2, 4, 6, 7]), ({1, 4}, 0.5, [2, 8]), ({1, 5}, 0.75, [3, 4, 6]), ({2, 3}, 0.6666666666666666, [1, 2, 6, 9]), ({2, 4}, 0.5, [2, 9]), ({2, 5}, 0.5, [3, 6]), ({3, 4}, 0.5, [2, 9]), ({3, 5}, 0.5, [4, 6])]
[({1, 2, 5}, 0.5, [3, 6]), ({1, 3, 5}, 0.5, [4, 6]), ({2, 3, 4}, 0.5, [2, 9])]


---
## Generating Association Rules


In [361]:
item_support_dict = dict()
item_list = list()

key_list = list(item_dict.keys())
val_list = list(item_dict.values())

for level in frequent_item_sets_per_level:
    for set_support_pair in frequent_item_sets_per_level[level]:
        for i in set_support_pair[0]:
            item_list.append(key_list[val_list.index(i)])
        item_support_dict[frozenset(item_list)] = set_support_pair[1]
        item_list = list()

In [362]:
item_support_dict

{frozenset({'handphone'}): 0.8,
 frozenset({'laptop'}): 0.6,
 frozenset({'charger'}): 0.6,
 frozenset({'powerbank'}): 0.4,
 frozenset({'tablet'}): 0.4,
 frozenset({'handphone', 'laptop'}): 0.8333333333333334,
 frozenset({'charger', 'handphone'}): 0.8333333333333334,
 frozenset({'handphone', 'powerbank'}): 0.5,
 frozenset({'handphone', 'tablet'}): 0.75,
 frozenset({'charger', 'laptop'}): 0.6666666666666666,
 frozenset({'laptop', 'powerbank'}): 0.5,
 frozenset({'laptop', 'tablet'}): 0.5,
 frozenset({'charger', 'powerbank'}): 0.5,
 frozenset({'charger', 'tablet'}): 0.5,
 frozenset({'handphone', 'laptop', 'tablet'}): 0.5,
 frozenset({'charger', 'handphone', 'tablet'}): 0.5,
 frozenset({'charger', 'laptop', 'powerbank'}): 0.5}


**find_subset** finds all the subsets of the given itemset.

In [363]:
def find_subset(item, item_length):
    combs = []
    for i in range(1, item_length + 1):
        combs.append(list(combinations(item, i)))
        
    subsets = []
    for comb in combs:
        for elt in comb:
            subsets.append(elt)
            
    return subsets

**association_rules**

In [364]:
def association_rules(min_confidence, support_dict):
    rules = list()
    for item, support in support_dict.items():
        item_length = len(item)
       
        if item_length > 1:
            subsets = find_subset(item, item_length)
           
            for A in subsets:
                B = item.difference(A)
               
                if B:
                    A = frozenset(A)
                    
                    AB = A | B
                    
                    confidence = support_dict[AB] / support_dict[A]
                    if confidence >= min_confidence:
                        rules.append((A, B, confidence))
    
    return rules

### Specify Minimum confidence value here

In [365]:
association_rules = association_rules(min_confidence = 0.6, support_dict = item_support_dict)

---
### Printing the output in the required format

In [366]:
print("Number of rules: ", len(association_rules), "\n")

for rule in association_rules:
    print('{0} -> {1} <confidence: {2}>'.format(set(rule[0]), set(rule[1]), rule[2]))

Number of rules:  36 

{'laptop'} -> {'handphone'} <confidence: 1.388888888888889>
{'handphone'} -> {'laptop'} <confidence: 1.0416666666666667>
{'charger'} -> {'handphone'} <confidence: 1.388888888888889>
{'handphone'} -> {'charger'} <confidence: 1.0416666666666667>
{'powerbank'} -> {'handphone'} <confidence: 1.25>
{'handphone'} -> {'powerbank'} <confidence: 0.625>
{'tablet'} -> {'handphone'} <confidence: 1.875>
{'handphone'} -> {'tablet'} <confidence: 0.9375>
{'laptop'} -> {'charger'} <confidence: 1.1111111111111112>
{'charger'} -> {'laptop'} <confidence: 1.1111111111111112>
{'laptop'} -> {'powerbank'} <confidence: 0.8333333333333334>
{'powerbank'} -> {'laptop'} <confidence: 1.25>
{'laptop'} -> {'tablet'} <confidence: 0.8333333333333334>
{'tablet'} -> {'laptop'} <confidence: 1.25>
{'charger'} -> {'powerbank'} <confidence: 0.8333333333333334>
{'powerbank'} -> {'charger'} <confidence: 1.25>
{'tablet'} -> {'charger'} <confidence: 1.25>
{'charger'} -> {'tablet'} <confidence: 0.83333333333