<a href="https://colab.research.google.com/github/Eswar-11/DWDM/blob/main/dwdm08.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1)**
Develop a python code to generate frequent item setsusing Apriorialgorithmwith minimum
support is 3. Consider the following
transactions:(("a","b","c"),("a","b"),("a","b","d"),("b","e"),("b","c","e"),("a","d","e"),("
a","c"),("a","b","d"),("c","e"),("a","b","d","e"),("a",'b','e','c'))


In [None]:
def create_candidate_1(X):
    """
    create the 1-item candidate,
    it's basically creating a frozenset for each unique item
    and storing them in a list
    """
    c1 = []
    for transaction in X:
        for t in transaction:
            t = frozenset([t])
            if t not in c1:
                c1.append(t)
    return(c1)


In [None]:
def apriori(X, min_support):
    """
    pass in the transaction data and the minimum support 
    threshold to obtain the frequent itemset. Also
    store the support for each itemset, they will
    be used in the rule generation step
    """

    # the candidate sets for the 1-item is different,
    # create them independently from others
    c1 = create_candidate_1(X)
    freq_item, item_support_dict = create_freq_item(X, c1, min_support = 0.5)
    freq_items = [freq_item]
    k = 0
    while len(freq_items[k]) > 0:
        freq_item = freq_items[k]
        ck = create_candidate_k(freq_item, k)       
        freq_item, item_support = create_freq_item(X, ck, min_support = 0.5)
        freq_items.append(freq_item)
        item_support_dict.update(item_support)
        k += 1
        
    return freq_items, item_support_dict



In [None]:
def create_freq_item(X, ck, min_support):
    """
    filters the candidate with the specified
    minimum support
    """
    # loop through the transaction and compute
    # the count for each candidate (item)
    item_count = {}
    for transaction in X:
        for item in ck:
            if item.issubset(transaction):
                if item not in item_count: 
                    item_count[item] = 1
                else: 
                    item_count[item] += 1    
    
    n_row = X.shape[0]
    freq_item = []
    item_support = {}
    
    # if the support of an item is greater than the 
    # min_support, then it is considered as frequent
    for item in item_count:
        support = item_count[item] / n_row
        if support >= min_support:
            freq_item.append(item)
        
        item_support[item] = support
        
    return freq_item, item_support


In [None]:
def create_candidate_k(freq_item, k):
    """create the list of k-item candidate"""
    ck = []
    
    # for generating candidate of size two (2-itemset)
    if k == 0:
        for f1, f2 in combinations(freq_item, 2):
            item = f1 | f2 # union of two sets
            ck.append(item)
    else:    
        for f1, f2 in combinations(freq_item, 2):       
            # if the two (k+1)-item sets has
            # k common elements then they will be
            # unioned to be the (k+2)-item candidate
            intersection = f1 & f2
            if len(intersection) == k:
                item = f1 | f2
                if item not in ck:
                    ck.append(item)
    return ck

In [None]:
import numpy as np
from itertools import combinations
X = np.array([list(['a','b','c']),
              list(['a','b']),
              list(['a','b','d']),
              list(['b','e']),
              list(['b','c','e']),
             list(['a','d','e']),
             list(['a','c']),
              list(['a','b','d']),
              list(['c','e']),
              list(['a','b','d','e']),
              list(['a','b','e','c'])],dtype=object)
freq_items, item_support_dict = apriori(X, min_support = 3)
freq_items
print(item_support_dict)
#19BCN7003

{frozenset({'a'}): 0.7272727272727273, frozenset({'b'}): 0.7272727272727273, frozenset({'c'}): 0.45454545454545453, frozenset({'d'}): 0.36363636363636365, frozenset({'e'}): 0.5454545454545454, frozenset({'a', 'b'}): 0.5454545454545454, frozenset({'e', 'b'}): 0.36363636363636365, frozenset({'a', 'e'}): 0.2727272727272727}


**2)** Develop a python code to provideassociation rules from thegenerated frequent itemsetsin
exercise 1with minimum confidence of 80%.[Perform the comparison of your output 
with predefined packages output carried out in Lab Exercise 8.]*** Do not use any 
predefined packages such as mlxtend, apyori to apply Apriori algorithm.

In [None]:
def create_rules(freq_items, item_support_dict, min_confidence):
    """
    create the association rules, the rules will be a list.
    each element is a tuple of size 4, containing rules'
    left hand side, right hand side, confidence and lift
    """
    association_rules = []

    # for the list that stores the frequent items, loop through
    # the second element to the one before the last to generate the rules
    # because the last one will be an empty list. It's the stopping criteria
    # for the frequent itemset generating process and the first one are all
    # single element frequent itemset, which can't perform the set
    # operation X -> Y - X
    for idx, freq_item in enumerate(freq_items[1:(len(freq_items) - 1)]):
        for freq_set in freq_item:
            
            # start with creating rules for single item on
            # the right hand side
            subsets = [frozenset([item]) for item in freq_set]
            rules, right_hand_side = compute_conf(freq_items, item_support_dict, 
                                                  freq_set, subsets, min_confidence)
            association_rules.extend(rules)
            
            # starting from 3-itemset, loop through each length item
            # to create the rules, as for the while loop condition,
            # e.g. suppose you start with a 3-itemset {2, 3, 5} then the 
            # while loop condition will stop when the right hand side's
            # item is of length 2, e.g. [ {2, 3}, {3, 5} ], since this
            # will be merged into 3 itemset, making the left hand side
            # null when computing the confidence
            if idx != 0:
                k = 0
                while len(right_hand_side[0]) < len(freq_set) - 1:
                    ck = create_candidate_k(right_hand_side, k = k)
                    rules, right_hand_side = compute_conf(freq_items, item_support_dict,
                                                          freq_set, ck, min_confidence)
                    association_rules.extend(rules)
                    k += 1    
    
    return association_rules

def compute_conf(freq_items, item_support_dict, freq_set, subsets, min_confidence):
    """
    create the rules and returns the rules info and the rules's
    right hand side (used for generating the next round of rules) 
    if it surpasses the minimum confidence threshold
    """
    rules = []
    right_hand_side = []
    
    for rhs in subsets:
        # create the left hand side of the rule
        # and add the rules if it's greater than
        # the confidence threshold
        lhs = freq_set - rhs
        conf = item_support_dict[freq_set] / item_support_dict[lhs]
        if conf >= min_confidence:
            lift = conf / item_support_dict[rhs]
            rules_info = lhs, rhs, conf, lift
            rules.append(rules_info)
            right_hand_side.append(rhs)
            
    return rules, right_hand_side


association_rules = create_rules(freq_items, item_support_dict, min_confidence = 0.5)
association_rules
#19BCN7003

[(frozenset({'b'}), frozenset({'a'}), 0.7499999999999999, 1.0312499999999998),
 (frozenset({'a'}), frozenset({'b'}), 0.7499999999999999, 1.0312499999999998)]

In [None]:
pip install apriori_python

Collecting apriori_python
  Downloading apriori_python-1.0.4-py3-none-any.whl (5.0 kB)
Installing collected packages: apriori-python
Successfully installed apriori-python-1.0.4


In [None]:
from apriori_python import apriori
import numpy as np
itemSetList = np.array([list(['a','b','c']),
              list(['a','b']),
              list(['a','b','d']),
              list(['b','e']),
              list(['b','c','e']),
             list(['a','d','e']),
             list(['a','c']),
              list(['a','b','d']),
              list(['c','e']),
              list(['a','b','d','e']),
              list(['a','b','e','c'])],dtype=object)
freqItemSet, rules = apriori(itemSetList, minSup=0.5, minConf=0.5)
print(freqItemSet)
print(rules)
#19BCN7003

{1: {frozenset({'a'}), frozenset({'e'}), frozenset({'b'})}, 2: {frozenset({'a', 'b'})}}
[[{'a'}, {'b'}, 0.75], [{'b'}, {'a'}, 0.75]]
