In [10]:
import operator #The operator module exports a set of efficient functions corresponding to the intrinsic operators of Python.
import collections #This module implements some nice data structures which will help you to solve various real life problems.
import itertools as it #Functions creating iterators for efficient looping

# Return every non-empty subset of `item` (including `item` itself) as a list of frozensets.
def get_all_subcombinations(item):
    """Return every non-empty subset of ``item`` as a list of frozensets.

    Subsets are produced in order of increasing size, from 1 up to
    ``len(item)``, so for a non-empty ``item`` the last entry contains all of
    its elements. An empty ``item`` yields an empty list.
    """
    subsets = []
    for size in range(1, len(item) + 1):
        for members in it.combinations(item, size):
            subsets.append(frozenset(members))
    return subsets

# Count how many transactions contain each candidate itemset, then return the itemsets whose
# support is strictly greater than min_support together with the frequency table.
def get_items_with_support_higher_then_min_and_item_frequency(items, transactions, min_support):
    """Count candidate itemsets and keep those above the support threshold.

    Args:
        items: iterable of candidate itemsets; each must provide ``issubset``.
        transactions: sized collection of transactions that can be iterated
            once per candidate.
        min_support: threshold; a candidate is kept only when its support is
            strictly greater than this value.

    Returns:
        A ``(support_items, item_frequency)`` tuple. ``item_frequency`` is a
        ``defaultdict(int)`` mapping each candidate found in at least one
        transaction to the number of transactions containing it;
        ``support_items`` is the list of those candidates that pass the
        threshold.
    """
    item_frequency = collections.defaultdict(int)
    for candidate in items:
        occurrences = sum(1 for basket in transactions if candidate.issubset(basket))
        # Candidates that never occur get no entry, so they cannot be selected.
        if occurrences:
            item_frequency[candidate] += occurrences

    support_items = []
    for candidate in item_frequency:
        if calc_support(candidate, item_frequency, transactions) > min_support:
            support_items.append(candidate)
    return support_items, item_frequency

# Support of an itemset: its frequency divided by the number of transactions.
def calc_support(item, item_frequency, transactions):
    """Return the support of ``item`` as a float.

    The count stored under ``item`` in ``item_frequency`` is divided by
    ``len(transactions)``. An empty ``transactions`` raises
    ``ZeroDivisionError``.
    """
    occurrences = item_frequency[item]
    total = len(transactions)
    return float(occurrences) / total

# Build candidate itemsets of size k: take the pairwise unions of the given itemsets and
# keep only the unions that contain exactly k elements.
def get_items_union(items, k):
    """Return the set of pairwise unions of ``items`` that have exactly ``k`` elements.

    Every ordered pair is considered, including an itemset paired with itself,
    and duplicates collapse because the result is a set.
    """
    candidates = set()
    for left in items:
        for right in items:
            merged = left.union(right)
            if len(merged) == k:
                candidates.add(merged)
    return candidates

# Run the Apriori algorithm on a transactions file.
# min_support and min_confidence are the thresholds for frequent itemsets and for rules.
def run_apriori(file_name, min_support=0.1, min_confidence=0.5):
    """Mine frequent itemsets and association rules from a transactions file.

    Args:
        file_name: path handed to ``read_transactions``.
        min_support: an itemset is frequent when its support is strictly
            greater than this value.
        min_confidence: a rule is kept when its confidence is greater than or
            equal to this value.

    Returns:
        A ``(item_supports, rules)`` tuple. ``item_supports`` is a list of
        ``(itemset, support)`` pairs and ``rules`` a list of
        ``((antecedent, consequent), confidence)`` pairs, each sorted by the
        numeric value in descending order.
    """
    transactions = read_transactions(file_name)

    # Level 1 candidates: every distinct item wrapped in its own frozenset.
    singletons = {frozenset([entry]) for basket in transactions for entry in basket}
    frequent, counts = get_items_with_support_higher_then_min_and_item_frequency(
        singletons, transactions, min_support)
    item_frequency = dict(counts)

    # levels[n] holds the frequent itemsets of size n. Each pass records the
    # current level, then grows the candidates by one element.
    levels = {}
    size = 1
    while frequent:
        levels[size] = frequent
        size += 1
        candidates = get_items_union(frequent, size)
        frequent, counts = get_items_with_support_higher_then_min_and_item_frequency(
            candidates, transactions, min_support)
        item_frequency.update(counts)

    # Support of every frequent itemset, across all levels.
    supports = {}
    for itemsets in levels.values():
        for itemset in itemsets:
            supports[itemset] = calc_support(itemset, item_frequency, transactions)

    # Rules need at least two items: antecedent => remaining items.
    rules = {}
    for level, itemsets in levels.items():
        if level <= 1:
            continue
        for itemset in itemsets:
            whole_support = supports[itemset]
            for antecedent in get_all_subcombinations(itemset):
                consequent = itemset.difference(antecedent)
                # The full itemset is among the subsets; it leaves nothing to imply.
                if not consequent:
                    continue
                confidence = whole_support / calc_support(antecedent, item_frequency, transactions)
                if confidence >= min_confidence:
                    rules[(antecedent, consequent)] = confidence

    # Highest support / confidence first.
    by_score = operator.itemgetter(1)
    ranked_supports = sorted(supports.items(), key=by_score, reverse=True)
    ranked_rules = sorted(rules.items(), key=by_score, reverse=True)
    return ranked_supports, ranked_rules

# Read the transactions file: one transaction per line, items separated by commas.
def read_transactions(file_name):
    """Read transactions from a comma-separated text file.

    Each line is one transaction and each comma-separated field is one item.
    Surrounding whitespace is stripped from every field, empty fields (for
    example from a trailing comma) are dropped, and rows left with no items
    (blank lines, rows of only separators) are skipped, so the empty string is
    never reported as an item.

    Args:
        file_name: path of the file to read.

    Returns:
        A list of sets of item strings, one set per non-empty row, in file
        order. An empty file yields an empty list.
    """
    transactions = []
    with open(file_name, 'r') as f:
        for line in f:
            basket = {field.strip() for field in line.split(",")}
            # An empty field would otherwise be counted as an item named "".
            basket.discard("")
            if basket:
                transactions.append(basket)
    return transactions

# Entry point: run the analysis only when this code is executed directly.
if __name__ == "__main__":
    # Mine itemsets and rules from one dataset with run_apriori's default
    # thresholds. The results are bound to item_supports and rules, which the
    # printing cells below read.
    # Alternative dataset files are kept commented out:
    #item_supports, rules = run_apriori("division.csv")
    #item_supports, rules = run_apriori("AprioriTestOrginalMarried2.csv")
    #item_supports, rules = run_apriori("AprioriTestOrginaluUnmarried2.csv")
    #item_supports, rules = run_apriori("AprioriTestOrginalMale2.csv")
    #item_supports, rules = run_apriori("AprioriTestOrginalFemale2.csv")
    #item_supports, rules = run_apriori("AprioriTestOrginalAgeRange2.csv")
    item_supports, rules = run_apriori("aprioritestbmdgendermaritalage2.csv")

In [11]:
# Print up to 100 itemsets with the highest support, one per line.
# Items are sorted before joining: iteration order of a set of strings is not
# stable between interpreter runs, so an unsorted join produces labels whose
# item order changes from run to run.
for itemset, support in item_supports[:100]:
    print("{0} - {1:.2f}".format(", ".join(sorted(itemset)), support))

Adult - 0.76
Married - 0.67
Male - 0.57
Male, Adult - 0.45
Adult, Married - 0.44
Female - 0.43
Male, Married - 0.35
Female, Married - 0.33
Unmarried - 0.31
Adult, Female - 0.31
Adult, Unmarried - 0.30
Senior - 0.23
Male, Adult, Married - 0.23
Senior, Married - 0.22
Male, Unmarried - 0.22
Male, Adult, Unmarried - 0.22
Adult, Female, Married - 0.21
Senior, Female - 0.12
Senior, Male - 0.11
Senior, Male, Married - 0.11
Senior, Married, Female - 0.11


In [12]:
# Print up to 100 rules with the highest confidence as "antecedent => consequent - confidence".
# Items on each side are sorted before joining: iteration order of a set of
# strings is not stable between interpreter runs, so an unsorted join produces
# labels whose item order changes from run to run.
for (antecedent, consequent), confidence in rules[:100]:
    print("{0} => {1} - {2:.2f}".format(
        ", ".join(sorted(antecedent)), ", ".join(sorted(consequent)), confidence))

Senior, Male => Married - 1.00
Male, Unmarried => Adult - 1.00
Unmarried => Adult - 0.98
Senior => Married - 0.97
Senior, Female => Married - 0.94
Male => Adult - 0.79
Female => Married - 0.76
Adult, Unmarried => Male - 0.72
Female => Adult - 0.71
Unmarried => Male - 0.70
Unmarried => Male, Adult - 0.70
Adult, Female => Married - 0.68
Male, Married => Adult - 0.66
Married => Adult - 0.65
Married, Female => Adult - 0.64
Male => Married - 0.61
Adult => Male - 0.60
Adult => Married - 0.58
Adult, Married => Male - 0.52
Married => Male - 0.52
Senior => Female - 0.52
Male, Adult => Married - 0.51
Senior, Married => Male - 0.50
Senior, Married => Female - 0.50
