In [109]:
# Imports
import pandas as pd
from collections import defaultdict
from itertools import chain, combinations

# Calling: candidateSet = getUnion(currentLSet, k)
def getUnion(itemSet, length):
    """Self-join a collection of itemsets.

    Every ordered pair of itemsets from ``itemSet`` is unioned, and the
    union is kept only when it contains exactly ``length`` elements.

    Args:
        itemSet: iterable of frozensets (iterated once per outer element).
        length: required size of each resulting itemset.

    Returns:
        A ``set`` of the unions that have exactly ``length`` elements.
    """
    joined = set()
    for left in itemSet:
        for right in itemSet:
            merged = left.union(right)
            if len(merged) == length:
                joined.add(merged)
    return joined

# Calling: candidateSet = pruning(candidateSet, currentLSet, k-1)
def pruning(candidateSet, prevFreqSet, length):
    """Drop candidates that have a sub-itemset of size ``length`` missing
    from ``prevFreqSet``.

    Args:
        candidateSet: set of candidate frozensets; it is copied, not mutated.
        prevFreqSet: container of frozensets used for the membership test.
        length: size of the sub-itemsets that are checked.

    Returns:
        A copy of ``candidateSet`` without the rejected candidates.
    """
    survivors = candidateSet.copy()
    for candidate in candidateSet:
        every_subset_known = all(
            frozenset(combo) in prevFreqSet
            for combo in combinations(candidate, length)
        )
        if not every_subset_known:
            survivors.remove(candidate)
    return survivors

# Calling: subsets = powerset(item)
def powerset(s):
    """Return a lazy iterator over the non-empty proper subsets of ``s``.

    Subsets are produced as tuples, grouped by increasing size from 1 up to
    ``len(s) - 1``; the empty subset and ``s`` itself are never produced.
    """
    sizes = range(1, len(s))
    per_size = (combinations(s, size) for size in sizes)
    return chain.from_iterable(per_size)

# Calling: rules = associationRule(globalFreqItemSet, globalItemSetWithSup, minConf)
# {1: {frozenset({'Bread'}), frozenset({'Milk'}), frozenset({'Cheese'})}}
# defaultdict(<class 'int'>, {frozenset({'Bread'}): 159, frozenset({'Meat'}): 150, frozenset({'Milk'}): 158,
# frozenset({'Cheese'}): 158, frozenset({'Diaper'}): 128, frozenset({'Bagel'}): 134, frozenset({'Eggs'}): 138, frozenset({'Pencil'}): 114, frozenset({'Wine'}): 138})
def associationRule(freqItemSet, itemSetWithSup, minConf):
    """Derive association rules from the frequent itemsets.

    For each frequent itemset and each subset produced by ``powerset``, the
    confidence is the itemset's support count divided by the subset's
    support count. A rule is kept when that confidence is strictly greater
    than ``minConf``.

    Args:
        freqItemSet: mapping whose values are collections of frozensets.
        itemSetWithSup: mapping from frozenset to its support count.
        minConf: exclusive lower bound on the confidence of a kept rule.

    Returns:
        A list of ``[antecedent, consequent, confidence]`` entries, where
        antecedent and consequent are plain ``set`` objects.
    """
    found = []
    for itemsets_of_size in freqItemSet.values():
        for itemset in itemsets_of_size:
            for antecedent in powerset(itemset):
                ratio = float(
                    itemSetWithSup[itemset] / itemSetWithSup[frozenset(antecedent)]
                )
                if ratio > minConf:
                    consequent = set(itemset.difference(antecedent))
                    found.append([set(antecedent), consequent, ratio])
    return found

# Calling: L1ItemSet = getAboveMinSup(C1ItemSet, itemSetList, minSup, globalItemSetWithSup)
def getAboveMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):
    """Count candidate occurrences and keep those meeting the minimum support.

    Args:
        itemSet: iterable of candidate frozensets.
        itemSetList: sequence of transactions; a candidate is counted once
            for every transaction it is a subset of.
        minSup: inclusive lower bound on ``count / len(itemSetList)``.
        globalItemSetWithSup: mapping that is updated in place; the count of
            every candidate seen at least once is added to its entry.

    Returns:
        The set of candidates whose support is at least ``minSup``.
        Candidates that occur in no transaction are never returned.
    """
    hits_by_candidate = {}
    for candidate in itemSet:
        hits = sum(
            1 for transaction in itemSetList if candidate.issubset(transaction)
        )
        # Candidates with no occurrence leave both tallies untouched.
        if hits:
            globalItemSetWithSup[candidate] += hits
            hits_by_candidate[candidate] = hits

    return {
        candidate
        for candidate, hits in hits_by_candidate.items()
        if float(hits / len(itemSetList)) >= minSup
    }

# Calling: C1ItemSet = getItemSetFromList(itemSetList)
def getItemSetFromList(itemSetList):
    """Wrap every element of ``itemSetList`` in its own one-element frozenset.

    Returns:
        A ``set`` of single-element frozensets; duplicate elements collapse.
    """
    return {frozenset((entry,)) for entry in itemSetList}



def theAlgorithm(itemSetList, minSup, minConf, main_dataset):
    """Run the level-wise frequent-itemset search and build association rules.

    Args:
        itemSetList: the individual items, e.g. ``['Milk', 'Cheese', ...]``;
            each one becomes a one-element frozenset candidate.
        minSup: minimum support forwarded to ``getAboveMinSup``.
        minConf: minimum confidence forwarded to ``associationRule``.
        main_dataset: the transactions that candidates are counted against.

    Returns:
        A ``(frequent_by_size, rules)`` tuple. ``frequent_by_size`` maps an
        itemset size to the set of frequent itemsets of that size, and
        ``rules`` is the rule list sorted by ascending confidence.
    """
    # Support count of every itemset that was counted, across all levels,
    # e.g. {frozenset({'Bread'}): 159, frozenset({'Meat'}): 150, ...}
    support_counts = defaultdict(int)
    # Frequent itemsets found so far, keyed by itemset size.
    frequent_by_size = dict()

    # Level 1: single-item candidates filtered by minimum support.
    singletons = getItemSetFromList(itemSetList)
    frequent = getAboveMinSup(singletons, main_dataset, minSup, support_counts)

    size = 1
    while frequent:
        frequent_by_size[size] = frequent

        # Self-join the current level to get candidates one item larger.
        joined = getUnion(frequent, size + 1)
        # Discard candidates that have a sub-itemset of the current size
        # which is not in the current frequent level.
        survivors = pruning(joined, frequent, size)
        # Scan the transactions and keep candidates meeting minimum support.
        frequent = getAboveMinSup(
            survivors, main_dataset, minSup, support_counts)

        size += 1

    rules = associationRule(frequent_by_size, support_counts, minConf)
    rules.sort(key=lambda rule: rule[2])

    return frequent_by_size, rules


def _promptThreshold(label):
    """Ask for a threshold until the reply is a number within [0.1, 0.9].

    Args:
        label: word inserted into the prompt (e.g. ``'support'``).

    Returns:
        The accepted reply exactly as typed, so callers can echo it back.
    """
    prompt = f'Please enter the minimum {label} in range from 0.1 to 0.9: '
    while True:
        reply = input(prompt)
        try:
            value = float(reply)
        except ValueError:
            # Non-numeric input used to crash the session; ask again instead.
            print('Value Error: Value should be a number')
            continue
        # The chained comparison also rejects 'nan', which compares False.
        if 0.1 <= value <= 0.9:
            return reply
        print('Value Range Error: Value should be in range 0.1 - 0.9')


def runApriori(csv_path='retail_dataset.csv'):
    """Load the dataset, ask for thresholds, run Apriori and print the results.

    Args:
        csv_path: CSV file to read; defaults to the original hard-coded
            ``'retail_dataset.csv'``.

    Prints the number of rows, then the frequent itemsets per level and the
    association rules returned by ``theAlgorithm``.
    """
    df = pd.read_csv(csv_path, na_values=" Empty")
    # fillna marks missing cells without needing numpy, which this file
    # never imports.
    df = df.fillna('Empty')
    print(df.shape[0])

    main_dataset = []
    distinct_items = set()
    for row in df.values.tolist():
        # Column 0 is skipped (presumably a row index - verify against the
        # CSV); every remaining non-'Empty' cell is an item of the basket.
        basket = [value for value in row[1:] if value != 'Empty']
        distinct_items.update(basket)
        main_dataset.append(basket)

    # Getting min support and min confidence from the user
    min_support = _promptThreshold('support')
    print('#'*50)
    min_confidence = _promptThreshold('confidence')

    freqItemSet, rules = theAlgorithm(
        list(distinct_items), float(min_support), float(min_confidence),
        main_dataset)
    print('#'*100)
    print(f'Frequest Item Sets with min support {min_support}')
    for level in freqItemSet:
        print(freqItemSet[level])
    print('#'*100)
    print(f'Association Rules with min confidence {min_confidence}')
    for rule in rules:
        print(rule)

# Entry point: runs the interactive Apriori session as soon as this code is
# executed (loads the dataset, prompts for the thresholds, prints the results).
runApriori()

315
Please enter the minimum support in range from 0.1 to 0.9: 0.3
##################################################
Please enter the minimum confidence in range from 0.1 to 0.9: 0.3
####################################################################################################
Frequest Item Sets with min support 0.3
{frozenset({'Bread'}), frozenset({'Meat'}), frozenset({'Milk'}), frozenset({'Cheese'}), frozenset({'Diaper'}), frozenset({'Bagel'}), frozenset({'Eggs'}), frozenset({'Pencil'}), frozenset({'Wine'})}
{frozenset({'Cheese', 'Milk'}), frozenset({'Cheese', 'Meat'})}
####################################################################################################
Association Rules with min confidence 0.3
[{'Cheese'}, {'Milk'}, 0.6075949367088608]
[{'Milk'}, {'Cheese'}, 0.6075949367088608]
[{'Cheese'}, {'Meat'}, 0.6455696202531646]
[{'Meat'}, {'Cheese'}, 0.68]
