In [1]:
import pandas as pd
import numpy as np
from itertools import chain,combinations
import time

# Apriori Algorithm

In [2]:
def readData(path):
    '''
    Load the transaction database from a CSV file.

    The file is read without a header row, so the resulting columns are
    labelled with integers starting at 0.

    Parameters:-
    path - Location of the input file (anything pandas.read_csv accepts)

    Returns:-
    Dataframe holding the raw contents of the file
    '''
    return pd.read_csv(path, header=None)

In [3]:
def frequency(transactionData,support):
    '''Function to determine the frequent items in the transaction database

    Parameters:-
    transactionData - Dataframe whose column 0 holds one comma-separated
                      string of items per transaction
    support         - User determined support level (fraction of transactions,
                      0-1) an item must reach to be kept

    Returns:-
    n_combinations   - List of every 2-item tuple built from the frequent items
    Transactions     - List of transactions, each a list of item strings
    FrequentItemSets - Dictionary mapping each frequent item to the number of
                       transactions that contain it
    AprioriResults   - Empty dictionary, returned so later stages can fill it

    '''

    ## Extract transaction information from dataframe to a list.
    ## Iterating the column is positional, so a non-default index
    ## (e.g. after filtering rows) does not break the lookup.
    rows = transactionData[0] if len(transactionData) else []
    Transactions = [row.split(",") for row in rows]
    totalTransactions = len(Transactions)

    ## Count, for every item, the number of transactions it appears in.
    ## dict.fromkeys drops repeats inside one transaction (keeping first-seen
    ## order) so an item listed twice in a basket is not counted twice.
    itemCounts = {}
    for items in Transactions:
        for item in dict.fromkeys(items):
            itemCounts[item] = itemCounts.get(item, 0) + 1

    ## Keep only the items which meet the minimum support requirement
    FrequentItemSets = {
        item: count
        for item, count in itemCounts.items()
        if count / totalTransactions >= support
    }

    ## Dictionary to store frequent itemsets found by later stages
    AprioriResults = {}

    ## Candidate 2-itemsets: every pair of frequent items
    n_combinations = list(combinations(FrequentItemSets, 2))
    return n_combinations,Transactions,FrequentItemSets,AprioriResults

In [4]:
def support_level(n_combinations,support,Transactions,AprioriResults):
    '''
    Function to determine all itemset combinations which meet the minimum support

    Starting from the supplied candidates, each level counts how many
    transactions contain every candidate, records the ones meeting the
    support threshold, and joins them into candidates one item larger.
    The loop stops when a level produces no new candidates.

    Parameters:-
    n_combinations - Initial list of candidate itemsets (iterables of items)
    support        - User determined support level for generating itemsets
    Transactions   - List containing all transactions
    AprioriResults - Dictionary updated in place; keys are sorted item tuples,
                     values are the number of transactions containing them

    Returns:-
    The same AprioriResults dictionary that was passed in

    '''

    totalTransactions = len(Transactions)
    ## With no transactions nothing can meet any support level
    ## (and the support ratio below would divide by zero)
    if totalTransactions == 0:
        return AprioriResults

    ## Convert each transaction to a set once instead of once per candidate
    transactionSets = [set(transaction) for transaction in Transactions]
    candidates = [frozenset(candidate) for candidate in n_combinations]

    ## Loop to generate itemsets as long as they meet support requirements
    while candidates:
        itemSets = []
        for candidate in candidates:
            ## count the transactions which contain every item of the candidate
            count = sum(1 for transaction in transactionSets if candidate <= transaction)
            if count / totalTransactions >= support:
                itemSets.append(candidate)
                AprioriResults[tuple(sorted(candidate))] = count

        ## Generating (n+1)th itemset, eg. (A,B),(A,C) => (A,B,C).
        ## Collecting frozensets in a set removes duplicate candidates
        ## regardless of the order their items were produced in.
        addition = set()
        for first, second in combinations(itemSets, 2):
            if len(first - second) == 1 and first & second:
                addition.add(first | second)

        ## Sort so the order of the results does not depend on hash ordering
        candidates = sorted(addition, key=sorted)

    return AprioriResults

In [5]:
def confidence_level(AprioriResults,FrequentItemSets,Transactions,confidence,support):
    '''
    Print every association rule with a single-item right-hand side whose
    support and confidence both reach the user defined thresholds.

    For each itemset in AprioriResults, every item in turn is treated as the
    consequent and the remaining items as the antecedent. The antecedent count
    comes from FrequentItemSets when it is a single item, and from
    AprioriResults (keyed by the sorted tuple) otherwise.

    Side effect: each processed itemset and its count is also written into
    FrequentItemSets.

    Parameters:-
    AprioriResults   -  Dictionary containing frequent itemsets
    FrequentItemSets -  Dictionary containing frequent items
    Transactions     -  List containing all transactions
    confidence       -  User determined confidence level for generating itemsets
    support          -  User determined support level for generating itemsets

    '''
    totalTransactions = len(Transactions)

    for itemset in AprioriResults:
        itemsetCount = AprioriResults.get(itemset)

        ## treat each item in turn as the right-hand side of the rule
        for consequent in itemset:
            remainder = set(itemset) - set((consequent,))

            if len(remainder) == 1:
                ## 2-itemset: antecedent is one item, counted in FrequentItemSets
                antecedent = list(remainder)[0]
                antecedentCount = FrequentItemSets.get(antecedent)
                displayed = set((antecedent,))
            else:
                ## larger itemset: antecedent is itself an itemset in AprioriResults
                antecedent = tuple(remainder)
                antecedentCount = AprioriResults.get(tuple(sorted(antecedent)))
                displayed = set(antecedent)

            ## confidence = count(itemset) / count(antecedent)
            confidenceCalculation = itemsetCount / antecedentCount
            ## support = count(itemset) / number of transactions
            supportLevel = itemsetCount / totalTransactions

            ## print valid associations
            if supportLevel >= support and confidenceCalculation >= confidence:
                print(displayed,"=>","{",consequent,"}","(",supportLevel*100,"%,",confidenceCalculation*100,"%",")")

            FrequentItemSets[itemset] = itemsetCount

In [6]:
def main(support,confidence,path):
    '''
    Run the Apriori pipeline end to end: read the transactions, find the
    frequent items, grow them into frequent itemsets and print the rules.

    Parameters:-
    support     -  User determined support level for generating itemsets
    confidence  -  User determined confidence level for generating itemsets
    path        -  Location of the input file

    Returns:-
    Whatever confidence_level returns

    '''
    rawTransactions = readData(path)

    ## frequent single items plus the candidate pairs built from them
    candidatePairs,transactionList,itemCounts,results = frequency(rawTransactions,support)

    ## all larger itemsets meeting the support level
    results = support_level(candidatePairs,support,transactionList,results)

    ## print the rules meeting both thresholds
    return confidence_level(results,itemCounts,transactionList,confidence,support)

In [7]:
## Execute the Apriori algorithm on Database2 and report the elapsed wall-clock time
MIN_SUPPORT = .30
MIN_CONFIDENCE = .55
DATABASE_PATH = "C:/Users/Shank/Desktop/NJIT/CourseMaterial/Spring2022/DataMining/MidTermProject/WorkingDirectory/TransactionDatabase/Database2.csv"

start_time = time.time()
main(MIN_SUPPORT, MIN_CONFIDENCE, DATABASE_PATH)
elapsed_seconds = time.time() - start_time
print("Time to execute Apriori Algorithim --- %s seconds" % (elapsed_seconds,))

{'WaterBottle'} => { Tomato } ( 45.0 %, 69.23076923076923 % )
{'Tomato'} => { WaterBottle } ( 45.0 %, 64.28571428571429 % )
{'Chocolates'} => { Tomato } ( 30.0 %, 66.66666666666666 % )
{'Tomato'} => { MangoJuice } ( 45.0 %, 64.28571428571429 % )
{'MangoJuice'} => { Tomato } ( 45.0 %, 75.0 % )
{'Tomato'} => { HardDisk } ( 45.0 %, 64.28571428571429 % )
{'HardDisk'} => { Tomato } ( 45.0 %, 81.81818181818183 % )
{'Lamp'} => { Tomato } ( 35.0 %, 63.63636363636363 % )
{'Desk'} => { Tomato } ( 35.0 %, 63.63636363636363 % )
{'Tomato'} => { Fan } ( 45.0 %, 64.28571428571429 % )
{'Fan'} => { Tomato } ( 45.0 %, 75.0 % )
{'Tomato'} => { Pen } ( 40.0 %, 57.14285714285714 % )
{'Pen'} => { Tomato } ( 40.0 %, 72.72727272727273 % )
{'Shorts'} => { Tomato } ( 30.0 %, 66.66666666666666 % )
{'Tomato'} => { Moisturizer } ( 45.0 %, 64.28571428571429 % )
{'Moisturizer'} => { Tomato } ( 45.0 %, 75.0 % )
{'Sunscreen'} => { Tomato } ( 35.0 %, 70.0 % )
{'Tomato'} => { RazerBlades } ( 45.0 %, 64.28571428571429 % 

{'Chips'} => { Water } ( 40.0 %, 61.53846153846154 % )
{'LightBulb'} => { Chips } ( 40.0 %, 66.66666666666666 % )
{'Chips'} => { LightBulb } ( 40.0 %, 61.53846153846154 % )
{'Eggs'} => { Chips } ( 35.0 %, 70.0 % )
{'Mouse'} => { Chips } ( 40.0 %, 72.72727272727273 % )
{'Chips'} => { Mouse } ( 40.0 %, 61.53846153846154 % )
{'Notebook'} => { Bed } ( 45.0 %, 75.0 % )
{'Bed'} => { Notebook } ( 45.0 %, 81.81818181818183 % )
{'Water'} => { Bed } ( 35.0 %, 70.0 % )
{'Bed'} => { Water } ( 35.0 %, 63.63636363636363 % )
{'ShavingCream'} => { Bed } ( 40.0 %, 61.53846153846154 % )
{'Bed'} => { ShavingCream } ( 40.0 %, 72.72727272727273 % )
{'WetWipes'} => { Bed } ( 30.0 %, 75.0 % )
{'Eggs'} => { Bed } ( 30.0 %, 60.0 % )
{'ChickenBreast'} => { Bed } ( 35.0 %, 70.0 % )
{'Bed'} => { ChickenBreast } ( 35.0 %, 63.63636363636363 % )
{'Umbrella'} => { Notebook } ( 30.0 %, 60.0 % )
{'Water'} => { Umbrella } ( 30.0 %, 60.0 % )
{'Umbrella'} => { Water } ( 30.0 %, 60.0 % )
{'Umbrella'} => { ShavingCream } ( 

{'Notebook', 'WaterBottle'} => { Shoes } ( 30.0 %, 66.66666666666666 % )
{'Notebook', 'Shoes'} => { WaterBottle } ( 30.0 %, 75.0 % )
{'Water', 'WaterBottle'} => { Chips } ( 30.0 %, 85.71428571428571 % )
{'Chips', 'WaterBottle'} => { Water } ( 30.0 %, 66.66666666666666 % )
{'Chips', 'Water'} => { WaterBottle } ( 30.0 %, 75.0 % )
{'HardDisk', 'ShavingCream'} => { Fan } ( 30.0 %, 85.71428571428571 % )
{'Fan', 'ShavingCream'} => { HardDisk } ( 30.0 %, 75.0 % )
{'HardDisk', 'Fan'} => { ShavingCream } ( 30.0 %, 75.0 % )
{'WaterBottle', 'RazerBlades'} => { Notebook } ( 30.0 %, 75.0 % )
{'Notebook', 'WaterBottle'} => { RazerBlades } ( 30.0 %, 66.66666666666666 % )
{'Notebook', 'RazerBlades'} => { WaterBottle } ( 30.0 %, 66.66666666666666 % )
{'Moisturizer', 'WaterBottle'} => { Chips } ( 30.0 %, 66.66666666666666 % )
{'Chips', 'WaterBottle'} => { Moisturizer } ( 30.0 %, 66.66666666666666 % )
{'Moisturizer', 'Chips'} => { WaterBottle } ( 30.0 %, 75.0 % )
{'Notebook', 'HardDisk'} => { Chips } ( 3

{'WaterBottle', 'Desk'} => { Moisturizer } ( 35.0 %, 87.5 % )
{'Moisturizer', 'Desk'} => { WaterBottle } ( 35.0 %, 77.77777777777779 % )
{'Eggs', 'Mouse'} => { Chips } ( 30.0 %, 75.0 % )
{'Chips', 'Mouse'} => { Eggs } ( 30.0 %, 75.0 % )
{'Eggs', 'Chips'} => { Mouse } ( 30.0 %, 85.71428571428571 % )
{'Tomato', 'ShavingCream'} => { Moisturizer } ( 30.0 %, 60.0 % )
{'Moisturizer', 'Tomato'} => { ShavingCream } ( 30.0 %, 66.66666666666666 % )
{'Moisturizer', 'ShavingCream'} => { Tomato } ( 30.0 %, 85.71428571428571 % )
{'Tomato', 'Shoes'} => { ShavingCream } ( 30.0 %, 75.0 % )
{'Tomato', 'ShavingCream'} => { Shoes } ( 30.0 %, 60.0 % )
{'ShavingCream', 'Shoes'} => { Tomato } ( 30.0 %, 85.71428571428571 % )
{'Moisturizer', 'RazerBlades'} => { Chips } ( 30.0 %, 66.66666666666666 % )
{'Chips', 'RazerBlades'} => { Moisturizer } ( 30.0 %, 75.0 % )
{'Moisturizer', 'Chips'} => { RazerBlades } ( 30.0 %, 75.0 % )
{'Tomato', 'WaterBottle'} => { Pen } ( 30.0 %, 66.66666666666666 % )
{'WaterBottle', 'P

# Brute Force Method