# Apriori Algorithm

In [1]:
import pandas as pd
import numpy as np
from itertools import chain,combinations
import time

In [2]:
def readData(path):
    '''
    Load the transaction file into a pandas DataFrame.

    The file is parsed as CSV without a header row, so the columns of
    the returned frame are labelled with integers starting at 0.

    Parameters:-
    path - Location of the input file (anything pd.read_csv accepts)

    Returns:-
    DataFrame holding the parsed rows of the file

    '''
    return pd.read_csv(path, header=None)

In [3]:
def frequency(transactionData,support):
    '''
    Function to determine the frequent items in the transaction database

    Every row of column 0 is split on "," into a list of items. The
    support of an item is the fraction of transactions that contain it,
    so an item repeated inside one transaction is counted only once for
    that transaction.

    Parameters:-
    transactionData - Dataframe whose column 0 holds one comma separated
                      transaction string per row
    support         - Minimum support, as a fraction of all transactions

    Returns:-
    n_combinations   - List of all 2-item tuples built from the frequent items
    Transactions     - List of transactions, each a list of item strings
    FrequentItemSets - Dictionary mapping each frequent item to the number
                       of transactions containing it
    AprioriResults   - Empty dictionary, to be filled by later steps

    '''

    ## Extract transaction information from dataframe to a list.
    ## Iterating the column avoids relying on the row labels being 0..n-1;
    ## an empty frame may have no column 0 at all, so it is handled apart.
    if len(transactionData) > 0:
        Transactions = [row.split(",") for row in transactionData[0]]
    else:
        Transactions = []

    AprioriResults = {}

    ## Count, for every item, the number of transactions containing it.
    ## dict.fromkeys drops repeats within a transaction while keeping the
    ## first-seen order, so the resulting item order is deterministic.
    occurrences = {}
    for items in Transactions:
        for item in dict.fromkeys(items):
            occurrences[item] = occurrences.get(item, 0) + 1

    ## Keep only the items which meet the minimum support requirement
    total = len(Transactions)
    FrequentItemSets = {
        item: count
        for item, count in occurrences.items()
        if count / total >= support
    }

    ## Candidate 2-itemsets are all pairs of frequent items
    n_combinations = list(combinations(FrequentItemSets, 2))
    return n_combinations,Transactions,FrequentItemSets,AprioriResults

In [4]:
def support_level(n_combinations,support,Transactions,AprioriResults):
    '''
    Function to determine all itemset combinations which meet the minimum support

    Starting from the given candidates, every candidate is counted against
    the transactions; those meeting the support are stored and joined
    pairwise into candidates one item larger. This repeats until a round
    produces no new candidates.

    Parameters:-
    n_combinations - List of candidate itemsets to start from
    support        - Minimum support, as a fraction of all transactions
    Transactions   - List containing all transactions
    AprioriResults - Dictionary that receives every itemset meeting the
                     support level; it is updated in place

    Returns:-
    AprioriResults - The same dictionary, mapping sorted item tuples to the
                     number of transactions containing them

    '''

    total = len(Transactions)
    ## Without transactions nothing can be frequent (and the support
    ## ratio below would divide by zero)
    if total == 0:
        return AprioriResults

    ## Convert every transaction to a set once, not once per candidate
    transaction_sets = [set(transaction) for transaction in Transactions]
    candidates = list(n_combinations)

    ## Loop to generate itemsets as long as they meet support requirements
    while candidates:
        frequent = []
        for candidate in candidates:
            members = set(candidate)
            count = sum(1 for transaction in transaction_sets if members <= transaction)
            if count / total >= support:
                frequent.append(members)
                AprioriResults[tuple(sorted(candidate))] = count

        ## Generating (n+1)th itemset, eg. (A,B),(A,C) => (A,B,C):
        ## two itemsets are joined when they overlap and differ in one item
        joined = set()
        for left, right in combinations(frequent, 2):
            if len(left - right) == 1 and left & right:
                joined.add(tuple(sorted(left | right)))

        ## Sorting makes the order of the next round, and therefore the
        ## insertion order of AprioriResults, identical on every run
        candidates = sorted(joined)

    return AprioriResults

In [5]:
def confidence_level(AprioriResults,FrequentItemSets,Transactions,confidence,support):
    '''
    Function to print the association rules which meet the user defined
    support and confidence levels

    For every frequent itemset each item in turn is taken as the
    right-hand side of a rule and the remaining items as the left-hand
    side. Nothing is returned; valid rules are printed.

    Parameters:-
    AprioriResults   -  Dictionary containing frequent itemsets and their counts
    FrequentItemSets -  Dictionary containing frequent items and their counts;
                        the itemsets of AprioriResults are added to it
    Transactions     -  List containing all transactions
    confidence       -  User determined confidence level for generating itemsets
    support          -  User determined support level for generating itemsets

    '''
    for itemset in AprioriResults:
        for consequent in itemset:
            ## everything in the itemset except the right-hand side item
            remainder = set(itemset) - {consequent}

            if len(remainder) == 1:
                ## 2-itemsets: the left-hand side is a single item whose
                ## count lives in FrequentItemSets
                antecedent = list(remainder)[0]
                antecedentCount = FrequentItemSets.get(antecedent)
                shown = set((antecedent,))
            else:
                ## larger itemsets: the left-hand side is itself an
                ## itemset, looked up by its sorted tuple
                antecedent = tuple(remainder)
                antecedentCount = AprioriResults.get(tuple(sorted(antecedent)))
                shown = set(antecedent)

            ## calculate confidence and support level of the rule
            confidenceCalculation = AprioriResults.get(itemset)/antecedentCount
            supportLevel = AprioriResults.get(itemset)/len(Transactions)

            ## print valid associations
            if confidenceCalculation >= confidence and supportLevel >= support:
                print(shown,"=>","{",consequent,"}","(",supportLevel*100,"%,",confidenceCalculation*100,"%",")")

            ## record the itemset count alongside the single item counts
            FrequentItemSets[itemset] = AprioriResults.get(itemset)

In [6]:
def Apriori(support,confidence,path):
    '''
    Function to execute the Apriori Algorithm end to end

    Reads the transactions, finds the frequent items, grows them into
    frequent itemsets and finally hands everything to confidence_level,
    whose result is returned.

    Parameters:-
    support     -  User determined support level for generating itemsets
    confidence  -  User determined confidence level for generating itemsets
    path        -  Location of the input file

    '''

    ## Step 1: frequent single items and the initial 2-item candidates
    candidates, transactions, frequentItems, results = frequency(readData(path), support)

    ## Step 2: all itemsets that meet the support level
    results = support_level(candidates, support, transactions, results)

    ## Step 3: association rules that meet the confidence level
    return confidence_level(results, frequentItems, transactions, confidence, support)

## Apriori Algorithm Execution on Datasets

In [13]:
## Executing Apriori Algorithm on Dataset 1 (support 0.15, confidence 0.55)
## perf_counter is a high-resolution clock intended for timing short
## durations; the wall clock (time.time) can be coarse or be adjusted mid-run
start_time = time.perf_counter()
Apriori(.15,.55,"C:/Users/Shank/Desktop/NJIT/CourseMaterial/Spring2022/DataMining/MidTermProject/WorkingDirectory/TransactionDatabase/Database1.csv")
print("Time to execute Apriori Algorithim --- %s seconds" % (time.perf_counter() - start_time))

{'Milk'} => { Eggs } ( 15.0 %, 75.0 % )
{'Eggs'} => { Milk } ( 15.0 %, 100.0 % )
{'ShavingCream'} => { RazerBlades } ( 15.0 %, 75.0 % )
{'RazerBlades'} => { ShavingCream } ( 15.0 %, 100.0 % )
{'ShavingCream'} => { Moisturizer } ( 15.0 %, 75.0 % )
{'Moisturizer'} => { ShavingCream } ( 15.0 %, 75.0 % )
Time to execute Apriori Algorithim --- 0.008975982666015625 seconds


In [19]:
## Executing Apriori Algorithm on Dataset 2 (support 0.35, confidence 0.85)
## perf_counter is a high-resolution clock intended for timing short
## durations; the wall clock (time.time) can be coarse or be adjusted mid-run
start_time = time.perf_counter()
Apriori(.35,.85,"C:/Users/Shank/Desktop/NJIT/CourseMaterial/Spring2022/DataMining/MidTermProject/WorkingDirectory/TransactionDatabase/Database2.csv")
print("Time to execute Apriori Algorithim --- %s seconds" % (time.perf_counter() - start_time))

{'Banana'} => { Tomato } ( 35.0 %, 87.5 % )
{'WetWipes'} => { Moisturizer } ( 35.0 %, 87.5 % )
{'Onion'} => { RazerBlades } ( 35.0 %, 87.5 % )
{'WetWipes'} => { Notebook } ( 35.0 %, 87.5 % )
{'Sunscreen', 'WaterBottle'} => { MangoJuice } ( 35.0 %, 87.5 % )
{'Sunscreen', 'MangoJuice'} => { WaterBottle } ( 35.0 %, 100.0 % )
{'Notebook', 'Tomato'} => { RazerBlades } ( 35.0 %, 87.5 % )
{'WaterBottle', 'Eggs'} => { Moisturizer } ( 35.0 %, 87.5 % )
{'Moisturizer', 'Eggs'} => { WaterBottle } ( 35.0 %, 87.5 % )
{'Moisturizer', 'Notebook'} => { Eggs } ( 35.0 %, 87.5 % )
{'Notebook', 'Eggs'} => { Moisturizer } ( 35.0 %, 87.5 % )
{'Moisturizer', 'Eggs'} => { Notebook } ( 35.0 %, 87.5 % )
{'WaterBottle', 'Eggs'} => { Notebook } ( 35.0 %, 87.5 % )
{'Notebook', 'Eggs'} => { WaterBottle } ( 35.0 %, 87.5 % )
{'Bed', 'RazerBlades'} => { Notebook } ( 35.0 %, 87.5 % )
{'Moisturizer', 'MangoJuice'} => { WaterBottle } ( 35.0 %, 87.5 % )
{'MangoJuice', 'RazerBlades'} => { Tomato } ( 35.0 %, 87.5 % )
{'Shoes

In [9]:
## Executing Apriori Algorithm on Dataset 3 (support 0.35, confidence 0.55)
## perf_counter is a high-resolution clock intended for timing short
## durations; the wall clock (time.time) can be coarse or be adjusted mid-run
start_time = time.perf_counter()
Apriori(.35,.55,"C:/Users/Shank/Desktop/NJIT/CourseMaterial/Spring2022/DataMining/MidTermProject/WorkingDirectory/TransactionDatabase/Database3.csv")
print("Time to execute Apriori Algorithim --- %s seconds" % (time.perf_counter() - start_time))

{'Milk'} => { MangoJuice } ( 55.00000000000001 %, 68.75 % )
{'MangoJuice'} => { Milk } ( 55.00000000000001 %, 91.66666666666666 % )
{'Lamp'} => { Milk } ( 40.0 %, 88.88888888888889 % )
{'Bed'} => { Milk } ( 35.0 %, 70.0 % )
{'Fan'} => { Milk } ( 35.0 %, 87.5 % )
{'Milk'} => { Banana } ( 50.0 %, 62.5 % )
{'Banana'} => { Milk } ( 50.0 %, 76.92307692307693 % )
{'Moisturizer'} => { Milk } ( 35.0 %, 77.77777777777779 % )
{'Sunscreen'} => { Milk } ( 35.0 %, 77.77777777777779 % )
{'Eggs'} => { Milk } ( 35.0 %, 77.77777777777779 % )
{'ChickenBreast'} => { Milk } ( 40.0 %, 88.88888888888889 % )
{'Milk'} => { Chocolates } ( 50.0 %, 62.5 % )
{'Chocolates'} => { Milk } ( 50.0 %, 100.0 % )
{'Notebook'} => { Milk } ( 45.0 %, 100.0 % )
{'Milk'} => { Notebook } ( 45.0 %, 56.25 % )
{'Pen'} => { Milk } ( 40.0 %, 80.0 % )
{'Shoes'} => { Milk } ( 45.0 %, 81.81818181818183 % )
{'Milk'} => { Shoes } ( 45.0 %, 56.25 % )
{'Mouse'} => { Milk } ( 50.0 %, 76.92307692307693 % )
{'Milk'} => { Mouse } ( 50.0 %, 62.

In [14]:
## Executing Apriori Algorithm on Dataset 4 (support 0.40, confidence 0.55)
## perf_counter is a high-resolution clock intended for timing short
## durations; the wall clock (time.time) can be coarse or be adjusted mid-run
start_time = time.perf_counter()
Apriori(.40,.55,"C:/Users/Shank/Desktop/NJIT/CourseMaterial/Spring2022/DataMining/MidTermProject/WorkingDirectory/TransactionDatabase/Database4.csv")
print("Time to execute Apriori Algorithim --- %s seconds" % (time.perf_counter() - start_time))

{'WaterBottle'} => { Eggs } ( 45.0 %, 64.28571428571429 % )
{'Eggs'} => { WaterBottle } ( 45.0 %, 75.0 % )
{'Eggs'} => { Bed } ( 40.0 %, 66.66666666666666 % )
{'Bed'} => { Eggs } ( 40.0 %, 61.53846153846154 % )
{'Notebook'} => { Eggs } ( 40.0 %, 72.72727272727273 % )
{'Eggs'} => { Notebook } ( 40.0 %, 66.66666666666666 % )
{'Shoes'} => { Eggs } ( 45.0 %, 60.0 % )
{'Eggs'} => { Shoes } ( 45.0 %, 75.0 % )
{'Eggs'} => { Chocolates } ( 40.0 %, 66.66666666666666 % )
{'Chocolates'} => { Eggs } ( 40.0 %, 61.53846153846154 % )
{'Shorts'} => { Eggs } ( 45.0 %, 69.23076923076923 % )
{'Eggs'} => { Shorts } ( 45.0 %, 75.0 % )
{'WaterBottle'} => { Onion } ( 40.0 %, 57.14285714285714 % )
{'Onion'} => { WaterBottle } ( 40.0 %, 66.66666666666666 % )
{'Onion'} => { Chips } ( 50.0 %, 83.33333333333334 % )
{'Chips'} => { Onion } ( 50.0 %, 83.33333333333334 % )
{'Onion'} => { Banana } ( 40.0 %, 66.66666666666666 % )
{'Banana'} => { Onion } ( 40.0 %, 88.88888888888889 % )
{'Shoes'} => { Onion } ( 50.0 %, 6

In [15]:
## Executing Apriori Algorithm on Dataset 5 (support 0.45, confidence 0.55)
## perf_counter is a high-resolution clock intended for timing short
## durations; the wall clock (time.time) can be coarse or be adjusted mid-run
start_time = time.perf_counter()
Apriori(.45,.55,"C:/Users/Shank/Desktop/NJIT/CourseMaterial/Spring2022/DataMining/MidTermProject/WorkingDirectory/TransactionDatabase/Database5.csv")
print("Time to execute Apriori Algorithim --- %s seconds" % (time.perf_counter() - start_time))

{'Eggs'} => { Bed } ( 50.0 %, 76.92307692307693 % )
{'Bed'} => { Eggs } ( 50.0 %, 66.66666666666666 % )
{'Eggs'} => { Banana } ( 45.0 %, 69.23076923076923 % )
{'Banana'} => { Eggs } ( 45.0 %, 75.0 % )
{'ShavingCream'} => { Eggs } ( 45.0 %, 90.0 % )
{'Eggs'} => { ShavingCream } ( 45.0 %, 69.23076923076923 % )
{'ChickenBreast'} => { Bed } ( 45.0 %, 81.81818181818183 % )
{'Bed'} => { ChickenBreast } ( 45.0 %, 60.0 % )
{'Onion'} => { MangoJuice } ( 45.0 %, 75.0 % )
{'MangoJuice'} => { Onion } ( 45.0 %, 69.23076923076923 % )
{'Onion'} => { Bed } ( 50.0 %, 83.33333333333334 % )
{'Bed'} => { Onion } ( 50.0 %, 66.66666666666666 % )
{'MangoJuice'} => { Bed } ( 50.0 %, 76.92307692307693 % )
{'Bed'} => { MangoJuice } ( 50.0 %, 66.66666666666666 % )
{'MangoJuice'} => { Banana } ( 45.0 %, 69.23076923076923 % )
{'Banana'} => { MangoJuice } ( 45.0 %, 75.0 % )
{'Shirt'} => { MangoJuice } ( 45.0 %, 64.28571428571429 % )
{'MangoJuice'} => { Shirt } ( 45.0 %, 69.23076923076923 % )
{'HardDisk'} => { Bed }