### Importing data set and the libraries

In [1]:
import pandas as pd
import numpy as np
from pandas.compat import StringIO

In [2]:
temp = 'RawData/retail_25k.dat'
rawdata = open(temp,'r')
rawdata= rawdata.read()

In [3]:
dataSet=[]
for i in rawdata.split('\n'):
    a=i.split()
    dataSet.append(a)

In [4]:
dataSet[1:5]

[['30', '31', '32'],
 ['33', '34', '35'],
 ['36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46'],
 ['38', '39', '47', '48']]

### Creating functions for generating item sets, association and the final output

In [5]:
# Generating the first candidate set for the dataset
import itertools
def generateC1(dataSet):
    productDict = {}
    returneSet = []
    for data in dataSet:
        for product in data:
            if product not in productDict:
                productDict[product] = 1
            else:
                 productDict[product] = productDict[product] + 1
    for key in productDict:
        tempArray = []
        tempArray.append(key)
        returneSet.append(tempArray)
        returneSet.append(productDict[key])
        tempArray = []
    return returneSet

#   Creating Frequent item sets by taking candidate sets as input
def generateFrequentItemSet(CandidateList, noOfTransactions, minimumSupport, dataSet, fatherFrequentArray):
    frequentItemsArray = []
    for i in range(len(CandidateList)):
        if i%2 != 0:
            support = (CandidateList[i] * 1.0 / noOfTransactions) * 100
            if support >= minimumSupport:
                frequentItemsArray.append(CandidateList[i-1])
                frequentItemsArray.append(CandidateList[i])
            else: 
                eleminatedItemsArray.append(CandidateList[i-1])

    for k in frequentItemsArray:
        fatherFrequentArray.append(k)

    if len(frequentItemsArray) == 2 or len(frequentItemsArray) == 0:
        returnArray = fatherFrequentArray
        return returnArray

    else:
        generateCandidateSets(dataSet, eleminatedItemsArray, frequentItemsArray, noOfTransactions, minimumSupport)

#   Creating Candidate sets by taking frequent sets as the input
def generateCandidateSets(dataSet, eleminatedItemsArray, frequentItemsArray, noOfTransactions, minimumSupport):
    onlyElements = []
    arrayAfterCombinations = []
    candidateSetArray = []
    for i in range(len(frequentItemsArray)):
        if i%2 == 0:
            onlyElements.append(frequentItemsArray[i])
    for item in onlyElements:
        tempCombinationArray = []
        k = onlyElements.index(item)
        for i in range(k + 1, len(onlyElements)):
            for j in item:
                if j not in tempCombinationArray:
                    tempCombinationArray.append(j)
            for m in onlyElements[i]:
                if m not in tempCombinationArray:
                    tempCombinationArray.append(m)
            arrayAfterCombinations.append(tempCombinationArray)
            tempCombinationArray = []
    sortedCombinationArray = []
    uniqueCombinationArray = []
    for i in arrayAfterCombinations:
        sortedCombinationArray.append(sorted(i))
    for i in sortedCombinationArray:
        if i not in uniqueCombinationArray:
            uniqueCombinationArray.append(i)
    arrayAfterCombinations = uniqueCombinationArray
    for item in arrayAfterCombinations:
        count = 0
        for transaction in dataSet:
            if set(item).issubset(set(transaction)):
                count = count + 1
        if count != 0:
            candidateSetArray.append(item)
            candidateSetArray.append(count)
    generateFrequentItemSet(candidateSetArray, noOfTransactions, minimumSupport, dataSet, fatherFrequentArray)

#   Generating Association Rules by taking all the frequent sets as an input
def generateAssociationRule(freqSet):
    associationRule = []
    for item in freqSet:
        if isinstance(item, list):
            if len(item) != 0:
                length = len(item) - 1
                while length > 0:
                    combinations = list(itertools.combinations(item, length))
                    temp = []
                    LHS = [] #Antecedent
                    for RHS in combinations:
                        LHS = set(item) - set(RHS)
                        temp.append(list(LHS))
                        temp.append(list(RHS))
                        #print(temp)
                        associationRule.append(temp)
                        temp = []
                    length = length - 1
    return associationRule

#   This function creates the final output of the algorithm by taking Association Rules as the input and generating the supports for each itemsets
def aprioriOutput(rules, dataSet, minimumSupport, minimumConfidence):
    returnAprioriOutput = []
    for rule in rules:
        for i in rule:
            if len(i)==3: # for items sets of size 3
                item_size = "Item set size : %d" %len(i)
                returnAprioriOutput.append(item_size)
                supportOfX = 0
                supportOfXinPercentage = 0
                supportOfXandY = 0
                supportOfXandYinPercentage = 0
                for transaction in dataSet:
                    if set(rule[0]).issubset(set(transaction)):
                        supportOfX = supportOfX + 1
                    if set(rule[0] + rule[1]).issubset(set(transaction)):
                        supportOfXandY = supportOfXandY + 1
                        
                supportOfXandYinPercentage = (supportOfXandY * 1.0 / noOfTransactions) * 100
                supportOfXandYAppendString = "Support of X & Y: " + str(round(supportOfXandYinPercentage))
                returnAprioriOutput.append(supportOfXandYAppendString)
                returnAprioriOutput.append(rule)
    return returnAprioriOutput


In [6]:
noOfTransactions = len(dataSet)
firstCandidateSet = generateC1(dataSet)

In [7]:
firstCandidateSet[:10]

[['0'], 67, ['1'], 74, ['2'], 128, ['3'], 6, ['4'], 8]

### Generating the frequent item sets

In [None]:
fatherFrequentArray=[]
eleminatedItemsArray=[]
frequentItemSet = generateFrequentItemSet(firstCandidateSet, noOfTransactions, 0, dataSet, fatherFrequentArray)
print('Completed')

In [10]:
fatherFrequentArray[:35]

[[1],
 3,
 [2],
 3,
 [3],
 4,
 [4],
 3,
 [5],
 4,
 [6],
 2,
 [1, 2],
 3,
 [1, 3],
 3,
 [1, 4],
 2,
 [1, 5],
 3,
 [1, 6],
 1,
 [2, 3],
 3,
 [2, 4],
 2,
 [2, 5],
 3,
 [2, 6],
 1,
 [3, 4],
 3,
 [3, 5],
 4,
 [3, 6]]

### Finding the association rules

In [11]:
associationRules = generateAssociationRule(fatherFrequentArray)

In [12]:
associationRules[:10]

[[[2], [1]],
 [[1], [2]],
 [[3], [1]],
 [[1], [3]],
 [[4], [1]],
 [[1], [4]],
 [[5], [1]],
 [[1], [5]],
 [[6], [1]],
 [[1], [6]],
 [[3], [2]],
 [[2], [3]],
 [[4], [2]],
 [[2], [4]],
 [[5], [2]],
 [[2], [5]],
 [[6], [2]],
 [[2], [6]],
 [[4], [3]],
 [[3], [4]],
 [[5], [3]],
 [[3], [5]],
 [[6], [3]],
 [[3], [6]],
 [[5], [4]],
 [[4], [5]],
 [[6], [4]],
 [[4], [6]],
 [[6], [5]],
 [[5], [6]],
 [[3], [1, 2]],
 [[2], [1, 3]],
 [[1], [2, 3]],
 [[2, 3], [1]],
 [[1, 3], [2]],
 [[1, 2], [3]],
 [[4], [1, 2]],
 [[2], [1, 4]],
 [[1], [2, 4]],
 [[2, 4], [1]],
 [[1, 4], [2]],
 [[1, 2], [4]],
 [[5], [1, 2]],
 [[2], [1, 5]],
 [[1], [2, 5]],
 [[2, 5], [1]],
 [[1, 5], [2]],
 [[1, 2], [5]],
 [[6], [1, 2]],
 [[2], [1, 6]],
 [[1], [2, 6]],
 [[2, 6], [1]],
 [[1, 6], [2]],
 [[1, 2], [6]],
 [[4], [1, 2, 3]],
 [[3], [1, 2, 4]],
 [[2], [1, 3, 4]],
 [[1], [2, 3, 4]],
 [[3, 4], [1, 2]],
 [[2, 4], [1, 3]],
 [[2, 3], [1, 4]],
 [[1, 4], [2, 3]],
 [[1, 3], [2, 4]],
 [[1, 2], [3, 4]],
 [[2, 3, 4], [1]],
 [[1, 3, 4], [2]]

In [13]:
a=[]
for i in associationRules:
    if i not in a:
        a.append(i)

In [14]:
len(a)

602

### Using Apriori to calculate the support values for the frequent item sets

In [15]:
AprioriOutput = aprioriOutput(associationRules, newdata, 0, 0)

In [16]:
AprioriOutput[:10]

['Item set size : 3',
 'Support of X & Y: 50',
 [[4], [1, 2, 3]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[3], [1, 2, 4]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[2], [1, 3, 4]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[1], [2, 3, 4]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[2, 3, 4], [1]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[1, 3, 4], [2]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[1, 2, 4], [3]],
 'Item set size : 3',
 'Support of X & Y: 50',
 [[1, 2, 3], [4]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[5], [1, 2, 3]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[3], [1, 2, 5]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[2], [1, 3, 5]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[1], [2, 3, 5]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[2, 3, 5], [1]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[1, 3, 5], [2]],
 'Item set size : 3',
 'Support of X & Y: 75',
 [[1, 2, 5], [3]],
 'Item set

### Writing the results to the file

In [20]:
import csv
with open("output.csv",'w') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerow(AprioriOutput)