In [20]:
import numpy as np
import pandas as pd
from csv import reader
from collections import defaultdict
from itertools import combinations

In [21]:
def powerset(s):
    return chain.from_iterable(combinations(s, r) for r in range(1, len(s)))
def getAboveMinSup(itemSet, itemSetList, minSup, globalItemSetWithSup):
    freqItemSet = set()
    localItemSetWithSup = defaultdict(int)
    list=[]

    for item in itemSet:
        for itemSet in itemSetList:
            if item.issubset(itemSet):
                globalItemSetWithSup[item] += 1
                localItemSetWithSup[item] += 1

    for item, supCount in localItemSetWithSup.items():
        support = float(supCount / len(itemSetList))
        if(support >= minSup):
            freqItemSet.add(item)  
    return freqItemSet
def getFromFile(fname):
    itemSets = []
    itemSet = set()

    with open(fname, 'r') as file:
        csv_reader = reader(file)
        for line in csv_reader:
            line = list(filter(None, line))
            record = set(line)
            for item in record:
                itemSet.add(frozenset([item]))
            itemSets.append(record)
    return itemSet, itemSets


def getUnion(itemSet, length):
    return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])


def pruning(candidateSet, prevFreqSet, length):
    tempCandidateSet = candidateSet.copy()
    for item in candidateSet:
        subsets = combinations(item, length)
        for subset in subsets:
            # if the subset is not in previous K-frequent get, then remove the set
            if(frozenset(subset) not in prevFreqSet):
                tempCandidateSet.remove(item)
                break
    return tempCandidateSet


def associationRule(freqItemSet, itemSetWithSup, minConf):
    rules = []
    for k, itemSet in freqItemSet.items():
        for item in itemSet:
            subsets = powerset(item)
            for s in subsets:
                confidence = float(
                    itemSetWithSup[item] / itemSetWithSup[frozenset(s)])
                if(confidence > minConf):
                    rules.append([set(s), set(item.difference(s)), confidence])
    return rules


def getItemSetFromList(itemSetList):
    tempItemSet = set()

    for itemSet in itemSetList:
        for item in itemSet:
            tempItemSet.add(frozenset([item]))

    return tempItemSet

In [16]:
def apriori(itemSetList, minSup, minConf):
    C1ItemSet = getItemSetFromList(itemSetList)
    # Final result global frequent itemset
    globalFreqItemSet = dict()
    # Storing global itemset with support count
    globalItemSetWithSup = defaultdict(int)

    L1ItemSet = getAboveMinSup(
        C1ItemSet, itemSetList, minSup, globalItemSetWithSup)
    currentLSet = L1ItemSet
    k = 2

    # Calculating frequent item set
    while(currentLSet):
        # Storing frequent itemset
        globalFreqItemSet[k-1] = currentLSet
        # Self-joining Lk
        candidateSet = getUnion(currentLSet, k)
        # Perform subset testing and remove pruned supersets
        candidateSet = pruning(candidateSet, currentLSet, k-1)
        # Scanning itemSet for counting support
        currentLSet = getAboveMinSup(
            candidateSet, itemSetList, minSup, globalItemSetWithSup)
        k += 1

    rules = associationRule(globalFreqItemSet, globalItemSetWithSup, minConf)
    rules.sort(key=lambda x: x[2])

    return globalFreqItemSet,rules

In [22]:
list1 = [[1,3,5],[2,3,5],[1,2,3,5],[2,5],[1,3,5]]
freqItemSet, rules = apriori(list1, minSup=0.02, minConf=0.6)
print(freqItemSet,"\n------------------------------------------\n")
results=[]
for item in rules:
    value0 = str(item[0])
    value1 = str(item[1])
    value2 = str(item[2])[:7]
    rows = (value0,value1,value2)
    results.append(rows)
    
    Label = ['Title1','Title2','confidence']
    
    store_suggestion = pd.DataFrame.from_records(results,columns=Label)
    print(store_suggestion)
    

{1: {frozenset({3}), frozenset({2}), frozenset({1}), frozenset({5})}, 2: {frozenset({2, 3}), frozenset({1, 2}), frozenset({2, 5}), frozenset({1, 5}), frozenset({3, 5}), frozenset({1, 3})}, 3: {frozenset({1, 2, 3}), frozenset({1, 3, 5}), frozenset({2, 3, 5}), frozenset({1, 2, 5})}, 4: {frozenset({1, 2, 3, 5})}} 
------------------------------------------

  Title1 Title2 confidence
0    {2}    {3}    0.66666
  Title1  Title2 confidence
0    {2}     {3}    0.66666
1    {2}  {3, 5}    0.66666
   Title1  Title2 confidence
0     {2}     {3}    0.66666
1     {2}  {3, 5}    0.66666
2  {2, 5}     {3}    0.66666
   Title1  Title2 confidence
0     {2}     {3}    0.66666
1     {2}  {3, 5}    0.66666
2  {2, 5}     {3}    0.66666
3     {3}     {1}       0.75
   Title1  Title2 confidence
0     {2}     {3}    0.66666
1     {2}  {3, 5}    0.66666
2  {2, 5}     {3}    0.66666
3     {3}     {1}       0.75
4     {3}  {1, 5}       0.75
   Title1  Title2 confidence
0     {2}     {3}    0.66666
1     {2}  {

In [24]:
itemSetList = [['eggs', 'bacon', 'soup'],
                ['eggs', 'bacon', 'apple'],
                ['soup', 'bacon', 'banana']]
apriori(itemSetList, minSup=0.5, minConf=0.5)

({1: {frozenset({'soup'}), frozenset({'bacon'}), frozenset({'eggs'})},
  2: {frozenset({'bacon', 'soup'}), frozenset({'bacon', 'eggs'})}},
 [[{'bacon'}, {'soup'}, 0.6666666666666666],
  [{'bacon'}, {'eggs'}, 0.6666666666666666],
  [{'soup'}, {'bacon'}, 1.0],
  [{'eggs'}, {'bacon'}, 1.0]])

In [19]:
store_data=pd.read_csv('store_data.csv',header=None)
num_records=len(store_data)
records=[]
for i in range(0,num_records):
    records.append([str(store_data.values[i,j]) for j in range(0,20)])
freqItemSet, rules = apriori(records, minSup=0.0053, minConf=0.2)
print(rules)


[[{'burgers'}, {'green tea'}, 0.20030581039755352], [{'burgers'}, {'nan', 'green tea'}, 0.20030581039755352], [{'burgers', 'nan'}, {'green tea'}, 0.20030581039755352], [{'soup'}, {'chocolate'}, 0.20052770448548812], [{'soup'}, {'chocolate', 'nan'}, 0.20052770448548812], [{'soup', 'nan'}, {'chocolate'}, 0.20052770448548812], [{'chocolate', 'spaghetti'}, {'frozen vegetables'}, 0.20068027210884354], [{'chocolate', 'spaghetti'}, {'frozen vegetables', 'nan'}, 0.20068027210884354], [{'chocolate', 'nan', 'spaghetti'}, {'frozen vegetables'}, 0.20068027210884354], [{'green tea'}, {'spaghetti'}, 0.20080726538849647], [{'green tea'}, {'nan', 'spaghetti'}, 0.20080726538849647], [{'cottage cheese'}, {'green tea'}, 0.200836820083682], [{'mineral water', 'spaghetti'}, {'frozen vegetables'}, 0.20089285714285715], [{'mineral water', 'spaghetti'}, {'frozen vegetables', 'nan'}, 0.20089285714285715], [{'mineral water', 'nan', 'spaghetti'}, {'frozen vegetables'}, 0.20089285714285715], [{'light mayo'}, {'eg