## Importing Libraries

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder

## Generate Dataframe

In [2]:
# Each line of the CSV is one transaction: a comma-separated list of item
# names, with a variable number of fields per line (no fixed column layout),
# so the file is parsed line by line rather than as a rectangular table.
with open("Data/store_data.csv", "r", encoding="utf-8") as f:
    dataset = [
        # Drop empty fields (e.g. from trailing commas) so they do not
        # become a phantom "" item in the encoded columns.
        [item.strip() for item in line.split(",") if item.strip()]
        for line in f
        if line.strip()  # skip blank lines; they are not transactions
    ]

# One-hot encode the transactions: one boolean column per distinct item,
# one row per transaction (True when the item is in that transaction).
te = TransactionEncoder()
te_try = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_try, columns=te.columns_)
df.head()

Unnamed: 0,almonds,antioxydant juice,asparagus,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,body spray,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,True,True,False,True,False,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,True,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


In [3]:
# Minimum support threshold passed to apriori as `min_support`: the fraction
# of transactions an itemset must appear in (0.05 = 5%) to be kept.
MIN_SUPP = 0.05

## Apriori algorithm for frequent pattern mining

In [4]:
# With use_colnames left at its default (False), each itemset is reported
# as column indices of `df` rather than as item names.
apriori(df, min_support=MIN_SUPP, use_colnames=False)

Unnamed: 0,support,itemsets
0,0.087188,(14)
1,0.081056,(16)
2,0.059992,(22)
3,0.163845,(24)
4,0.080389,(29)
5,0.05106,(30)
6,0.179709,(36)
7,0.079323,(39)
8,0.170911,(42)
9,0.063325,(47)


## Apriori with item names instead of column indices

In [5]:
# Same mining run as before, but itemsets are reported using the item names
# taken from the columns of `df`, which makes the result readable.
apriori(
    df,
    min_support=MIN_SUPP,
    use_colnames=True,
)

Unnamed: 0,support,itemsets
0,0.087188,(burgers)
1,0.081056,(cake)
2,0.059992,(chicken)
3,0.163845,(chocolate)
4,0.080389,(cookies)
5,0.05106,(cooking oil)
6,0.179709,(eggs)
7,0.079323,(escalope)
8,0.170911,(french fries)
9,0.063325,(frozen smoothie)


## Calculate the length of each itemset

In [6]:
# Mine the frequent itemsets once and keep the result for later filtering.
frequent_itemsets = apriori(df, use_colnames=True, min_support=MIN_SUPP)

# Number of items in each itemset (1 = single item, 2 = pair, ...).
frequent_itemsets['length'] = frequent_itemsets['itemsets'].map(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.087188,(burgers),1
1,0.081056,(cake),1
2,0.059992,(chicken),1
3,0.163845,(chocolate),1
4,0.080389,(cookies),1
5,0.05106,(cooking oil),1
6,0.179709,(eggs),1
7,0.079323,(escalope),1
8,0.170911,(french fries),1
9,0.063325,(frozen smoothie),1


## Itemsets of length 2 with support >= MIN_SUPP

In [7]:
# Keep only the two-item itemsets whose support reaches the threshold.
is_pair = frequent_itemsets['length'] == 2
meets_support = frequent_itemsets['support'] >= MIN_SUPP
frequent_itemsets[is_pair & meets_support]

Unnamed: 0,support,itemsets,length
25,0.05266,"(chocolate, mineral water)",2
26,0.050927,"(eggs, mineral water)",2
27,0.059725,"(spaghetti, mineral water)",2


## Conclusion
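From the analysis of the transaction data, we can conclude that mineral water is frequently purchased together with chocolate, eggs, and spaghetti: each of these pairs appears in at least 5% of all transactions (the `MIN_SUPP` threshold).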